Ejemplo n.º 1
0
def main():
    """Export CPTAC phosphoproteomics and proteomics tables for one cancer type.

    Command-line options:
        --cancerType  Cancer type to export; one of brca, ccrcc, coad, ovca,
                      luad, endometrial (matched case-insensitively).
        --getData     Download every supported cptac dataset before exporting.

    Writes ``phos_file.tsv`` and ``prot_file.tsv`` (tab-separated) to the
    current working directory.  Running with only ``--getData`` downloads
    the datasets and returns without exporting.  A missing or unrecognised
    cancer type ends the run through ``parser.error`` (usage message on
    stderr, exit status 2) instead of crashing or exiting silently.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--cancerType', dest='type',
                        help='Cancer type to be collected')
    parser.add_argument('--getData', dest='get', action='store_true',
                        default=False, help='Set flag to get all data')
    opts = parser.parse_args()

    if opts.get:
        for ds in ['brca', 'ccrcc', 'colon', 'ovarian', 'endometrial', 'luad']:
            cptac.download(dataset=ds)

    # argparse leaves opts.type as None when --cancerType is omitted; calling
    # .lower() on it would raise AttributeError.
    if opts.type is None:
        if opts.get:
            # Download-only invocation: nothing to export.
            return
        parser.error('--cancerType is required unless --getData is given')

    # Command-line code -> name of the cptac dataset class.  The class is
    # resolved lazily so only the requested dataset is touched.
    loader_names = {
        'brca': 'Brca',
        'ccrcc': 'Ccrcc',
        'coad': 'Colon',
        'ovca': 'Ovarian',
        'luad': 'Luad',
        'endometrial': 'Endometrial',
    }
    loader_name = loader_names.get(opts.type.lower())
    if loader_name is None:
        parser.error('unknown cancer type %r (choose from: %s)'
                     % (opts.type, ', '.join(sorted(loader_names))))
    dat = getattr(cptac, loader_name)()

    df = dat.get_phosphoproteomics()
    pdf = dat.get_proteomics()

    df.to_csv(path_or_buf="phos_file.tsv", sep='\t')
    pdf.to_csv(path_or_buf='prot_file.tsv', sep='\t')
Ejemplo n.º 2
0
def getDataForCancer(ctype):
    """Return a newly constructed cptac dataset object for ``ctype``.

    ``ctype`` is matched case-insensitively against the codes brca, ccrcc,
    coad, ovca, luad and endometrial.  Any other value terminates the
    interpreter through ``exit()``.
    """
    # Cancer-type code -> name of the cptac class that loads it.
    class_name_by_code = {
        'brca': 'Brca',
        'ccrcc': 'Ccrcc',
        'coad': 'Colon',
        'ovca': 'Ovarian',
        'luad': 'Luad',
        'endometrial': 'Endometrial',
    }
    class_name = class_name_by_code.get(ctype.lower())
    if class_name is None:
        exit()
    return getattr(cptac, class_name)()
Ejemplo n.º 3
0
def test_get_frequently_mutated_co_default_cutoff():
    """Check ut.get_frequently_mutated on the colon dataset, default cutoff.

    Every (coordinates, expected values) group is passed to ``check_getter``
    together with the expected dimensions and headers.  The overall result
    handed to ``print_test_result`` is a pass only if every group passed;
    previously each iteration overwrote the result, so only the last group
    (TP53) decided the outcome.
    """
    co = cptac.Colon()
    print('Running get_frequently_mutated...')
    df = ut.get_frequently_mutated(co)

    dimensions = (612, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missense_Mut', 'Truncation_Mut']
    # test gene names
    test_coord_names = ((90, 0), (284, 0), (499, 0))
    test_vals_names = ('CASP5', 'KRAS', 'SPINK5')

    total_tumors = 97
    # test when there are no missense type mutations
    test_coord_CASP5 = ((90, 1), (90, 2), (90, 3))
    test_vals_CASP5 = (19 / total_tumors, 0 / total_tumors, 19 / total_tumors)
    # test when there are no truncation type mutations
    test_coord_KRAS = ((284, 1), (284, 2), (284, 3))
    test_vals_KRAS = (35 / total_tumors, 35 / total_tumors, 0 / total_tumors)
    # test when missense and truncation don't add up to the fraction mutated
    # (missense and truncation in the same sample)
    test_coord_ANK2 = ((34, 1), (34, 2), (34, 3))
    test_vals_ANK2 = (15 / total_tumors, 13 / total_tumors, 4 / total_tumors)
    # test when missense and truncation counts are the same
    test_coord_ATM = ((56, 1), (56, 2), (56, 3))
    test_vals_ATM = (10 / total_tumors, 7 / total_tumors, 7 / total_tumors)
    # test close to the cutoff
    test_coord_SPINK5 = ((499, 1), (499, 2), (499, 3))
    test_vals_SPINK5 = (10 / total_tumors, 5 / total_tumors, 7 / total_tumors)
    # common test
    test_coord_TP53 = ((554, 1), (554, 2), (554, 3))
    test_vals_TP53 = (56 / total_tumors, 38 / total_tumors, 21 / total_tumors)

    test_coord_vals = [(test_coord_names, test_vals_names),
                       (test_coord_CASP5, test_vals_CASP5),
                       (test_coord_KRAS, test_vals_KRAS),
                       (test_coord_ANK2, test_vals_ANK2),
                       (test_coord_ATM, test_vals_ATM),
                       (test_coord_SPINK5, test_vals_SPINK5),
                       (test_coord_TP53, test_vals_TP53)]

    # Build the full list first so every check still runs (no short-circuit),
    # then require all of them to have passed.
    # NOTE(review): assumes check_getter returns a truthy value on success.
    results = [check_getter(df, dimensions, headers, coord, vals)
               for coord, vals in test_coord_vals]

    print_test_result(all(results))
Ejemplo n.º 4
0
def test_get_frequently_mutated_co_15_cutoff():
    """Check ut.get_frequently_mutated on the colon dataset with cutoff 0.15.

    Every (coordinates, expected values) group is passed to ``check_getter``
    together with the expected dimensions and headers.  The overall result
    handed to ``print_test_result`` is a pass only if every group passed;
    previously each iteration overwrote the result, so only the last group
    (TP53) decided the outcome.
    """
    co = cptac.Colon()
    print('Running get_frequently_mutated...')
    df = ut.get_frequently_mutated(co, 0.15)

    dimensions = (138, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missense_Mut', 'Truncation_Mut']
    # test gene names
    test_coord_names = ((15, 0), (66, 0), (102, 0))
    test_vals_names = ('CASP5', 'KRAS', 'RYR2')

    total_tumors = 97
    # test no missense type mutations
    test_coord_CASP5 = ((15, 1), (15, 2), (15, 3))
    test_vals_CASP5 = (19 / total_tumors, 0 / total_tumors, 19 / total_tumors)
    # test no truncation type mutations
    test_coord_KRAS = ((66, 1), (66, 2), (66, 3))
    test_vals_KRAS = (35 / total_tumors, 35 / total_tumors, 0 / total_tumors)
    # test missense and truncation equal fraction mutated
    test_coord_PIK3CA = ((92, 1), (92, 2), (92, 3))
    test_vals_PIK3CA = (24 / total_tumors, 23 / total_tumors, 1 / total_tumors)
    # test missense and truncation don't equal unique_samples_mutated
    # (missense and truncation in the same sample)
    test_coord_RYR2 = ((102, 1), (102, 2), (102, 3))
    test_vals_RYR2 = (21 / total_tumors, 19 / total_tumors, 7 / total_tumors)
    # test close to the cutoff
    test_coord_ANK2 = ((6, 1), (6, 2), (6, 3))
    test_vals_ANK2 = (15 / total_tumors, 13 / total_tumors, 4 / total_tumors)
    # common test
    test_coord_TP53 = ((123, 1), (123, 2), (123, 3))
    test_vals_TP53 = (56 / total_tumors, 38 / total_tumors, 21 / total_tumors)

    test_coord_vals = [(test_coord_names, test_vals_names),
                       (test_coord_CASP5, test_vals_CASP5),
                       (test_coord_KRAS, test_vals_KRAS),
                       (test_coord_PIK3CA, test_vals_PIK3CA),
                       (test_coord_RYR2, test_vals_RYR2),
                       (test_coord_ANK2, test_vals_ANK2),
                       (test_coord_TP53, test_vals_TP53)]

    # Build the full list first so every check still runs (no short-circuit),
    # then require all of them to have passed.
    # NOTE(review): assumes check_getter returns a truthy value on success.
    results = [check_getter(df, dimensions, headers, coord, vals)
               for coord, vals in test_coord_vals]

    print_test_result(all(results))
Ejemplo n.º 5
0
 def __init__(self):
     """Download the enabled cptac datasets and load each one.

     After construction the loaded objects are available as ``self.en``,
     ``self.ovarian``, ``self.ccrcc`` and ``self.colon``, and in that order
     in the ``self.datasets`` list.
     """
     # brca, gbm, hnscc and luad are currently disabled.
     for dataset_name in ("endometrial", "ovarian", "ccrcc", "colon"):
         cptac.download(dataset=dataset_name, version='latest')

     self.en = cptac.Endometrial()
     self.ovarian = cptac.Ovarian()
     self.ccrcc = cptac.Ccrcc()
     self.colon = cptac.Colon()

     self.datasets = [self.en, self.ovarian, self.ccrcc, self.colon]
Ejemplo n.º 6
0
def cptacData():
    """Download the cptac datasets and return them loaded, keyed by name.

    The returned dict has the keys 'brca', 'ccrcc', 'colon', 'ovarian',
    'endometrial' and 'luad', each mapped to a freshly constructed cptac
    dataset object.
    """
    print("Loading cptac datasets")

    # Datasets fetched here; hnscc, gbm and lscc are left out (the original
    # comment lists the included ones as available without login).
    dataset_names = ('brca', 'ccrcc', 'colon', 'ovarian', 'luad', 'endometrial')

    print("Downloading cptac data")
    for dataset_name in dataset_names:
        cptac.download(dataset=dataset_name)

    # Load each dataset, inserting in the same key order as before.
    loaded = {}
    loaded['brca'] = cptac.Brca()
    loaded['ccrcc'] = cptac.Ccrcc()
    loaded['colon'] = cptac.Colon()
    loaded['ovarian'] = cptac.Ovarian()
    loaded['endometrial'] = cptac.Endometrial()
    loaded['luad'] = cptac.Luad()
    return loaded
Ejemplo n.º 7
0
from scipy.stats import pearsonr


def downloadCptac():
    """Call cptac.list_datasets(), then download four cptac datasets.

    Downloads, in order: endometrial, colon, ovarian and RenalCcrcc.
    luad and brca are not downloaded by this function.
    """
    cptac.list_datasets()
    for dataset_name in ("endometrial", 'colon', 'ovarian', 'RenalCcrcc'):
        cptac.download(dataset=dataset_name)
# Fetch the datasets at import time, before any dataset object is constructed.
downloadCptac()

# Module-level dataset objects, one per cancer type.
endometrialData = cptac.Endometrial()
colorectalData = cptac.Colon()
ovarianData = cptac.Ovarian()
renalData = cptac.RenalCcrcc()
# NOTE(review): downloadCptac() does not download luad or brca; confirm these
# two loads work without a prior download.
lungData = cptac.Luad()
breastData = cptac.Brca()

def listDataForEachCancer():
    """Print a label for each module-level dataset and call its list_data()."""
    print("endometrial")
    endometrialData.list_data()
    print("\n\ncolorectal")
    colorectalData.list_data()
    print("\n\novarian")
    ovarianData.list_data()
    print("\n\nrenal")
    renalData.list_data()