def main():
    """Optionally download CPTAC data, then export one cancer type to TSV.

    Command-line options:
        --cancerType  Cancer type to export. Recognised values
                      (case-insensitive): brca, ccrcc, coad, ovca, luad,
                      endometrial.
        --getData     Download every supported dataset before loading.

    Writes the phosphoproteomics table to ``phos_file.tsv`` and the
    proteomics table to ``prot_file.tsv`` in the current directory.

    Exits through ``parser.error`` (usage message on stderr, status 2) when
    ``--cancerType`` is missing or is not one of the recognised values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--cancerType', dest='type',
                        help='Cancer type to be collected')
    parser.add_argument('--getData', dest='get', action='store_true',
                        default=False, help='Set flag to get all data')
    opts = parser.parse_args()

    if opts.get:
        for ds in ['brca', 'ccrcc', 'colon', 'ovarian', 'endometrial', 'luad']:
            cptac.download(dataset=ds)

    # Command-line code -> name of the cptac dataset class. Names are looked
    # up lazily so only the requested class is ever touched.
    dataset_classes = {
        'brca': 'Brca',
        'ccrcc': 'Ccrcc',
        'coad': 'Colon',
        'ovca': 'Ovarian',
        'luad': 'Luad',
        'endometrial': 'Endometrial',
    }

    # argparse leaves opts.type as None when --cancerType is not supplied;
    # calling .lower() on it would raise AttributeError.
    if opts.type is None:
        parser.error('--cancerType is required (one of: %s)'
                     % ', '.join(sorted(dataset_classes)))

    class_name = dataset_classes.get(opts.type.lower())
    if class_name is None:
        parser.error('unknown cancer type %r (expected one of: %s)'
                     % (opts.type, ', '.join(sorted(dataset_classes))))

    dat = getattr(cptac, class_name)()
    df = dat.get_phosphoproteomics()
    pdf = dat.get_proteomics()
    # df.columns = [' '.join(col).strip() for col in df.columns.values]
    df.to_csv(path_or_buf="phos_file.tsv", sep='\t')
    pdf.to_csv(path_or_buf='prot_file.tsv', sep='\t')
def getDataForCancer(ctype):
    """Return the cptac dataset object for the given cancer-type code.

    ``ctype`` is compared case-insensitively against: brca, ccrcc, coad,
    ovca, luad, endometrial. For any other value the function calls
    ``exit()`` instead of returning.
    """
    # Cancer-type code -> name of the cptac class that loads it.
    class_by_code = {
        'brca': 'Brca',
        'ccrcc': 'Ccrcc',
        'coad': 'Colon',
        'ovca': 'Ovarian',
        'luad': 'Luad',
        'endometrial': 'Endometrial',
    }
    class_name = class_by_code.get(ctype.lower())
    if class_name is None:
        exit()
    return getattr(cptac, class_name)()
def __init__(self):
    """Download and load the endometrial, ovarian, ccrcc and colon datasets.

    Each loaded dataset is stored on its own attribute (``en``, ``ovarian``,
    ``ccrcc``, ``colon``) and all four are collected, in that order, in
    ``self.datasets``.
    """
    # brca, gbm, hnscc and luad are currently disabled (they were
    # commented out in both the download and the load steps).
    for dataset_name in ("endometrial", "ovarian", "ccrcc", "colon"):
        cptac.download(dataset=dataset_name, version='latest')

    self.en = cptac.Endometrial()
    self.ovarian = cptac.Ovarian()
    self.ccrcc = cptac.Ccrcc()
    self.colon = cptac.Colon()

    self.datasets = [self.en, self.ovarian, self.ccrcc, self.colon]
def cptacData():
    """Download the CPTAC datasets used here and load each one.

    Returns:
        dict mapping dataset name ('brca', 'ccrcc', 'colon', 'ovarian',
        'endometrial', 'luad') to the corresponding loaded cptac object.
    """
    print("Loading cptac datasets")

    # Make sure every dataset is downloaded first. These are the cancers
    # available without login information; hnscc, gbm and lscc are left out.
    download_order = ('brca', 'ccrcc', 'colon', 'ovarian', 'luad',
                      'endometrial')
    print("Downloading cptac data")
    for dataset_name in download_order:
        cptac.download(dataset=dataset_name)

    # Dictionary key -> cptac class name, in the order the objects are built.
    loaders = (
        ('brca', 'Brca'),
        ('ccrcc', 'Ccrcc'),
        ('colon', 'Colon'),
        ('ovarian', 'Ovarian'),
        ('endometrial', 'Endometrial'),
        ('luad', 'Luad'),
    )
    return {key: getattr(cptac, class_name)() for key, class_name in loaders}
def test_get_frequently_mutated_ov_05_cutoff():
    """Check ut.get_frequently_mutated on the ovarian dataset at cutoff 0.05.

    Each group of (coordinates, expected values) is passed to check_getter
    together with the expected dimensions and headers, and the outcome is
    reported through print_test_result.
    """
    ov = cptac.Ovarian()

    print('Running get_frequently_mutated...')
    df = ut.get_frequently_mutated(ov, 0.05)

    dimensions = (142, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missense_Mut', 'Truncation_Mut']
    total_tumors = 83

    def stat_cells(row):
        # Cells holding the three fraction columns (1..3) of one gene row.
        return tuple((row, col) for col in (1, 2, 3))

    def as_fractions(unique, missense, truncation):
        # Convert raw sample counts into fractions of all tumors.
        return tuple(count / total_tumors
                     for count in (unique, missense, truncation))

    checks = [
        # gene names
        (((133, 0), (127, 0), (141, 0)), ('WDFY4', 'TP53', 'ZNF865')),
        # WDFY4: missense + truncation != unique samples mutated
        # (missense and truncation in the same sample)
        (stat_cells(133), as_fractions(10, 8, 3)),
        # CDK12: missense and truncation almost equal
        (stat_cells(11), as_fractions(6, 4, 3)),
        # ZNF865: no truncation mutations
        (stat_cells(141), as_fractions(5, 5, 0)),
        # SYNE1: close to the cutoff
        (stat_cells(122), as_fractions(5, 5, 1)),
        # TP53: common test and highest count
        (stat_cells(127), as_fractions(77, 50, 27)),
    ]
    # CHECK silent mut not counted

    for cells, expected in checks:
        outcome = check_getter(df, dimensions, headers, cells, expected)
        print_test_result(outcome)
def test_get_frequently_mutated_ov_default_cutoff():
    """Check ut.get_frequently_mutated on the ovarian dataset, default cutoff.

    Each group of (coordinates, expected values) is passed to check_getter
    together with the expected dimensions and headers, and the outcome is
    reported through print_test_result.
    """
    ov = cptac.Ovarian()

    print('Running get_frequently_mutated...')
    df = ut.get_frequently_mutated(ov)

    dimensions = (16, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missense_Mut', 'Truncation_Mut']
    total_tumors = 83

    def stat_cells(row):
        # Cells holding the three fraction columns (1..3) of one gene row.
        return tuple((row, col) for col in (1, 2, 3))

    def as_fractions(unique, missense, truncation):
        # Convert raw sample counts into fractions of all tumors.
        return tuple(count / total_tumors
                     for count in (unique, missense, truncation))

    checks = [
        # gene names
        (((15, 0), (13, 0), (2, 0)), ('WDFY4', 'TP53', 'MT-CO1')),
        # WDFY4: missense + truncation != unique samples mutated
        # (missense and truncation in the same sample)
        (stat_cells(15), as_fractions(10, 8, 3)),
        # MUC4: missense + truncation equal to unique samples mutated
        (stat_cells(8), as_fractions(27, 26, 1)),
        # MT-CO1: no truncation mutations
        (stat_cells(2), as_fractions(10, 10, 0)),
        # FSIP2: close to the cutoff
        (stat_cells(1), as_fractions(9, 8, 2)),
        # TP53: common test and highest count
        (stat_cells(13), as_fractions(77, 50, 27)),
    ]

    for cells, expected in checks:
        outcome = check_getter(df, dimensions, headers, cells, expected)
        print_test_result(outcome)
def downloadCptac():
    """Call cptac.list_datasets(), then download the datasets used below."""
    # cptac.list_datasets() presumably shows which datasets are available.
    cptac.list_datasets()
    # luad and brca downloads are disabled here.
    for dataset_name in ("endometrial", 'colon', 'ovarian', 'RenalCcrcc'):
        cptac.download(dataset=dataset_name)


downloadCptac()

# Module-level dataset objects, loaded once when the script runs.
# NOTE(review): Luad and Brca are loaded although their downloads are
# disabled in downloadCptac() -- confirm the data is already available locally.
endometrialData = cptac.Endometrial()
colorectalData = cptac.Colon()
ovarianData = cptac.Ovarian()
renalData = cptac.RenalCcrcc()
lungData = cptac.Luad()
breastData = cptac.Brca()


def listDataForEachCancer():
    """Print a heading and call list_data() for four of the loaded datasets."""
    # Headings after the first carry two leading newlines as separators.
    sections = (
        ("endometrial", endometrialData),
        ("\n\ncolorectal", colorectalData),
        ("\n\novarian", ovarianData),
        ("\n\nrenal", renalData),
    )
    for heading, dataset in sections:
        print(heading)
        dataset.list_data()


listDataForEachCancer()