def test_get_frequently_mutated_co_default_cutoff():
    """Check ut.get_frequently_mutated on the Colon dataset, default cutoff.

    Spot-checks the gene names and the three numeric columns for a handful
    of genes. Each (coordinates, expected values) pair is handed to
    check_getter together with the expected dimensions and headers, and one
    overall result is reported through print_test_result.
    """
    co = cptac.Colon()

    print('Running get_frequently_mutated...')

    df = ut.get_frequently_mutated(co)

    dimensions = (612, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missense_Mut', 'Truncation_Mut']
    total_tumors = 97  # denominator turning sample counts into fractions

    # NOTE(review): coordinates look like (row, column) positions in df;
    # confirm against check_getter.

    # Gene names.
    test_coord_names = ((90, 0), (284, 0), (499, 0))
    test_vals_names = ('CASP5', 'KRAS', 'SPINK5')

    # No missense-type mutations.
    test_coord_CASP5 = ((90, 1), (90, 2), (90, 3))
    test_vals_CASP5 = (19 / total_tumors, 0 / total_tumors, 19 / total_tumors)

    # No truncation-type mutations.
    test_coord_KRAS = ((284, 1), (284, 2), (284, 3))
    test_vals_KRAS = (35 / total_tumors, 35 / total_tumors, 0 / total_tumors)

    # Missense and truncation do not add up to the fraction mutated
    # (missense and truncation in the same sample).
    test_coord_ANK2 = ((34, 1), (34, 2), (34, 3))
    test_vals_ANK2 = (15 / total_tumors, 13 / total_tumors, 4 / total_tumors)

    # Missense and truncation counts are the same.
    test_coord_ATM = ((56, 1), (56, 2), (56, 3))
    test_vals_ATM = (10 / total_tumors, 7 / total_tumors, 7 / total_tumors)

    # Close to the cutoff.
    test_coord_SPINK5 = ((499, 1), (499, 2), (499, 3))
    test_vals_SPINK5 = (10 / total_tumors, 5 / total_tumors, 7 / total_tumors)

    # Common test.
    test_coord_TP53 = ((554, 1), (554, 2), (554, 3))
    test_vals_TP53 = (56 / total_tumors, 38 / total_tumors, 21 / total_tumors)

    test_coord_vals = [
        (test_coord_names, test_vals_names),
        (test_coord_CASP5, test_vals_CASP5),
        (test_coord_KRAS, test_vals_KRAS),
        (test_coord_ANK2, test_vals_ANK2),
        (test_coord_ATM, test_vals_ATM),
        (test_coord_SPINK5, test_vals_SPINK5),
        (test_coord_TP53, test_vals_TP53),
    ]

    # Run every check (a list, not a generator, so none is skipped after a
    # failure) and aggregate, so an early failure is not masked by a later
    # success.
    results = [
        check_getter(df, dimensions, headers, coord, vals)
        for coord, vals in test_coord_vals
    ]
    PASS = all(results)

    print_test_result(PASS)
def test_get_frequently_mutated_co_15_cutoff():
    """Check ut.get_frequently_mutated on the Colon dataset, cutoff 0.15.

    Spot-checks the gene names and the three numeric columns for a handful
    of genes. Each (coordinates, expected values) pair is handed to
    check_getter together with the expected dimensions and headers, and one
    overall result is reported through print_test_result.
    """
    co = cptac.Colon()

    print('Running get_frequently_mutated...')

    df = ut.get_frequently_mutated(co, 0.15)

    dimensions = (138, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missense_Mut', 'Truncation_Mut']
    total_tumors = 97  # denominator turning sample counts into fractions

    # NOTE(review): coordinates look like (row, column) positions in df;
    # confirm against check_getter.

    # Gene names.
    test_coord_names = ((15, 0), (66, 0), (102, 0))
    test_vals_names = ('CASP5', 'KRAS', 'RYR2')

    # No missense-type mutations.
    test_coord_CASP5 = ((15, 1), (15, 2), (15, 3))
    test_vals_CASP5 = (19 / total_tumors, 0 / total_tumors, 19 / total_tumors)

    # No truncation-type mutations.
    test_coord_KRAS = ((66, 1), (66, 2), (66, 3))
    test_vals_KRAS = (35 / total_tumors, 35 / total_tumors, 0 / total_tumors)

    # Missense and truncation add up to the fraction mutated.
    test_coord_PIK3CA = ((92, 1), (92, 2), (92, 3))
    test_vals_PIK3CA = (24 / total_tumors, 23 / total_tumors, 1 / total_tumors)

    # Missense and truncation do not add up to unique_samples_mutated
    # (missense and truncation in the same sample).
    test_coord_RYR2 = ((102, 1), (102, 2), (102, 3))
    test_vals_RYR2 = (21 / total_tumors, 19 / total_tumors, 7 / total_tumors)

    # Close to the cutoff.
    test_coord_ANK2 = ((6, 1), (6, 2), (6, 3))
    test_vals_ANK2 = (15 / total_tumors, 13 / total_tumors, 4 / total_tumors)

    # Common test.
    test_coord_TP53 = ((123, 1), (123, 2), (123, 3))
    test_vals_TP53 = (56 / total_tumors, 38 / total_tumors, 21 / total_tumors)

    test_coord_vals = [
        (test_coord_names, test_vals_names),
        (test_coord_CASP5, test_vals_CASP5),
        (test_coord_KRAS, test_vals_KRAS),
        (test_coord_PIK3CA, test_vals_PIK3CA),
        (test_coord_RYR2, test_vals_RYR2),
        (test_coord_ANK2, test_vals_ANK2),
        (test_coord_TP53, test_vals_TP53),
    ]

    # Run every check (a list, not a generator, so none is skipped after a
    # failure) and aggregate, so an early failure is not masked by a later
    # success.
    results = [
        check_getter(df, dimensions, headers, coord, vals)
        for coord, vals in test_coord_vals
    ]
    PASS = all(results)

    print_test_result(PASS)
def test_get_frequently_mutated_en_default_cutoff():
    """Check ut.get_frequently_mutated on the Endometrial dataset, default cutoff.

    Spot-checks the gene names and the three numeric columns for a handful
    of genes. Each (coordinates, expected values) pair is handed to
    check_getter together with the expected dimensions and headers, and one
    overall result is reported through print_test_result.
    """
    en = cptac.Endometrial()

    print('Running get_frequently_mutated...')

    df = ut.get_frequently_mutated(en)

    dimensions = (232, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missense_Mut', 'Truncation_Mut']
    total_tumors = 95  # denominator turning sample counts into fractions

    # NOTE(review): coordinates look like (row, column) positions in df;
    # confirm against check_getter.

    # Gene names.
    test_coord_names = ((53, 0), (32, 0), (227, 0))
    test_vals_names = ('CTCF', 'CCDC168', 'ZNF536')

    # Missense and truncation do not add up to unique_samples_mutated
    # (missense and truncation in the same sample).
    test_coord_CTCF = ((53, 1), (53, 2), (53, 3))
    test_vals_CTCF = (27 / total_tumors, 9 / total_tumors, 23 / total_tumors)

    # Missense and truncation values are equal.
    test_coord_CCDC168 = ((32, 1), (32, 2), (32, 3))
    test_vals_CCDC168 = (16 / total_tumors, 11 / total_tumors, 11 / total_tumors)

    # No truncation-type mutations.
    test_coord_ZNF536 = ((227, 1), (227, 2), (227, 3))
    test_vals_ZNF536 = (12 / total_tumors, 12 / total_tumors, 0 / total_tumors)

    # Close to the cutoff.
    test_coord_DICER1 = ((61, 1), (61, 2), (61, 3))
    test_vals_DICER1 = (10 / total_tumors, 10 / total_tumors, 1 / total_tumors)

    # Common test.
    test_coord_TP53 = ((205, 1), (205, 2), (205, 3))
    test_vals_TP53 = (21 / total_tumors, 15 / total_tumors, 7 / total_tumors)

    test_coord_vals = [
        (test_coord_names, test_vals_names),
        (test_coord_CTCF, test_vals_CTCF),
        (test_coord_CCDC168, test_vals_CCDC168),
        (test_coord_ZNF536, test_vals_ZNF536),
        (test_coord_DICER1, test_vals_DICER1),
        (test_coord_TP53, test_vals_TP53),
    ]

    # Run every check (a list, not a generator, so none is skipped after a
    # failure) and aggregate, so an early failure is not masked by a later
    # success.
    results = [
        check_getter(df, dimensions, headers, coord, vals)
        for coord, vals in test_coord_vals
    ]
    PASS = all(results)

    print_test_result(PASS)
def test_get_frequently_mutated_ov_05_cutoff():
    """Check ut.get_frequently_mutated on the Ovarian dataset, cutoff 0.05.

    Spot-checks the gene names and the three numeric columns for a handful
    of genes. Each (coordinates, expected values) pair is handed to
    check_getter together with the expected dimensions and headers, and one
    overall result is reported through print_test_result.
    """
    ov = cptac.Ovarian()

    print('Running get_frequently_mutated...')

    df = ut.get_frequently_mutated(ov, 0.05)

    dimensions = (142, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missense_Mut', 'Truncation_Mut']
    total_tumors = 83  # denominator turning sample counts into fractions

    # NOTE(review): coordinates look like (row, column) positions in df;
    # confirm against check_getter.

    # Gene names.
    test_coord_names = ((133, 0), (127, 0), (141, 0))
    test_vals_names = ('WDFY4', 'TP53', 'ZNF865')

    # Missense and truncation do not add up to unique_samples_mutated
    # (missense and truncation in the same sample).
    test_coord_WDFY4 = ((133, 1), (133, 2), (133, 3))
    test_vals_WDFY4 = (10 / total_tumors, 8 / total_tumors, 3 / total_tumors)

    # Missense and truncation almost equal.
    test_coord_CDK12 = ((11, 1), (11, 2), (11, 3))
    test_vals_CDK12 = (6 / total_tumors, 4 / total_tumors, 3 / total_tumors)

    # No truncation mutations.
    test_coord_ZNF865 = ((141, 1), (141, 2), (141, 3))
    test_vals_ZNF865 = (5 / total_tumors, 5 / total_tumors, 0 / total_tumors)

    # Close to the cutoff.
    test_coord_SYNE1 = ((122, 1), (122, 2), (122, 3))
    test_vals_SYNE1 = (5 / total_tumors, 5 / total_tumors, 1 / total_tumors)

    # Common test and highest count.
    # TODO(review): original note here reads "CHECK silent mut not counted".
    test_coord_TP53 = ((127, 1), (127, 2), (127, 3))
    test_vals_TP53 = (77 / total_tumors, 50 / total_tumors, 27 / total_tumors)

    test_coord_vals = [
        (test_coord_names, test_vals_names),
        (test_coord_WDFY4, test_vals_WDFY4),
        (test_coord_CDK12, test_vals_CDK12),
        (test_coord_ZNF865, test_vals_ZNF865),
        (test_coord_SYNE1, test_vals_SYNE1),
        (test_coord_TP53, test_vals_TP53),
    ]

    # Run every check (a list, not a generator, so none is skipped after a
    # failure) and aggregate, so an early failure is not masked by a later
    # success.
    results = [
        check_getter(df, dimensions, headers, coord, vals)
        for coord, vals in test_coord_vals
    ]
    PASS = all(results)

    print_test_result(PASS)
def test_get_frequently_mutated_en_cutoff_20_cutoff():
    """Check ut.get_frequently_mutated on the Endometrial dataset, cutoff 0.2.

    Spot-checks the gene names and the three numeric columns for a handful
    of genes. Each (coordinates, expected values) pair is handed to
    check_getter together with the expected dimensions and headers, and one
    overall result is reported through print_test_result.
    """
    en = cptac.Endometrial()

    print('Running get_frequently_mutated...')

    df = ut.get_frequently_mutated(en, cutoff=0.2)

    dimensions = (10, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missense_Mut', 'Truncation_Mut']
    total_tumors = 95  # denominator turning sample counts into fractions

    # NOTE(review): coordinates look like (row, column) positions in df;
    # confirm against check_getter.

    # Gene names.
    test_coord_names = ((0, 0), (2, 0), (8, 0))
    test_vals_names = ('ARID1A', 'CTNNB1', 'TP53')

    # Missense and truncation do not add up to unique_samples_mutated
    # (missense and truncation in the same sample, counted in each category).
    test_coord_ARID1A = ((0, 1), (0, 2), (0, 3))
    test_vals_ARID1A = (43 / total_tumors, 13 / total_tumors, 38 / total_tumors)

    # No truncation-type mutations.
    test_coord_CTNNB1 = ((2, 1), (2, 2), (2, 3))
    test_vals_CTNNB1 = (29 / total_tumors, 29 / total_tumors, 0 / total_tumors)

    # Close to the cutoff.
    test_coord_ZFHX3 = ((9, 1), (9, 2), (9, 3))
    test_vals_ZFHX3 = (21 / total_tumors, 8 / total_tumors, 16 / total_tumors)

    # Missense and truncation almost equal.
    test_coord_KMT2B = ((3, 1), (3, 2), (3, 3))
    test_vals_KMT2B = (23 / total_tumors, 11 / total_tumors, 12 / total_tumors)

    # Common test.
    test_coord_TP53 = ((8, 1), (8, 2), (8, 3))
    test_vals_TP53 = (21 / total_tumors, 15 / total_tumors, 7 / total_tumors)

    test_coord_vals = [
        (test_coord_names, test_vals_names),
        (test_coord_ARID1A, test_vals_ARID1A),
        (test_coord_CTNNB1, test_vals_CTNNB1),
        (test_coord_ZFHX3, test_vals_ZFHX3),
        (test_coord_KMT2B, test_vals_KMT2B),
        (test_coord_TP53, test_vals_TP53),
    ]

    # Run every check (a list, not a generator, so none is skipped after a
    # failure) and aggregate, so an early failure is not masked by a later
    # success.
    results = [
        check_getter(df, dimensions, headers, coord, vals)
        for coord, vals in test_coord_vals
    ]
    PASS = all(results)

    print_test_result(PASS)
def test_get_frequently_mutated_renal_01_cutoff():
    """Check ut.get_frequently_mutated on the Ccrcc dataset, cutoff 0.01.

    Spot-checks the gene names and the three numeric columns for a handful
    of genes. Each (coordinates, expected values) pair is handed to
    check_getter together with the expected dimensions and headers, and one
    overall result is reported through print_test_result.
    """
    rc = cptac.Ccrcc()

    print('Running get_frequently_mutated...')

    df = ut.get_frequently_mutated(rc, cutoff=0.01)

    dimensions = (1106, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missense_Mut', 'Truncation_Mut']
    total_tumors = 110  # denominator turning sample counts into fractions

    # NOTE(review): coordinates look like (row, column) positions in df;
    # confirm against check_getter.

    # Gene names.
    test_coord_names = ((11, 0), (992, 0), (1080, 0))
    test_vals_names = ('ABCC3', 'TTN', 'ZNF532')

    # No missense mutations.
    test_coord_ABCC3 = ((11, 1), (11, 2), (11, 3))
    test_vals_ABCC3 = (2 / total_tumors, 0 / total_tumors, 2 / total_tumors)

    # No truncation mutations, and close to the cutoff.
    test_coord_ZNF532 = ((1080, 1), (1080, 2), (1080, 3))
    test_vals_ZNF532 = (2 / total_tumors, 2 / total_tumors, 0 / total_tumors)

    # Missense and truncation add up to unique_samples_mutated.
    test_coord_NAV3 = ((611, 1), (611, 2), (611, 3))
    test_vals_NAV3 = (7 / total_tumors, 5 / total_tumors, 2 / total_tumors)

    # Silent mutations must not be counted (TTN has many silent mutations),
    # and missense and truncation do not add up to unique_samples_mutated.
    test_coord_TTN = ((992, 1), (992, 2), (992, 3))
    test_vals_TTN = (13 / total_tumors, 10 / total_tumors, 4 / total_tumors)

    # Common test and highest count.
    test_coord_VHL = ((1019, 1), (1019, 2), (1019, 3))
    test_vals_VHL = (82 / total_tumors, 33 / total_tumors, 49 / total_tumors)

    test_coord_vals = [
        (test_coord_names, test_vals_names),
        (test_coord_ABCC3, test_vals_ABCC3),
        (test_coord_ZNF532, test_vals_ZNF532),
        (test_coord_NAV3, test_vals_NAV3),
        (test_coord_TTN, test_vals_TTN),
        (test_coord_VHL, test_vals_VHL),
    ]

    # Run every check (a list, not a generator, so none is skipped after a
    # failure) and aggregate, so an early failure is not masked by a later
    # success.
    results = [
        check_getter(df, dimensions, headers, coord, val)
        for coord, val in test_coord_vals
    ]
    PASS = all(results)

    print_test_result(PASS)
def test_get_frequently_mutated_renal_default_cutoff():
    """Check ut.get_frequently_mutated on the Ccrcc dataset, default cutoff.

    Spot-checks the gene names and the three numeric columns for a handful
    of genes. Each (coordinates, expected values) pair is handed to
    check_getter together with the expected dimensions and headers, and one
    overall result is reported through print_test_result.
    """
    rc = cptac.Ccrcc()

    print('Running get_frequently_mutated...')

    df = ut.get_frequently_mutated(rc)

    dimensions = (6, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missense_Mut', 'Truncation_Mut']
    total_tumors = 110  # denominator turning sample counts into fractions

    # NOTE(review): coordinates look like (row, column) positions in df;
    # confirm against check_getter.

    # Gene names.
    test_coord_names = ((0, 0), (2, 0), (4, 0))
    test_vals_names = ('BAP1', 'PBRM1', 'TTN')

    # Missense and truncation add up to unique_samples_mutated.
    test_coord_BAP1 = ((0, 1), (0, 2), (0, 3))
    test_vals_BAP1 = (17 / total_tumors, 7 / total_tumors, 10 / total_tumors)

    # High truncation count, low missense count.
    test_coord_PBRM1 = ((2, 1), (2, 2), (2, 3))
    test_vals_PBRM1 = (44 / total_tumors, 8 / total_tumors, 37 / total_tumors)

    # Silent mutations must not be counted (TTN has many silent mutations),
    # and missense and truncation do not add up to unique_samples_mutated.
    test_coord_TTN = ((4, 1), (4, 2), (4, 3))
    test_vals_TTN = (13 / total_tumors, 10 / total_tumors, 4 / total_tumors)

    # Close to the cutoff.
    test_coord_SETD2 = ((3, 1), (3, 2), (3, 3))
    test_vals_SETD2 = (15 / total_tumors, 2 / total_tumors, 13 / total_tumors)

    # Common test and highest count.
    test_coord_VHL = ((5, 1), (5, 2), (5, 3))
    test_vals_VHL = (82 / total_tumors, 33 / total_tumors, 49 / total_tumors)

    test_coord_vals = [
        (test_coord_names, test_vals_names),
        (test_coord_BAP1, test_vals_BAP1),
        (test_coord_PBRM1, test_vals_PBRM1),
        (test_coord_TTN, test_vals_TTN),
        (test_coord_SETD2, test_vals_SETD2),
        (test_coord_VHL, test_vals_VHL),
    ]

    # Run every check (a list, not a generator, so none is skipped after a
    # failure) and aggregate, so an early failure is not masked by a later
    # success.
    results = [
        check_getter(df, dimensions, headers, coord, vals)
        for coord, vals in test_coord_vals
    ]
    PASS = all(results)

    print_test_result(PASS)
def test_get_frequently_mutated_ov_default_cutoff():
    """Check ut.get_frequently_mutated on the Ovarian dataset, default cutoff.

    Spot-checks the gene names and the three numeric columns for a handful
    of genes. Each (coordinates, expected values) pair is handed to
    check_getter together with the expected dimensions and headers, and one
    overall result is reported through print_test_result.
    """
    ov = cptac.Ovarian()

    print('Running get_frequently_mutated...')

    df = ut.get_frequently_mutated(ov)

    dimensions = (16, 4)
    headers = ['Gene', 'Unique_Samples_Mut', 'Missense_Mut', 'Truncation_Mut']
    total_tumors = 83  # denominator turning sample counts into fractions

    # NOTE(review): coordinates look like (row, column) positions in df;
    # confirm against check_getter.

    # Gene names.
    test_coord_names = ((15, 0), (13, 0), (2, 0))
    test_vals_names = ('WDFY4', 'TP53', 'MT-CO1')

    # Missense and truncation do not add up to unique_samples_mutated
    # (missense and truncation in the same sample).
    test_coord_WDFY4 = ((15, 1), (15, 2), (15, 3))
    test_vals_WDFY4 = (10 / total_tumors, 8 / total_tumors, 3 / total_tumors)

    # Missense and truncation add up to unique_samples_mutated.
    test_coord_MUC4 = ((8, 1), (8, 2), (8, 3))
    test_vals_MUC4 = (27 / total_tumors, 26 / total_tumors, 1 / total_tumors)

    # No truncation mutations.
    test_coord_MTCO1 = ((2, 1), (2, 2), (2, 3))
    test_vals_MTCO1 = (10 / total_tumors, 10 / total_tumors, 0 / total_tumors)

    # Close to the cutoff.
    test_coord_FSIP2 = ((1, 1), (1, 2), (1, 3))
    test_vals_FSIP2 = (9 / total_tumors, 8 / total_tumors, 2 / total_tumors)

    # Common test and highest count.
    test_coord_TP53 = ((13, 1), (13, 2), (13, 3))
    test_vals_TP53 = (77 / total_tumors, 50 / total_tumors, 27 / total_tumors)

    test_coord_vals = [
        (test_coord_names, test_vals_names),
        (test_coord_WDFY4, test_vals_WDFY4),
        (test_coord_MUC4, test_vals_MUC4),
        (test_coord_MTCO1, test_vals_MTCO1),
        (test_coord_FSIP2, test_vals_FSIP2),
        (test_coord_TP53, test_vals_TP53),
    ]

    # Run every check (a list, not a generator, so none is skipped after a
    # failure) and aggregate, so an early failure is not masked by a later
    # success.
    results = [
        check_getter(df, dimensions, headers, coord, vals)
        for coord, vals in test_coord_vals
    ]
    PASS = all(results)

    print_test_result(PASS)