def test_sc1b():
    """Check the SC1B aggregation scores returned for ``M=2``.

    The 'aggregation' entry of ``plot_aggr_best_score(M=2)`` is compared
    with reference values; ``plot_aggr_random(2, 2)`` is then called
    without checking its return value.
    """
    reference = [0.76422010245539662, 0.76991697579932872]
    sc1b = aggregation.SC1B_aggregation()
    best_scores = sc1b.plot_aggr_best_score(M=2)
    easydev.assert_list_almost_equal(best_scores['aggregation'], reference)
    # The returned value is not inspected: the call only has to complete.
    sc1b.plot_aggr_random(2, 2)
def test_sc1a():
    """Check the SC1A aggregation scores returned for ``M=2``.

    The 'aggregation' entry of ``plot_aggr_best_score(M=2)`` is compared
    with reference values; ``plot_aggr_random(2, 2)`` is then called
    without checking its return value.
    """
    reference = [0.78202740472227761, 0.79045976573183718]
    sc1a = aggregation.SC1A_aggregation()
    best_scores = sc1a.plot_aggr_best_score(M=2)
    easydev.assert_list_almost_equal(best_scores['aggregation'], reference)
    # The returned value is not inspected: the call only has to complete.
    sc1a.plot_aggr_random(2, 2)
def test_sc1a():
    """Check the SC1A aggregation scores returned for ``M=2``.

    The 'aggregation' entry of ``plot_aggr_best_score(M=2)`` is compared
    with reference values; ``plot_aggr_random(2, 2)`` is then called
    without checking its return value.
    """
    reference = [0.78202740472227761, 0.79045976573183718]
    sc1a = aggregation.SC1A_aggregation()
    best_scores = sc1a.plot_aggr_best_score(M=2)
    easydev.assert_list_almost_equal(best_scores['aggregation'], reference)
    # The returned value is not inspected: the call only has to complete.
    sc1a.plot_aggr_random(2, 2)
def test_sc1b():
    """Check the SC1B aggregation scores returned for ``M=2``.

    The 'aggregation' entry of ``plot_aggr_best_score(M=2)`` is compared
    with reference values; ``plot_aggr_random(2, 2)`` is then called
    without checking its return value.
    """
    reference = [0.76422010245539662, 0.76991697579932872]
    sc1b = aggregation.SC1B_aggregation()
    best_scores = sc1b.plot_aggr_best_score(M=2)
    easydev.assert_list_almost_equal(best_scores['aggregation'], reference)
    # The returned value is not inspected: the call only has to complete.
    sc1b.plot_aggr_random(2, 2)
def test_d5c3_A():
    """Score the first downloaded 'A100' template with ``score_challengeA``.

    The 'aupr', 'auroc', 'p_aupr' and 'p_auroc' entries of the result are
    compared with reference values.
    """
    challenge = D5C3()
    templates = challenge.download_template('A100')
    scores = challenge.score_challengeA(templates[0], 'A100_1')

    metric_names = ('aupr', 'auroc', 'p_aupr', 'p_auroc')
    observed = [scores[name] for name in metric_names]
    reference = [0.0072659, 0.71110535, 0.9989876, 2.6334871e-35]
    assert_list_almost_equal(observed, reference)
def test_anova_one_drug_one_feature():
    """Compare a one-drug/one-feature ANOVA result with a reference frame.

    Runs ``anova_one_drug_one_feature`` for 'Drug_999_IC50' and 'ABCB1_mut'
    (with ``show=True``) and compares the returned object with a
    single-row DataFrame indexed by 1.
    """
    # test 1 drug
    drug_id = 'Drug_999_IC50'

    # Expected value of each column for the single row (index label 1).
    reference_row = {
        'DRUG_ID': drug_id,
        'DRUG_NAME': drug_id,
        'DRUG_TARGET': drug_id,
        'FEATURE': 'ABCB1_mut',
        'ANOVA_FEATURE_pval': 0.86842684367357359,
        'FEATURE_IC50_T_pval': 0.48586107208790896,
        'FEATURE_IC50_effect_size': 0.31407773405409201,
        'FEATURE_delta_MEAN_IC50': -0.36105662590553411,
        'FEATUREneg_Glass_delta': 0.31296252976074801,
        'FEATUREneg_IC50_sd': 1.1536736560173895,
        'FEATUREneg_logIC50_MEAN': 2.8007757068403043,
        'FEATUREpos_Glass_delta': 0.53754504818376181,
        'FEATUREpos_IC50_sd': 0.67167696386648634,
        'FEATUREpos_logIC50_MEAN': 2.4397190809347702,
        'ANOVA_MSI_pval': 0.14598946672374763,
        'N_FEATURE_neg': 370,
        'N_FEATURE_pos': 5,
        'ANOVA_TISSUE_pval': 3.2808255732569986e-06,
    }

    an = ANOVA(ic50_test)
    df = an.anova_one_drug_one_feature(
        drug_id=drug_id, feature_name='ABCB1_mut', show=True)

    # Same nested {column: {1: value}} layout as a hand-written literal.
    control = pd.DataFrame(
        {column: {1: value} for column, value in reference_row.items()})
    assert_list_almost_equal(df, control)
def test_qvalue():
    """Check ``QValue.qvalue()`` against reference values and a failure case.

    With ``pi0`` enforced, the q-values are compared with reference numbers.
    Without ``pi0``, constructing ``QValue`` on the same p-values is expected
    to raise.
    """
    pvalues = np.array([0.8, 0.5, 0.5] + list(10**-np.linspace(1, 10, 9)))

    # This gives same answer as in R qvalue library
    # pvalues = c(c(0.8,0.5,0.5), 10**(-seq(1,10,2)))
    # qvalue(pvalues)
    # Note that here we enforce the pi0 value to be the same as in R
    # If not, this particular case fails because pi0 is negative...
    # qvalue not very robust.
    qv = qvalue.QValue(pvalues, pi0=0.1109898)
    assert_list_almost_equal(
        qv.qvalue(),
        np.array([
            8.87918400e-02, 6.05398909e-02, 6.05398909e-02, 1.47986400e-02,
            1.24845912e-03, 1.06995688e-04, 9.36080212e-06, 8.42353356e-07,
            7.89594880e-08, 7.89483504e-09, 8.88043662e-10, 1.33187760e-10
        ]))

    # The failure must be raised outside the ``try`` body: an ``assert False``
    # placed inside it would be swallowed by the handler, so the check could
    # never fail.
    try:
        qvalue.QValue(pvalues)
    except Exception:
        pass
    else:
        raise AssertionError(
            "QValue(pvalues) without pi0 was expected to raise")
def test_sc2b():
    """Check the SC2B (version 1) aggregation scores returned for ``N=2``.

    The 'aggregation' entry of ``plot_aggr_best_score(N=2)`` is compared
    with reference values; ``plot_aggr_random(N=2, Nmax=2)`` is then called
    without checking its return value.
    """
    reference = [0.25428664093754277, 0.22176905909721403]
    sc2b = aggregation.SC2B_aggregation(version=1)
    best_scores = sc2b.plot_aggr_best_score(N=2)
    easydev.assert_list_almost_equal(best_scores['aggregation'], reference)
    # The returned value is not inspected: the call only has to complete.
    sc2b.plot_aggr_random(N=2, Nmax=2)
def test_sc2a():
    """Check the SC2A (version 1) aggregation scores returned for ``N=2``.

    The 'aggregation' entry of ``plot_aggr_best_score(N=2)`` is compared
    with reference values; ``plot_aggr_random(N=2, Nmax=2)`` is then called
    without checking its return value.
    """
    reference = [0.48434056686051674, 0.46329666463290453]
    sc2a = aggregation.SC2A_aggregation(version=1)
    best_scores = sc2a.plot_aggr_best_score(N=2)
    easydev.assert_list_almost_equal(best_scores['aggregation'], reference)
    # The returned value is not inspected: the call only has to complete.
    sc2a.plot_aggr_random(N=2, Nmax=2)
def test_d5c3_A():
    """Score the first downloaded 'A100' template with ``score_challengeA``.

    The 'aupr', 'auroc', 'p_aupr' and 'p_auroc' entries of the result are
    compared with reference values.
    """
    challenge = D5C3()
    templates = challenge.download_template('A100')
    scores = challenge.score_challengeA(templates[0], 'A100_1')

    metric_names = ('aupr', 'auroc', 'p_aupr', 'p_auroc')
    observed = [scores[name] for name in metric_names]
    reference = [0.0072659, 0.71110535, 0.9989876, 2.6334871e-35]
    assert_list_almost_equal(observed, reference)
def test_get_boxplot_data():
    """Check labels and first two values from ``_get_boxplot_data('msi')``.

    Uses the one-drug/one-feature data for 'Drug_1047_IC50' and 'TP53_mut'.
    """
    expected_labels = [
        '***MSI-stable neg',
        '***MSI-stable pos',
        '**MSI-unstable neg',
        '**MSI-unstable pos',
    ]
    expected_values = [2.0108071495663922e-47, 0.0012564798887037905]

    anova = ANOVA(ic50_test)
    odof = anova._get_one_drug_one_feature_data('Drug_1047_IC50', 'TP53_mut')
    boxplots = BoxPlots(odof)
    data = boxplots._get_boxplot_data(mode='msi')

    assert data[1] == expected_labels
    first_two = [data[2][0], data[2][1]]
    assert_list_almost_equal(first_two, expected_values)
def test_skcm():
    """Run the full ANOVA on the SKCM test input and check the report.

    Checks the report diagnostics, the first row of ``report.df``, one FDR
    value and the existence of the generated HTML pages under "skin/".
    """
    an = ANOVA(gdsctools_data("test_ANOVA_input_v17_skcm.txt"),
               gdsctools_data("test_ANOVA_input_v17_skcm.txt"))
    an.settings.pvalue_correction_method = 'qvalue'
    results = an.anova_all()

    # This creates a temp directory called "skin"
    report = ANOVAReport(an, results)
    diag = report.diagnostics().set_index('text')

    # (diagnostic label, expected value) pairs, checked in this order.
    expected_diagnostics = [
        ("Total number of ANOVA tests performed", 5194),
        ("Percentage of tests performed", 81.66),
        ("Total number of tested drugs", 265),
        ("Total number of genomic features used", 24),
        ("Total number of screened cell lines", 55),
        ("MicroSatellite instability included as factor", False),
        ("Total number of significant associations", 13),
        (" - sensitive", 8),
        (" - resistant", 5),
        ("p-value significance threshold", 0.001),
        ("FDR significance threshold", 25),
    ]
    for label, value in expected_diagnostics:
        assert diag.loc[label].values == value

    # The two ranges are reported as formatted strings.
    pvalue_range = diag.loc["Range of significant p-values"].values[0]
    assert pvalue_range == "[9.87e-08, 0.0006358]"
    fdr_range = diag.loc["Range of significant % FDRs"].values[0]
    assert fdr_range == "[0.04777 23.67]"

    expected_first_row = np.array([
        1, 'BRAF_mut', 1373, None, None, 10, 35,
        -2.1750318847152079, 2.9802302648104275, -5.155262149525635,
        2.291545942648078, 3.2964327113036669, 2.1492596576572947,
        1.5638912124151449, 2.3986223028747027,
        9.8695117331183039e-08, 9.8695117331182668e-08,
        None, None, None, 0.047768436788292422
    ], dtype=object)
    assert_list_almost_equal(report.df.iloc[0].values, expected_first_row)

    assert_list_almost_equal([report.df.iloc[12]['ANOVA_FEATURE_FDR']],
                             [23.671582956786185])

    report.create_html_pages(onweb=False)
    expected_pages = (
        "skin/index.html",
        "skin/associations/manova.html",
        "skin/associations/a1.html",
        "skin/associations/BRAF_mut.html",
        "skin/associations/drug_1047.html",
    )
    for page in expected_pages:
        assert os.path.exists(page)
def test_d5c3_B():
    """Score the downloaded 'B' templates and check the 'SysGenB1' values.

    ``N_pvalues`` is set to 10 before scoring and the 'pvalues' entry is
    removed from the result before the comparison.
    """
    challenge = D5C3()
    challenge.N_pvalues = 10
    templates = challenge.download_template('B')
    scores = challenge.score_challengeB(templates)['SysGenB1']

    # this is not deterministic so let us delete it
    del scores['pvalues']

    reference = [-0.19757481, 0.09499444, 0.85234405, 0.30814104, 1.33696271]
    assert_list_almost_equal(scores.values, reference)
def test_get_boxplot_data():
    """Check labels and first two values from ``_get_boxplot_data('msi')``.

    Uses the one-drug/one-feature data for drug 1047 and 'TP53_mut'.
    """
    expected_labels = [
        '***MSI-stable neg',
        '***MSI-stable pos',
        '**MSI-unstable neg',
        '**MSI-unstable pos',
    ]
    expected_values = [2.0108071495663922e-47, 0.0012564798887037905]

    anova = ANOVA(ic50_test)
    odof = anova._get_one_drug_one_feature_data(1047, 'TP53_mut')
    boxplots = BoxPlots(odof)
    data = boxplots._get_boxplot_data(mode='msi')

    assert data[1] == expected_labels
    first_two = [data[2][0], data[2][1]]
    assert_list_almost_equal(first_two, expected_values)
def test_d5c3_B():
    """Score the downloaded 'B' templates and check the 'SysGenB1' values.

    ``N_pvalues`` is set to 10 before scoring and the 'pvalues' entry is
    removed from the result before the comparison.
    """
    challenge = D5C3()
    challenge.N_pvalues = 10
    templates = challenge.download_template('B')
    scores = challenge.score_challengeB(templates)['SysGenB1']

    # this is not deterministic so let us delete it
    del scores['pvalues']

    reference = [-0.19757481, 0.09499444, 0.85234405, 0.30814104, 1.33696271]
    assert_list_almost_equal(scores.values, reference)
def test_odof_with_without_media():
    """Compare ANOVA summary p-values without and with the media factor.

    For 'Drug_1047_IC50' / 'TP53_mut', the summary dictionary is checked
    first for a default ``ANOVA`` and then for one built with
    ``set_media_factor=True``, which adds a 'media' entry.
    """
    drug, feature = 'Drug_1047_IC50', 'TP53_mut'

    # Default settings: feature, msi and tissue p-values.
    gdsc = ANOVA(ic50_test)
    gdsc.anova_one_drug_one_feature(drug, feature)
    summary = gdsc._get_anova_summary(gdsc.data_lm, output='dict')
    observed = [summary[key] for key in ('feature', 'msi', 'tissue')]
    assert_list_almost_equal(
        observed,
        [1.5750735472022118e-58, 0.025902887791637515, 5.541879283763767e-44])

    # Media factor switched on: a 'media' p-value is checked as well.
    gdsc = ANOVA(ic50_test, set_media_factor=True)
    gdsc.anova_one_drug_one_feature(drug, feature)
    summary = gdsc._get_anova_summary(gdsc.data_lm, output='dict')
    observed = [summary[key] for key in ('feature', 'media', 'msi', 'tissue')]
    assert_list_almost_equal(
        observed,
        [2.9236500715529455e-58, 0.7762487502315283,
         0.023777744527686766, 1.5729157319290974e-44])
def test_odof_with_without_media():
    """Compare ANOVA summary p-values without and with the media factor.

    For 'Drug_1047_IC50' / 'TP53_mut', the summary dictionary is checked
    first for a default ``ANOVA`` and then for one built with
    ``set_media_factor=True``, which adds a 'media' entry.
    """
    drug, feature = 'Drug_1047_IC50', 'TP53_mut'

    # Default settings: feature, msi and tissue p-values.
    gdsc = ANOVA(ic50_test)
    gdsc.anova_one_drug_one_feature(drug, feature)
    summary = gdsc._get_anova_summary(gdsc.data_lm, output='dict')
    observed = [summary[key] for key in ('feature', 'msi', 'tissue')]
    assert_list_almost_equal(
        observed,
        [1.5750735472022118e-58, 0.025902887791637515, 5.541879283763767e-44])

    # Media factor switched on: a 'media' p-value is checked as well.
    gdsc = ANOVA(ic50_test, set_media_factor=True)
    gdsc.anova_one_drug_one_feature(drug, feature)
    summary = gdsc._get_anova_summary(gdsc.data_lm, output='dict')
    observed = [summary[key] for key in ('feature', 'media', 'msi', 'tissue')]
    assert_list_almost_equal(
        observed,
        [2.9236500715529455e-58, 0.7762487502315283,
         0.023777744527686766, 1.5729157319290974e-44])
def test_skcm():
    """Run the full ANOVA on the SKCM test input and check the report.

    Checks the report diagnostics, the first row of ``report.df``, one FDR
    value and the existence of the generated HTML pages under "skin/".

    The diagnostics are indexed by their 'text' label and therefore read
    with ``.loc``; rows of ``report.df`` are read by position with
    ``.iloc``. The former ``.ix`` indexer no longer exists in pandas >= 1.0.
    """
    an = ANOVA(gdsctools_data("test_ANOVA_input_v17_skcm.txt"),
               gdsctools_data("test_ANOVA_input_v17_skcm.txt"))
    an.settings.pvalue_correction_method = 'qvalue'
    results = an.anova_all()

    # This creates a temp directory called "skin"
    report = ANOVAReport(an, results)
    diag = report.diagnostics()
    diag = diag.set_index('text')

    assert diag.loc["Total number of ANOVA tests performed"].values == 5194
    assert diag.loc["Percentage of tests performed"].values == 81.66
    assert diag.loc["Total number of tested drugs"].values == 265
    assert diag.loc["Total number of genomic features used"].values == 24
    assert diag.loc["Total number of screened cell lines"].values == 55
    assert diag.loc[
        "MicroSatellite instability included as factor"].values == False
    assert diag.loc["Total number of significant associations"].values == 13
    assert diag.loc[" - sensitive"].values == 8
    assert diag.loc[" - resistant"].values == 5
    assert diag.loc["p-value significance threshold"].values == 0.001
    assert diag.loc["FDR significance threshold"].values == 25
    assert diag.loc["Range of significant p-values"].values[
        0] == "[9.87e-08, 0.0006358]"
    assert diag.loc["Range of significant % FDRs"].values[
        0] == "[0.04777 23.67]"

    # NOTE(review): .ix[0] was label-based on an integer index; .iloc[0]
    # assumes the first row also carries label 0 -- confirm.
    assert_list_almost_equal(
        report.df.iloc[0].values,
        np.array([
            1, 'BRAF_mut', 1373, None, None, 10, 35,
            -2.1750318847152079, 2.9802302648104275, -5.155262149525635,
            2.291545942648078, 3.2964327113036669, 2.1492596576572947,
            1.5638912124151449, 2.3986223028747027,
            9.8695117331183039e-08, 9.8695117331182668e-08,
            None, None, None, 0.047768436788292422
        ], dtype=object))

    assert_list_almost_equal([report.df.iloc[12]['ANOVA_FEATURE_FDR']],
                             [23.671582956786185])

    report.create_html_pages(onweb=False)
    assert os.path.exists("skin/index.html")
    assert os.path.exists("skin/associations/manova.html")
    assert os.path.exists("skin/associations/a1.html")
    assert os.path.exists("skin/associations/BRAF_mut.html")
    assert os.path.exists("skin/associations/drug_1047.html")
def test_qvalue():
    """Check ``QValue.qvalue()`` against reference values and failure cases.

    With ``pi0`` enforced, the q-values are compared with reference numbers.
    Four constructor calls are then expected to raise: one without ``pi0``
    and three with the ``lambdas`` lists given below.
    """
    pvalues = np.array([0.8, 0.5, 0.5] + list(10**-np.linspace(1, 10, 9)))

    # This gives same answer as in R qvalue library
    # pvalues = c(c(0.8,0.5,0.5), 10**(-seq(1,10,2)))
    # qvalue(pvalues)
    # Note that here we enforce the pi0 value to be the same as in R
    # If not, this particular case fails because pi0 is negative...
    # qvalue not very robust.

    # The constructor is also exercised with a plain list of p-values.
    qv = qvalue.QValue(list(pvalues), pi0=0.1109898)
    qv = qvalue.QValue(pvalues, pi0=0.1109898)
    assert_list_almost_equal(
        qv.qvalue(),
        np.array([
            8.87918400e-02, 6.05398909e-02, 6.05398909e-02, 1.47986400e-02,
            1.24845912e-03, 1.06995688e-04, 9.36080212e-06, 8.42353356e-07,
            7.89594880e-08, 7.89483504e-09, 8.88043662e-10, 1.33187760e-10
        ]))

    # Keyword arguments for which the constructor is expected to raise.
    failing_kwargs = [
        {},                                                  # no pi0
        {'pi0': 0.1109898, 'lambdas': [0, .5, 0.7]},         # 3 lambdas
        {'pi0': 0.1109898, 'lambdas': [0, .5, 0.7, .9, 500]},  # lambda 500
        {'pi0': 0.1109898, 'lambdas': [-10, .5, 0.7, .9]},   # lambda -10
    ]
    for kwargs in failing_kwargs:
        # The failure must be raised outside the ``try`` body: an
        # ``assert False`` placed inside it would be swallowed by the
        # handler, so the check could never fail.
        try:
            qvalue.QValue(pvalues, **kwargs)
        except Exception:
            continue
        raise AssertionError(
            "QValue was expected to raise with arguments %r" % (kwargs,))
def test_anova_summary():
    """Check ``anova_pvalues`` for drug 999 / 'ASH1L_mut' under three settings.

    The p-values are checked with the default settings, after setting
    ``analysis_type`` to 'COREAD', and after also disabling
    ``include_MSI_factor``.
    """
    drug_id = 999
    feature = 'ASH1L_mut'
    an = ANOVA(ic50_test)

    # by default regression includes + msi + feature
    df = an.anova_one_drug_one_feature(drug_id, feature)
    pvals = an.anova_pvalues
    observed = [pvals[key] for key in ("tissue", "msi", "feature")]
    assert_list_almost_equal(
        observed,
        [3.210453608523738e-06, 0.14579091345305398, 0.5430736275249095],
        deltas=1e-10)

    # something different from PANCAN
    an.settings.analysis_type = 'COREAD'
    df = an.anova_one_drug_one_feature(drug_id, feature)
    pvals = an.anova_pvalues
    observed = [pvals[key] for key in ("msi", "feature")]
    assert_list_almost_equal(
        observed, [0.262294448831941, 0.30599483315087317], deltas=1e-10)

    # now remove also the MSI factor, in which case the tissue must also be
    # removed !
    an.settings.include_MSI_factor = False
    an.settings.analysis_type = "COREAD"
    df = an.anova_one_drug_one_feature(drug_id, feature)
    observed = [an.anova_pvalues["feature"]]
    assert_list_almost_equal(observed, [0.21266050833611852], deltas=1e-10)
    assert (df.N_FEATURE_neg == 365).all()
def test_anova_summary():
    """Check ``anova_pvalues`` for 'Drug_999_IC50' / 'ASH1L_mut'.

    ``anova_pvalues`` is compared as a whole with a reference list under
    the default settings, after setting ``analysis_type`` to 'COREAD', and
    after also disabling ``include_MSI_factor``.
    """
    drug_id = 'Drug_999_IC50'
    feature = 'ASH1L_mut'
    an = ANOVA(ic50_test)

    # by default regression includes + msi + feature
    df = an.anova_one_drug_one_feature(drug_id, feature)
    reference = [3.210453608523738e-06, 0.14579091345305398,
                 0.5430736275249095, None]
    assert_list_almost_equal(an.anova_pvalues, reference)

    # something different from PANCAN
    an.settings.analysis_type = 'COREAD'
    df = an.anova_one_drug_one_feature(drug_id, feature)
    reference = [0.262294448831941, 0.30599483315087317, None]
    assert_list_almost_equal(an.anova_pvalues, reference)

    # now remove also the MSI factor
    an.settings.include_MSI_factor = False
    df = an.anova_one_drug_one_feature(drug_id, feature)
    reference = [0.21266050833611852, None]
    assert_list_almost_equal(an.anova_pvalues, reference)
    assert (df.N_FEATURE_neg == 365).all()
def test_anova_summary():
    """Check ``anova_pvalues`` for drug 999 / "ASH1L_mut" under three settings.

    The p-values are checked with the default settings, after setting
    ``analysis_type`` to "COREAD", and after also disabling
    ``include_MSI_factor``.
    """
    drug_id = 999
    feature = "ASH1L_mut"
    an = ANOVA(ic50_test)

    # by default regression includes + msi + feature
    df = an.anova_one_drug_one_feature(drug_id, feature)
    pvals = an.anova_pvalues
    observed = [pvals[key] for key in ("tissue", "msi", "feature")]
    assert_list_almost_equal(
        observed,
        [3.210453608523738e-06, 0.14579091345305398, 0.5430736275249095],
        deltas=1e-10)

    # something different from PANCAN
    an.settings.analysis_type = "COREAD"
    df = an.anova_one_drug_one_feature(drug_id, feature)
    pvals = an.anova_pvalues
    observed = [pvals[key] for key in ("msi", "feature")]
    assert_list_almost_equal(
        observed, [0.262294448831941, 0.30599483315087317], deltas=1e-10)

    # now remove also the MSI factor, in which case the tissue must also be
    # removed !
    an.settings.include_MSI_factor = False
    an.settings.analysis_type = "COREAD"
    df = an.anova_one_drug_one_feature(drug_id, feature)
    observed = [an.anova_pvalues["feature"]]
    assert_list_almost_equal(observed, [0.21266050833611852], deltas=1e-10)
    assert (df.N_FEATURE_neg == 365).all()
def test_anova_one_drug_one_feature():
    """Compare a one-drug/one-feature ANOVA result with a reference frame.

    Runs ``anova_one_drug_one_feature`` for 'Drug_999_IC50' and 'ABCB1_mut'
    (with ``show=True``) and compares the returned object with a
    single-row DataFrame indexed by 1.
    """
    # test 1 drug
    drug_id = 'Drug_999_IC50'

    # Expected value of each column for the single row (index label 1).
    reference_row = {
        'DRUG_ID': drug_id,
        'DRUG_NAME': drug_id,
        'DRUG_TARGET': drug_id,
        'FEATURE': 'ABCB1_mut',
        'ANOVA_FEATURE_pval': 0.86842684367357359,
        'FEATURE_IC50_T_pval': 0.48586107208790896,
        'FEATURE_IC50_effect_size': 0.31407773405409201,
        'FEATURE_delta_MEAN_IC50': -0.36105662590553411,
        'FEATUREneg_Glass_delta': 0.31296252976074801,
        'FEATUREneg_IC50_sd': 1.1536736560173895,
        'FEATUREneg_logIC50_MEAN': 2.8007757068403043,
        'FEATUREpos_Glass_delta': 0.53754504818376181,
        'FEATUREpos_IC50_sd': 0.67167696386648634,
        'FEATUREpos_logIC50_MEAN': 2.4397190809347702,
        'ANOVA_MSI_pval': 0.14598946672374763,
        'N_FEATURE_neg': 370,
        'N_FEATURE_pos': 5,
        'ANOVA_TISSUE_pval': 3.2808255732569986e-06,
    }

    an = ANOVA(ic50_test)
    df = an.anova_one_drug_one_feature(
        drug_id=drug_id, feature_name='ABCB1_mut', show=True)

    # Same nested {column: {1: value}} layout as a hand-written literal.
    control = pd.DataFrame(
        {column: {1: value} for column, value in reference_row.items()})
    assert_list_almost_equal(df, control)
def test_sc2b():
    """Check the SC2B (version 1) aggregation scores returned for ``N=2``.

    The 'aggregation' entry of ``plot_aggr_best_score(N=2)`` is compared
    with reference values; ``plot_aggr_random(N=2, Nmax=2)`` is then called
    without checking its return value.
    """
    reference = [0.25428664093754277, 0.22176905909721403]
    sc2b = aggregation.SC2B_aggregation(version=1)
    best_scores = sc2b.plot_aggr_best_score(N=2)
    easydev.assert_list_almost_equal(best_scores['aggregation'], reference)
    # The returned value is not inspected: the call only has to complete.
    sc2b.plot_aggr_random(N=2, Nmax=2)
def test_set_cancer_type():
    """Check the total of the IC50 values after ``set_cancer_type("breast")``.

    The grand total (``sum().sum()``) of ``an.ic50.df`` is compared with a
    reference value.
    """
    an = ANOVA(gdsctools_data("IC50_v17.csv.gz"))
    an.set_cancer_type("breast")
    grand_total = an.ic50.df.sum().sum()
    assert_list_almost_equal([grand_total], [27721.255627472943])
def test_sc2a():
    """Check the SC2A (version 1) aggregation scores returned for ``N=2``.

    The 'aggregation' entry of ``plot_aggr_best_score(N=2)`` is compared
    with reference values; ``plot_aggr_random(N=2, Nmax=2)`` is then called
    without checking its return value.
    """
    reference = [0.48434056686051674, 0.46329666463290453]
    sc2a = aggregation.SC2A_aggregation(version=1)
    best_scores = sc2a.plot_aggr_best_score(N=2)
    easydev.assert_list_almost_equal(best_scores['aggregation'], reference)
    # The returned value is not inspected: the call only has to complete.
    sc2a.plot_aggr_random(N=2, Nmax=2)