def test_volcano_plot():
    """Smoke-test ``VolcanoANOVA`` on a small ANOVA run.

    The feature table is cut down to its first ten columns before
    ``anova_all`` is called. The test then exercises both constructor
    inputs, the selector, the plotting entry points and the p-value/FDR
    helpers. Nothing is asserted: the test passes when no call raises.
    """
    # Keep only the first ten feature columns and rebuild the analysis
    # from that reduced table.
    full = ANOVA(ic50_test)
    kept_columns = full.features.df.columns[0:10]
    full.features.df = full.features.df[kept_columns]
    reduced = ANOVA(ic50_test, genomic_features=full.features.df)
    results = reduced.anova_all()

    # try the constructors: first from the data frame, then from the
    # results object itself
    VolcanoANOVA(results.df)
    volcano = VolcanoANOVA(results)

    # the selector method
    volcano.df = volcano.selector(volcano.df)
    volcano.settings.savefig = False

    # some of the plotting
    plotters = (
        volcano.volcano_plot_all_drugs,
        volcano.volcano_plot_all_features,
        volcano.volcano_plot_all,
    )
    for plot in plotters:
        plot()

    # conversions between p-values and FDR, with scalar and list input
    volcano._get_fdr_from_pvalue_interp(1e-10)
    for fdr in (50, [50, 60]):
        volcano._get_pvalue_from_fdr(fdr)
def test_get_boxplot_data():
    """Check the labels and leading values returned by ``_get_boxplot_data``.

    The drug is addressed by its column name (``'Drug_1047_IC50'``) and
    the data are requested in ``'msi'`` mode.
    """
    expected_labels = [
        '***MSI-stable neg',
        '***MSI-stable pos',
        '**MSI-unstable neg',
        '**MSI-unstable pos',
    ]
    expected_values = [2.0108071495663922e-47, 0.0012564798887037905]

    analysis = ANOVA(ic50_test)
    boxplots = BoxPlots(
        analysis._get_one_drug_one_feature_data('Drug_1047_IC50', 'TP53_mut')
    )
    boxplot_data = boxplots._get_boxplot_data(mode='msi')

    # Item 1 holds the group labels; only the first two entries of item 2
    # are compared against the reference values.
    assert boxplot_data[1] == expected_labels
    observed_values = [boxplot_data[2][0], boxplot_data[2][1]]
    assert_list_almost_equal(observed_values, expected_values)
def test_get_boxplot_data():
    """Check the labels and leading values returned by ``_get_boxplot_data``.

    The drug is addressed by its integer identifier (``1047``) and the
    data are requested in ``'msi'`` mode.
    """
    expected_labels = [
        '***MSI-stable neg',
        '***MSI-stable pos',
        '**MSI-unstable neg',
        '**MSI-unstable pos',
    ]
    expected_values = [2.0108071495663922e-47, 0.0012564798887037905]

    analysis = ANOVA(ic50_test)
    boxplots = BoxPlots(
        analysis._get_one_drug_one_feature_data(1047, 'TP53_mut')
    )
    boxplot_data = boxplots._get_boxplot_data(mode='msi')

    # Item 1 holds the group labels; only the first two entries of item 2
    # are compared against the reference values.
    assert boxplot_data[1] == expected_labels
    observed_values = [boxplot_data[2][0], boxplot_data[2][1]]
    assert_list_almost_equal(observed_values, expected_values)
def test_IC50Cluster():
    """Check ``IC50Cluster`` on the ``test_v18_clustering.tsv`` data set.

    Verifies the drug identifiers created for a drug reported several
    times, the size of the resulting table, and the number of feasible
    tests reported by an ``ANOVA`` built from it.
    """
    dataset = gdsctools_data("test_v18_clustering.tsv")
    ic50 = IC50Cluster(dataset)

    # In this data set, a drug is reported 3 times (1211) and should appear
    # as follows:
    assert 1211 in ic50.df.columns
    assert 11211 in ic50.df.columns
    assert 21211 in ic50.df.columns

    assert len(ic50.drugIds) == 860
    assert len(ic50.df) == 50

    an = ANOVA(ic50, dataset)
    # This comparison used to be a bare expression whose result was thrown
    # away, so the value was never actually checked.
    assert an.diagnostics()['feasible_tests'] == 65026
def anova_pipeline(args=None):
    """This function is used by the standalone application called
    **gdsctools_anova**

    Type::

        gdsctools_anova --help

    to get some help.

    :param list args: command-line style arguments, program name first (as
        in ``sys.argv``). When None, a copy of ``sys.argv`` is used. A
        sequence holding a single item gets ``--help`` appended; the
        caller's sequence itself is never modified.

    Informational options (version, testing, save-settings, license,
    summary, print-tissues, print-drugs, print-features) are handled first
    and return without running an analysis. Otherwise the analysis is
    dispatched according to the ``drug`` and ``feature`` options. The
    function returns None; an unknown drug ends the process with
    ``sys.exit(1)``.
    """
    msg = "Welcome to GDSCTools standalone"
    print_color(msg, purple, underline=True)

    # Keep the argument args as None by default to
    # allow testing e.g., in nosetests
    if args is None:
        args = sys.argv[:]
    elif len(args) == 1:
        # Build a new list instead of using ``+=`` so that the list passed
        # in by the caller is not extended in place.
        args = list(args) + ['--help']

    user_options = ANOVAOptions(prog="gdsctools_anova")
    try:
        options = user_options.parse_args(args[1:])
    except SystemExit:
        # The parser ended the run itself (e.g. --help); just return.
        return

    # -----------------------------------------------------------------
    # ---------------------------------------- options without analysis
    # -----------------------------------------------------------------
    if options.version is True:
        print("This is version %s of gdsctools_anova" % gdsctools.version)
        return

    if options.testing is True:
        print('Testing mode:')
        from gdsctools import ANOVA, ic50_test
        an = ANOVA(ic50_test)
        df = an.anova_one_drug_one_feature('Drug_1047_IC50', 'TP53_mut')

        assert df.loc[1,'N_FEATURE_pos'] == 554, \
            "N_feature_pos must be equal to 554"
        print(df.T)
        print(darkgreen("\nGDSCTools seems to be installed properly"))
        return

    if options.save_settings:
        from gdsctools import ANOVA, ic50_test
        an = ANOVA(ic50_test)
        an.settings.to_json(options.save_settings)
        print('Save a default parameter set in %s' % options.save_settings)
        return

    if options.license is True:
        print(gdsctools.license)
        return

    if options.summary is True:
        from gdsctools import anova
        an = anova.ANOVA(options.input_ic50, options.input_features)
        print(an)
        return

    if options.print_tissues is True:
        from gdsctools import anova
        an = anova.ANOVA(options.input_ic50, options.input_features)

        tissues = an.tissue_factor
        try:
            tissues = tissues.sort_values('Tissue Factor').unique()
        except (AttributeError, TypeError, ValueError, KeyError):
            # Fallback to the ``sort`` method, presumably for pandas
            # versions whose ``sort_values`` is missing or has a different
            # signature (to be confirmed). Only API-mismatch errors are
            # caught so that KeyboardInterrupt/SystemExit are not swallowed.
            tissues = tissues.sort(inplace=False).unique()
        for name in tissues:
            print(name)
        return

    if options.print_drugs is True:
        from gdsctools import anova
        gdsc = anova.ANOVA(options.input_ic50, options.input_features)
        import textwrap
        print("\n".join(textwrap.wrap(" , ".join(gdsc.drugIds))))
        return

    if options.print_features is True:
        from gdsctools import anova
        gdsc = anova.ANOVA(options.input_ic50, options.input_features)
        import textwrap
        print("\n".join(textwrap.wrap(" , ".join(gdsc.feature_names))))
        return

    # -----------------------------------------------------------------
    # --------------------------------------------------- real analysis
    # -----------------------------------------------------------------
    # dispatcher to the functions according to the user parameters
    from gdsctools import ANOVA
    anova = ANOVA(options.input_ic50, options.input_features,
                  options.input_drug,
                  low_memory=not options.fast)
    anova = _set_settings(anova, options)

    if options.drug and options.drug not in anova.ic50.df.columns:
        print(red("Invalid Drug. Try --print-drug-names"))
        sys.exit(1)

    if options.drug is not None and options.feature is not None:
        # one drug, one feature
        print_color("ODOF mode", purple)
        anova_one_drug_one_feature(anova, options)
    elif options.drug is not None:
        # one drug, all features
        print_color("ODAF mode", purple)
        anova_one_drug(anova, options)
    else:  # analyse everything
        if options.feature is None:
            print_color("ADAF mode", purple)
        else:
            print_color("ADOF mode", purple)
        anova_all(anova, options)

    if options.onweb is False and options.no_html is False:
        msg = "\nNote that a directory {} was created and files saved into it"
        print(purple(msg.format(options.directory)))
""" Analyse all associations (drug/feature) ========================================= Volcano plot (all associations) """ ##################################################### # from gdsctools import ANOVA, ic50_test gdsc = ANOVA(ic50_test) results = gdsc.anova_all() results.volcano()
def test_anova_brca():
    """Regression test of the ANOVA restricted to the ``'breast'`` cancer type.

    Two things are compared against stored reference values:

    * the column-wise sums of the ``anova_all`` results table (the
      ``ANOVA_MEDIA_pval`` column is skipped);
    * the diagnostics table of the ``ANOVAReport`` (summary section).

    The ``breast`` directory is removed at the end, even when the report
    checks fail, so that a failing run does not leave it behind.
    """
    import shutil

    an1 = ANOVA(gdsctools_data('IC50_v17.csv.gz'))
    an1.set_cancer_type('breast')
    an = ANOVA(an1.ic50, gdsctools_data('GF_BRCA_v17.csv.gz'))
    dfori = an.anova_all()

    # Sum every column, drop the descriptive columns and replace missing
    # sums by zero before comparing with the reference.
    df = dfori.df.sum()
    df = df.drop(['DRUG_TARGET', 'DRUG_NAME', 'DRUG_ID', 'FEATURE'])
    df = df.fillna(0)
    totest = df.to_dict()

    exact = {
        'ANOVA_FEATURE_FDR': 1133416.7761055394,
        'ANOVA_FEATURE_pval': 5824.8201538614458,
        'FEATURE_IC50_T_pval': 5824.8201538614449,
        'FEATURE_IC50_effect_size': 4408.511449781573,
        'FEATURE_delta_MEAN_IC50': 261.11373729866705,
        'FEATURE_neg_Glass_delta': 4487.7401723134735,
        'FEATURE_neg_IC50_sd': 14701.868130868914,
        'FEATURE_neg_logIC50_MEAN': 28701.510736736222,
        'FEATURE_pos_Glass_delta': 6536.8938399490198,
        'FEATURE_pos_IC50_sd': 13362.588398939894,
        'FEATURE_pos_logIC50_MEAN': 28962.624474034845,
        'ANOVA_MSI_pval': 0.0,
        'N_FEATURE_neg': 439196.0,
        'N_FEATURE_pos': 92140.0,
        'ANOVA_TISSUE_pval': 0.0,
        'ASSOC_ID': 68509365.0,
        'index': 68497660.0,
    }

    for k, v in totest.items():
        if k in ['ANOVA_MEDIA_pval']:
            continue
        assert_almost_equal(v, exact[k])

    # test part of the report (summary section)
    try:
        r = ANOVAReport(an, dfori)
        totest = r.diagnostics().to_dict()
        exact = {
            'text': {
                0: 'Type of analysis',
                1: 'Total number of possible drug/feature associations',
                2: 'Total number of ANOVA tests performed',
                3: 'Percentage of tests performed',
                4: '',
                5: 'Total number of tested drugs',
                6: 'Total number of genomic features used',
                7: 'Total number of screened cell lines',
                8: 'MicroSatellite instability included as factor',
                9: '',
                10: 'Total number of significant associations',
                11: ' - sensitive',
                12: ' - resistant',
                13: 'p-value significance threshold',
                14: 'FDR significance threshold',
                15: 'Range of significant p-values',
                16: 'Range of significant % FDRs',
            },
            'value': {
                0: 'breast',
                1: 13780,
                2: 11705,
                3: 84.94,
                4: '',
                5: 265,
                6: 52,
                7: 51,
                8: False,
                9: '',
                10: 27,
                11: 17,
                12: 10,
                13: 0.001,
                14: 25,
                15: '[2.098e-09, 0.0004356]',
                16: '[0.002456 18.89]',
            },
        }
        assert totest == exact
    finally:
        # Clean up in ``finally`` so the directory is removed even when
        # one of the report checks above fails.
        shutil.rmtree('breast')
def test_anova_brca():
    """Regression test of the ANOVA restricted to the ``'breast'`` cancer type.

    The column-wise sums of the ``anova_all`` results and the diagnostics
    table of the ``ANOVAReport`` are compared against stored references.
    """
    # --- reference values -------------------------------------------------
    expected_sums = {
        'ANOVA_FEATURE_FDR': 1133416.7761055394,
        'ANOVA_FEATURE_pval': 5824.8201538614458,
        'FEATURE_IC50_T_pval': 5824.8201538614449,
        'FEATURE_IC50_effect_size': 4408.511449781573,
        'FEATURE_delta_MEAN_IC50': 261.11373729866705,
        'FEATURE_neg_Glass_delta': 4487.7401723134735,
        'FEATURE_neg_IC50_sd': 14701.868130868914,
        'FEATURE_neg_logIC50_MEAN': 28701.510736736222,
        'FEATURE_pos_Glass_delta': 6536.8938399490198,
        'FEATURE_pos_IC50_sd': 13362.588398939894,
        'FEATURE_pos_logIC50_MEAN': 28962.624474034845,
        'ANOVA_MSI_pval': 0.0,
        'N_FEATURE_neg': 439196.0,
        'N_FEATURE_pos': 92140.0,
        'ANOVA_TISSUE_pval': 0.0,
        'ASSOC_ID': 68509365.0,
        'index': 68497660.0,
    }
    expected_diagnostics = {
        'text': {
            0: 'Type of analysis',
            1: 'Total number of possible drug/feature associations',
            2: 'Total number of ANOVA tests performed',
            3: 'Percentage of tests performed',
            4: '',
            5: 'Total number of tested drugs',
            6: 'Total number of genomic features used',
            7: 'Total number of screened cell lines',
            8: 'MicroSatellite instability included as factor',
            9: '',
            10: 'Total number of significant associations',
            11: ' - sensitive',
            12: ' - resistant',
            13: 'p-value significance threshold',
            14: 'FDR significance threshold',
            15: 'Range of significant p-values',
            16: 'Range of significant % FDRs',
        },
        'value': {
            0: 'breast',
            1: 13780,
            2: 11705,
            3: 84.94,
            4: '',
            5: 265,
            6: 52,
            7: 51,
            8: False,
            9: '',
            10: 27,
            11: 17,
            12: 10,
            13: 0.001,
            14: 25,
            15: '[2.098e-09, 0.0004356]',
            16: '[0.002456 18.89]',
        },
    }

    # --- run the analysis -------------------------------------------------
    all_tissues = ANOVA(gdsctools_data('IC50_v17.csv.gz'))
    all_tissues.set_cancer_type('breast')
    breast = ANOVA(all_tissues.ic50, gdsctools_data('GF_BRCA_v17.csv.gz'))
    results = breast.anova_all()

    # Column sums, without the descriptive columns, missing sums set to 0.
    descriptive = ['DRUG_TARGET', 'DRUG_NAME', 'DRUG_ID', 'FEATURE']
    observed_sums = results.df.sum().drop(descriptive).fillna(0).to_dict()

    for column, total in observed_sums.items():
        # The ANOVA_MEDIA_pval column has no reference value and is skipped.
        if column != 'ANOVA_MEDIA_pval':
            assert_almost_equal(total, expected_sums[column])

    # test part of the report (summary section)
    report = ANOVAReport(breast, results)
    assert report.diagnostics().to_dict() == expected_diagnostics
""" Association between a Drug and a Feature ========================================= Boxplot association """ ##################################################### # from gdsctools import ANOVA, ic50_test gdsc = ANOVA(ic50_test) gdsc.set_cancer_type('breast') df = gdsc.anova_one_drug_one_feature(1047, 'TP53_mut', show=True)