def mpld3_to_html(self):
    """Return the current figure as an mpld3 HTML snippet wrapped in a div.

    A plotting method must have been called beforehand so that
    ``self.current_fig`` holds the figure to export. The conversion is
    best-effort: if the legend cannot be built, mpld3 cannot be imported
    or the export fails, an empty ``<div class="jsimage">`` is returned.

    :return: HTML string
    """
    from gdsctools import gdsctools_data
    # This copy the full path and therefore HTML cannot
    # be moved in another directory. to be fixed.
    js_path1 = gdsctools_data('d3.v3.min.js', where='javascript')
    js_path2 = gdsctools_data('mpld3.v0.2.js', where='javascript')
    try:
        # mpld3 is great but there are a couple of issues
        # 1 - legend zorder is not used so dots may be below the legend,
        #     hence we make the legend frame partly transparent
        #     (framealpha=0.8)
        # 2 - % characters escaped as \% are well interpreted in matplotlib
        #     but not once parsed by mpld3. So, here we remove the
        #     \ character
        axl = pylab.legend(loc='best', framealpha=0.8, borderpad=1)
        axl.set_zorder(10)  # in case there is a circle behind the legend.
        for label in axl.get_texts():
            label.set_text(label.get_text().replace("\\%", "%") + " ")
        import mpld3
        htmljs = mpld3.fig_to_html(self.current_fig,
                                   d3_url=js_path1,
                                   mpld3_url=js_path2)
    except Exception:
        # Keep the report usable without the interactive figure, but let
        # KeyboardInterrupt / SystemExit propagate.
        htmljs = ""
    return """<div class="jsimage"> """ + htmljs + "</div>"
def test_regression_elastic_net():
    """Smoke-test the elastic net workflow on the v5 data set (drug 1047)."""
    ic50 = IC50(gdsctools_data("IC50_v5.csv.gz"))
    features = GenomicFeatures(gdsctools_data("genomic_features_v5.csv.gz"))
    gd = GDSCElasticNet(ic50, features, verbose=True)
    print(gd)

    drugid = 1047

    # The cross-validation result provides the alpha of the reference model
    cv_results = gd.runCV(drugid, n_folds=5)
    bestalpha = cv_results.alpha
    cv_results.coefficients
    best_model = gd.get_model(alpha=bestalpha)

    # Plots, with an explicit model and with the default one
    gd.plot_weight(drugid, best_model)
    for model in (best_model, None):
        gd.plot_importance(drugid, model=model)

    # Manual fit and diagnostics
    gd.fit(drugid, alpha=bestalpha)
    gd.tune_alpha(drugid, alpha_range=(-3.5, -1))
    gd.check_randomness(drugid, N=10)
    for vertical in (False, True):
        gd.boxplot(drugid, model=best_model, n=10, bx_vert=vertical)

    gd.dendogram_coefficients()
    gd.dendogram_coefficients(stacked=False)
def test_regression_elastic_net():
    """Smoke-test the elastic net workflow on the v5 data set (drug 1047)."""
    ic50 = IC50(gdsctools_data("IC50_v5.csv.gz"))
    features = GenomicFeatures(gdsctools_data("genomic_features_v5.csv.gz"))
    gd = GDSCElasticNet(ic50, features, verbose=True)
    print(gd)

    drugid = 1047

    # The cross-validation result provides the alpha of the reference model
    cv_results = gd.runCV(drugid, kfolds=5)
    bestalpha = cv_results.alpha
    cv_results.coefficients
    best_model = gd.get_model(alpha=bestalpha)

    # Plots, with an explicit model and with the default one
    gd.plot_weight(drugid, best_model)
    for model in (best_model, None):
        gd.plot_importance(drugid, model=model)

    # Manual fit and diagnostics
    gd.fit(drugid, alpha=bestalpha)
    gd.tune_alpha(drugid, alpha_range=(-3.5, -1))
    gd.check_randomness(drugid, N=10)
    for vertical in (False, True):
        gd.boxplot(drugid, model=best_model, n=10, bx_vert=vertical)

    gd.dendogram_coefficients()
    gd.dendogram_coefficients(stacked=False)
def test_analysis():
    """Run the regression standalone on the v5 data in a scratch directory."""
    import shutil
    import tempfile

    GF = gdsctools_data("genomic_features_v5.csv.gz")
    IC = gdsctools_data("IC50_v5.csv.gz")

    # mkdtemp already creates the output directory; --force is passed so
    # that an existing directory is presumably accepted by the standalone.
    pname = tempfile.mkdtemp()
    try:
        regression.main([prog, '-F', GF, '-I', IC, "-O", pname, "--force"])
    finally:
        # mkdtemp leaves the clean-up to the caller
        shutil.rmtree(pname, ignore_errors=True)
def test_readers_tabs():
    """IC50 is expected to reject a tab-separated file named ``*.csv``.

    The previous version wrapped ``assert False`` in a bare ``except``,
    which caught its own AssertionError, so the test could never fail.
    """
    # If the files ends in csv but its content is tsv, this may be an issue
    try:
        IC50(gdsctools_data("test_IC50_tabs.csv"))
    except Exception:
        return
    raise AssertionError("IC50 accepted a tab-separated file named *.csv")
def _init_report(self):
    """Run the parent initialisation, then copy any missing logo to images/."""
    super(ReportMain, self)._init_report()
    for logo in ('EBI_logo.png', 'sanger-logo.png'):
        target = os.sep.join([self.directory, 'images', logo])
        if os.path.isfile(target):
            continue  # already in place
        source = gdsctools_data("images" + os.sep + logo)
        shutil.copy(source, target)
def _init_report(self):
    """Run the parent initialisation, then copy any missing logo to images/."""
    super(ReportMain, self)._init_report()
    for logo in ("EBI_logo.png", "sanger-logo.png"):
        target = os.sep.join([self.directory, "images", logo])
        if os.path.isfile(target):
            continue  # already in place
        source = gdsctools_data("images" + os.sep + logo)
        shutil.copy(source, target)
def test_gdsc():
    """Run the GDSC multi-tissue analysis and build the data packages.

    Company and tissue packages are written below a temporary directory
    that is removed at the end, whether or not the test succeeds.
    """
    import shutil
    import tempfile

    tempdir = tempfile.mkdtemp()
    try:
        compdir = os.path.join(tempdir, "company_packages")
        tissuedir = os.path.join(tempdir, "tissue_packages")

        # Only the directory holding the GF_*_v17 files is needed
        pathtoGF = os.path.split(gdsctools_data("GF_BRCA_v17.csv.gz"))[0]
        ic50 = gdsctools_data('IC50_v17.csv.gz')
        DD = gdsctools_data("test_drug_decode2.csv")

        gg = GDSC(ic50, DD, os.path.join(pathtoGF, 'GF_*_v17.csv.gz'))
        gg.company_directory = compdir
        gg.tissue_directory = tissuedir
        gg.analyse()
        assert gg.companies == ['COMPANY_A', 'COMPANY_B']
        gg.create_data_packages_for_companies()
        gg.create_summary_pages()
    finally:
        # mkdtemp leaves the clean-up to the caller
        shutil.rmtree(tempdir, ignore_errors=True)
def _init_report(self):
    """Create the report directory tree and copy the static assets into it.

    Resets ``self.sections`` and ``self.section_names``, creates
    ``self.directory`` and the sub-directories listed in
    ``self._to_create`` when they do not exist yet, then copies the CSS,
    JavaScript and logo files that are not already present.

    Directory creation is best-effort (an ``OSError`` is ignored); a
    failure while copying the assets is propagated. Nothing is returned.
    """
    self.sections = []
    self.section_names = []

    try:
        if not os.path.isdir(self.directory):
            os.mkdir(self.directory)
            # report the creation only once it has actually happened
            print("Created directory {}".format(self.directory))
        # list of directories created in the constructor; tolerate its
        # absence as the previous blanket ``except`` used to do
        for this in getattr(self, "_to_create", []):
            try:
                os.mkdir(self.directory + os.sep + this)
            except OSError:
                pass  # already created ?
    except OSError:
        pass

    # (sub-directory, files, prefix of the files in the gdsctools data tree)
    assets = [
        ('css', ['gdsc.css', 'github-gist.css'], ''),
        ('js', ['sorttable.js', 'highlight.pack.js'], ''),
        ('images', ['EBI_logo.png', 'sanger-logo.png'], 'images' + os.sep),
    ]
    for subdir, filenames, prefix in assets:
        for filename in filenames:
            target = os.sep.join([self.directory, subdir, filename])
            if not os.path.isfile(target):
                shutil.copy(gdsctools_data(prefix + filename), target)
def _init_report(self):
    """Create the report directory tree and copy the static assets into it.

    Resets ``self.sections`` and ``self.section_names``, creates
    ``self.directory`` and the sub-directories listed in
    ``self._to_create`` when they do not exist yet, then copies the CSS,
    JavaScript and logo files that are not already present.

    Directory creation is best-effort (an ``OSError`` is ignored); a
    failure while copying the assets is propagated. Nothing is returned.
    """
    self.sections = []
    self.section_names = []

    try:
        if not os.path.isdir(self.directory):
            os.mkdir(self.directory)
            # report the creation only once it has actually happened
            print("Created directory {}".format(self.directory))
        # list of directories created in the constructor; tolerate its
        # absence as the previous blanket ``except`` used to do
        for this in getattr(self, "_to_create", []):
            try:
                os.mkdir(self.directory + os.sep + this)
            except OSError:
                pass  # already created ?
    except OSError:
        pass

    # (sub-directory, files, prefix of the files in the gdsctools data tree)
    assets = [
        ('css', ['gdsc.css', 'github-gist.css'], ''),
        ('js', ['sorttable.js', 'highlight.pack.js'], ''),
        ('images', ['EBI_logo.png', 'sanger-logo.png'], 'images' + os.sep),
    ]
    for subdir, filenames, prefix in assets:
        for filename in filenames:
            target = os.sep.join([self.directory, subdir, filename])
            if not os.path.isfile(target):
                shutil.copy(gdsctools_data(prefix + filename), target)
def test_IC50Cluster():
    """Check the IC50Cluster reader on the v18 clustering test file."""
    dataset = gdsctools_data("test_v18_clustering.tsv")
    ic50 = IC50Cluster(dataset)

    # In this data set, a drug is reported 3 times (1211) and should appear
    # as follows:
    assert 1211 in ic50.df.columns
    assert 11211 in ic50.df.columns
    assert 21211 in ic50.df.columns

    assert len(ic50.drugIds) == 860
    assert len(ic50.df) == 50

    an = ANOVA(ic50, dataset)
    # This comparison used to be a bare expression and verified nothing.
    # NOTE(review): assumes diagnostics() returns a mapping of scalars.
    assert an.diagnostics()['feasible_tests'] == 65026
def test_drugs():
    """Check DrugDecode on the CSV/TSV test files and on a company file."""
    r1 = DrugDecode(testing.drug_test_csv)
    r1.drugIds
    r2 = DrugDecode(testing.drug_test_tsv)
    r2.drugIds
    assert r1 == r2

    # r1.get_info() this example fails because all webrelease are NAN
    assert len(r1) == 11

    dd = DrugDecode(gdsctools_data("test_drug_decode_comp.csv"))
    assert dd.companies == ["ME"]
    assert dd.is_public(5) == 'Y'
    dd.check()
    assert dd.get_info()['N_prop'] == 1

    # test repr and print; a bare ``dd`` statement never called __repr__
    print(dd)
    repr(dd)

    # test __add__
    assert dd + dd == dd
    assert len(dd.get_public_and_one_company("ME")) == 10
from gdsctools.omnibem import OmniBEMBuilder
from gdsctools import gdsctools_data


omnibem_data = gdsctools_data("test_omnibem_genomic_alterations.csv.gz")
omnibem_genes = gdsctools_data("test_omnibem_genes.txt")


def test_omnibem():
    """Check OmniBEMBuilder filtering by gene list, COSMIC id and type."""
    ob = OmniBEMBuilder(omnibem_data)
    assert len(ob) == 56943
    ob.filter_by_gene_list(omnibem_genes)
    mobem = ob.get_mobem()
    assert mobem[mobem.columns[3:]].sum().sum() == 54061

    # ob.plot_number_alteration_by_tissue()
    ob.plot_alterations_per_cellline()
    ob.get_significant_genes()

    # filter by cosmic id
    ob = OmniBEMBuilder(omnibem_data)
    ob.filter_by_cosmic_list([910916])
    mobem = ob.get_mobem()
    assert mobem.shape == (1, 105)
    # .ix was removed in pandas 1.0; the frame has a single row, so the
    # positional indexer selects the same cells
    assert mobem.iloc[0, 3:].sum() == 102

    # filter by alteration type
    ob = OmniBEMBuilder(omnibem_data)
    ob.filter_by_type_list(["Methylation"])
    mobem = ob.get_mobem()
    assert mobem[mobem.columns[3:]].sum().sum() == 12964
def test_set_cancer_type():
    """Restricting the v17 IC50 data to breast must give the known total."""
    an = ANOVA(gdsctools_data("IC50_v17.csv.gz"))
    an.set_cancer_type("breast")
    grand_total = an.ic50.df.sum().sum()
    assert_list_almost_equal([grand_total], [27721.255627472943])
def test_lasso():
    """Run the lasso cross-validation for drug 1047 and read the best alpha."""
    ic50 = IC50(gdsctools_data("IC50_v5.csv.gz"))
    features = GenomicFeatures(gdsctools_data("genomic_features_v5.csv.gz"))
    lasso = GDSCLasso(ic50, features, verbose=True)
    cv_results = lasso.runCV(1047)
    cv_results.alpha
def __init__(self, filename='index.html', directory='report',
             overwrite=True, verbose=True, template_filename='index.html',
             mode=None):
    """.. rubric:: Constructor

    :param filename: default to **index.html**
    :param directory: defaults to **report**
    :param overwrite: default to True (accepted but not used here)
    :param verbose: default to True (accepted but not used here)
    :param template_filename: name of the jinja2 template looked up in
        the gdsctools ``templates`` directory. Defaults to **index.html**
    :param str mode: if None, report have a structure that contains the
        following directories: OUTPUT, INPUT, js, css, images, code.
        Otherwise, if mode is set to 'summary', only the following
        directories are created: js, css, images
    :raises ValueError: if *mode* is neither None nor 'summary'
    """
    #: name of the analysis added in the title
    self.analysis = 'anova'
    self.pkgname = 'gdsctools'

    from gdsctools import version
    #: version added in the sub title
    self.version = version

    self._directory = directory
    self._filename = filename

    # This contains the sections and their names when
    # method add_section is used
    self.sections = []
    self.section_names = []

    #: flag to add dependencies
    self.add_dependencies = False

    self.title = 'ANOVA analysis summary'
    self.analysis_type = "PANCAN"

    # For jinja2 inheritance, we need to use the environment
    # to indicate where are the parents' templates
    template_directory = gdsctools_data('templates')
    self.env = Environment()
    self.env.loader = FileSystemLoader(template_directory)

    # use template provided inside gdsctools
    self.template = self.env.get_template(template_filename)

    self.jinja = {
        'time_now': self.get_time_now(),
        "analysis": self.analysis,
        "version": self.version,
        "title": self.title,
        "analysis_domain": self.analysis_type,
        'dependencies': self.get_table_dependencies().to_html(),
    }

    if mode is None:
        self._to_create = ['OUTPUT', 'INPUT', 'images', 'css', 'js', 'code']
    elif mode == 'summary':
        self._to_create = ['images', 'css', 'js']
    else:
        # An unknown mode used to leave _to_create undefined, which only
        # surfaced later as an obscure failure in _init_report.
        raise ValueError(
            "mode must be None or 'summary'; got {!r}".format(mode))

    self._init_report()
def _gsf(filename):
    """Return whatever ``gdsctools.gdsctools_data`` gives for *filename*."""
    # Imported on first call rather than at module import time
    from gdsctools import gdsctools_data as locate
    return locate(filename)
def main(args=None):
    """This function is used by **gdsctools_regression**

    Type::

        gdsctools_regression --help

    to get some help.

    Copies the ``regression.rules`` pipeline into the output directory,
    writes a ``config.yaml`` and a ``README`` next to it, and prints the
    snakemake commands to run.

    :param args: command line as a list whose first item is the program
        name. If None, ``sys.argv`` is used. A list that only contains the
        program name is handled as a request for ``--help``. The list
        provided by the caller is never modified.
    """
    msg = "Welcome to GDSCTools standalone (lasso, ridge, elastic net)"
    print_color(msg, purple, underline=True)

    # Keep the argument args as None by default to
    # allow testing e.g., in nosetests
    if args is None:
        args = sys.argv[:]
    elif len(args) == 1:
        # build a new list instead of extending the caller's one in place
        args = list(args) + ['--help']

    user_options = RegressionOptions(prog="gdsctools_regression")
    try:
        options = user_options.parse_args(args[1:])
    except SystemExit:
        return

    # -----------------------------------------------------------------
    if options.version is True:
        print("This is version %s of gdsctools_regression" % gdsctools.version)
        return

    if options.license is True:
        print(gdsctools.license)
        return

    # -------------------------------------------- real analysis --------
    try:
        os.mkdir(options.output_directory)
    except OSError:
        if options.force is False:
            print_color(("directory already exists, erase it or choose a different "
                         " output directory with --output-directory. Use --force to force"
                         " your choice"), "red")
            return
        # --force: keep going with the directory as it is

    # Copy the regression pipeline
    filename = gdsctools.gdsctools_data("regression.rules", '../pipelines')
    shutil.copy(filename, options.output_directory)

    # create the config
    params = {
        "method": options.method,
        "kfold": options.kfold,
        "ic50": os.path.realpath(options.input_ic50),
        "features": os.path.realpath(options.input_features),
    }

    config_template = """
# Analysis
regression:
    method: %(method)s   # lasso, elasticnet or ridge
    kfold: %(kfold)s   # Used to automatically estimate best alpha parameter
    randomness: 50
    boxplot_n: 5

# Input data sets
input:
    ic50: %(ic50)s
    genomic_features: %(features)s

"""
    config_path = os.path.join(options.output_directory, "config.yaml")
    with open(config_path, "w") as fh:
        fh.write(config_template % params)

    print("File config.yaml and regression.rules created in ./%s" %
          options.output_directory)
    print("First go to the directory where analysis will be performed\n\n")
    print("    cd %s\n" % options.output_directory)

    msg = """You have two choices now. Either you are on a laptop, or you are on a cluster.

1. LOCAL COMPUTER:
------------------

    snakemake -s regression.rules -p

where -p means 'print statements'

2. CLUSTERS:
------------

On a SLURM cluster, you can make use of the many cores available by typing for instance:

    srun --qos normal snakemake -s regression.rules -j 40 --cluster "sbatch --qos normal"

For more information about snakemake commands, type

    snakemake --help

"""
    print(msg)
    readme_path = os.path.join(options.output_directory, "README")
    with open(readme_path, "w") as fh:
        fh.write(msg)
from gdsctools.omnibem import OmniBEMBuilder
from gdsctools import gdsctools_data


omnibem_data = gdsctools_data("test_omnibem_genomic_alterations.csv.gz")
omnibem_genes = gdsctools_data("test_omnibem_genes.txt")


def test_omnibem():
    """Check OmniBEMBuilder filtering by gene list, COSMIC id and type."""
    ob = OmniBEMBuilder(omnibem_data)
    ob.filter_by_gene_list(omnibem_genes)
    mobem = ob.get_mobem()
    assert mobem[mobem.columns[3:]].sum().sum() == 54061

    # ob.plot_number_alteration_by_tissue()
    ob.plot_alterations_per_cellline()
    ob.get_significant_genes()

    # filter by cosmic id
    ob = OmniBEMBuilder(omnibem_data)
    ob.filter_by_cosmic_list([910916])
    mobem = ob.get_mobem()
    assert mobem.shape == (1, 105)
    # .ix was removed in pandas 1.0; the frame has a single row, so the
    # positional indexer selects the same cells
    assert mobem.iloc[0, 3:].sum() == 102

    # filter by alteration type
    ob = OmniBEMBuilder(omnibem_data)
    ob.filter_by_type_list(["Methylation"])
    mobem = ob.get_mobem()
    assert mobem[mobem.columns[3:]].sum().sum() == 12964
def create_summary_pages(self, main_directory='ALL'):
    """Write the index page summarising the analyses in *main_directory*.

    The css, js and images assets are copied into *main_directory*. Every
    other sub-directory is handled as one analysis: its
    ``OUTPUT/drugs_summary.csv`` and ``INPUT/DRUG_DECODE.csv`` files
    provide the number of hits and of involved public / proprietary
    compounds. An analysis whose summary file cannot be read is reported
    with empty values.

    :param main_directory: directory that contains one sub-directory per
        analysis (defaults to 'ALL')
    :return: the summary as a DataFrame, one row per analysis
    """
    from gdsctools import gdsctools_data
    # Let us use our HTMLTable to add the HTML references
    from gdsctools.report import HTMLTable

    # create directories and copy relevant files
    self.mkdir(main_directory + os.sep + 'images')
    self.mkdir(main_directory + os.sep + 'css')
    self.mkdir(main_directory + os.sep + 'js')

    # (sub-directory, files, prefix of the files in the gdsctools data tree)
    assets = [
        ('css', ['gdsc.css', 'github-gist.css'], ''),
        ('js', ['highlight.pack.js'], ''),
        ('images', ['EBI_logo.png', 'sanger-logo.png'], 'images' + os.sep),
    ]
    for subdir, filenames, prefix in assets:
        for filename in filenames:
            target = os.sep.join([main_directory, subdir, filename])
            if not os.path.isfile(target):
                shutil.copy(gdsctools_data(prefix + filename), target)

    # The asset directories (js included) are not analyses
    asset_dirs = [subdir for subdir, _, _ in assets]

    # Honour main_directory instead of a hard-coded 'ALL'
    directories = [x for x in glob.glob(os.path.join(main_directory, '*'))
                   if os.path.isdir(x)]

    summary = []
    for directory in sorted(directories):
        tcga = os.path.basename(directory)
        if tcga in asset_dirs:
            continue

        # number of hits
        path = directory + os.sep + 'OUTPUT' + os.sep
        try:
            hits = pd.read_csv(path + 'drugs_summary.csv', sep=',')
        except Exception:
            # best effort: keep the analysis in the table with no values
            summary.append([tcga] + [None] * 5)
            continue
        total_hits = hits.total.sum()
        drug_involved = get_drug_id(hits['Unnamed: 0'].unique())

        path = directory + os.sep + 'INPUT' + os.sep
        drug_decode = DrugDecode(path + 'DRUG_DECODE.csv')
        info = drug_decode.get_info()

        # .ix was removed in pandas 1.0.
        # NOTE(review): assumes the frame is indexed by drug identifier,
        # so label-based selection applies; confirm against DrugDecode.
        webrelease = drug_decode.df.loc[drug_involved].WEBRELEASE
        drug_inv_public = sum(webrelease == 'Y')
        drug_inv_prop = sum(webrelease != 'Y')

        summary.append([tcga, total_hits,
                        drug_inv_prop, info['N_prop'],
                        drug_inv_public, info['N_public']])

    # Passing the columns at construction also works with no analysis
    columns = ['Analysis name',
               'Number of hits',
               'Number of involved proprietary compounds',
               'out of',
               'Number of involved public',
               'out of']
    df = pd.DataFrame(summary, columns=columns)

    self.html_page = ReportMAIN(directory=main_directory,
                                filename='index.html',
                                template_filename='datapack_summary.html')

    self.html_table = HTMLTable(df)
    self.html_table.add_href('Analysis name', newtab=True, url=None,
                             suffix='/index.html')
    #html_table.add_bgcolor('Number of hits')

    self.html_page.jinja['data_table'] = self.html_table.to_html()
    self.html_page.write()
    return df
def test_anova_brca():
    """Regression test of the breast ANOVA: column sums and diagnostics."""
    pancan = ANOVA(gdsctools_data('IC50_v17.csv.gz'))
    pancan.set_cancer_type('breast')
    an = ANOVA(pancan.ic50, gdsctools_data('GF_BRCA_v17.csv.gz'))
    dfori = an.anova_all()

    # Sum of each column, without the non-numerical ones
    not_summable = ['DRUG_TARGET', 'DRUG_NAME', 'DRUG_ID', 'FEATURE']
    totest = dfori.df.sum().drop(not_summable).fillna(0).to_dict()

    exact = {
        'ANOVA_FEATURE_FDR': 1133416.7761055394,
        'ANOVA_FEATURE_pval': 5824.8201538614458,
        'FEATURE_IC50_T_pval': 5824.8201538614449,
        'FEATURE_IC50_effect_size': 4408.511449781573,
        'FEATURE_delta_MEAN_IC50': 261.11373729866705,
        'FEATURE_neg_Glass_delta': 4487.7401723134735,
        'FEATURE_neg_IC50_sd': 14701.868130868914,
        'FEATURE_neg_logIC50_MEAN': 28701.510736736222,
        'FEATURE_pos_Glass_delta': 6536.8938399490198,
        'FEATURE_pos_IC50_sd': 13362.588398939894,
        'FEATURE_pos_logIC50_MEAN': 28962.624474034845,
        'ANOVA_MSI_pval': 0.0,
        'N_FEATURE_neg': 439196.0,
        'N_FEATURE_pos': 92140.0,
        'ANOVA_TISSUE_pval': 0.0,
        'ASSOC_ID': 68509365.0,
        'index': 68497660.0,
    }
    for name, observed in totest.items():
        if name == 'ANOVA_MEDIA_pval':
            continue
        assert_almost_equal(observed, exact[name])

    # test part of the report (summary section)
    report = ANOVAReport(an, dfori)
    totest = report.diagnostics().to_dict()

    # Rows of the diagnostics table, keyed by their position
    texts = [
        'Type of analysis',
        'Total number of possible drug/feature associations',
        'Total number of ANOVA tests performed',
        'Percentage of tests performed',
        '',
        'Total number of tested drugs',
        'Total number of genomic features used',
        'Total number of screened cell lines',
        'MicroSatellite instability included as factor',
        '',
        'Total number of significant associations',
        ' - sensitive',
        ' - resistant',
        'p-value significance threshold',
        'FDR significance threshold',
        'Range of significant p-values',
        'Range of significant % FDRs',
    ]
    values = [
        'breast', 13780, 11705, 84.94, '', 265, 52, 51, False, '',
        27, 17, 10, 0.001, 25,
        '[2.098e-09, 0.0004356]',
        '[0.002456 18.89]',
    ]
    exact = {'text': dict(enumerate(texts)),
             'value': dict(enumerate(values))}
    assert totest == exact
def create_summary_pages(self, main_directory='ALL'):
    """Write the index page summarising the analyses in *main_directory*.

    The css, js and images assets are copied into *main_directory*. Every
    other sub-directory is handled as one analysis: its
    ``OUTPUT/drugs_summary.csv`` and ``INPUT/DRUG_DECODE.csv`` files
    provide the number of hits and of involved public / proprietary
    compounds. An analysis whose summary file cannot be read is reported
    with empty values.

    :param main_directory: directory that contains one sub-directory per
        analysis (defaults to 'ALL')
    :return: the summary as a DataFrame, one row per analysis
    """
    from gdsctools import gdsctools_data
    # Let us use our HTMLTable to add the HTML references
    from gdsctools.report import HTMLTable

    # create directories and copy relevant files
    self.mkdir(main_directory + os.sep + 'images')
    self.mkdir(main_directory + os.sep + 'css')
    self.mkdir(main_directory + os.sep + 'js')

    # (sub-directory, files, prefix of the files in the gdsctools data tree)
    assets = [
        ('css', ['gdsc.css', 'github-gist.css'], ''),
        ('js', ['highlight.pack.js'], ''),
        ('images', ['EBI_logo.png', 'sanger-logo.png'], 'images' + os.sep),
    ]
    for subdir, filenames, prefix in assets:
        for filename in filenames:
            target = os.sep.join([main_directory, subdir, filename])
            if not os.path.isfile(target):
                shutil.copy(gdsctools_data(prefix + filename), target)

    # The asset directories (js included) are not analyses
    asset_dirs = [subdir for subdir, _, _ in assets]

    # Honour main_directory instead of a hard-coded 'ALL'
    directories = [x for x in glob.glob(os.path.join(main_directory, '*'))
                   if os.path.isdir(x)]

    summary = []
    for directory in sorted(directories):
        tcga = os.path.basename(directory)
        if tcga in asset_dirs:
            continue

        # number of hits
        path = directory + os.sep + 'OUTPUT' + os.sep
        try:
            hits = pd.read_csv(path + 'drugs_summary.csv', sep=',')
        except Exception:
            # best effort: keep the analysis in the table with no values
            summary.append([tcga] + [None] * 5)
            continue
        total_hits = hits.total.sum()
        drug_involved = get_drug_id(hits['Unnamed: 0'].unique())

        path = directory + os.sep + 'INPUT' + os.sep
        drug_decode = DrugDecode(path + 'DRUG_DECODE.csv')
        info = drug_decode.get_info()

        # .ix was removed in pandas 1.0.
        # NOTE(review): assumes the frame is indexed by drug identifier,
        # so label-based selection applies; confirm against DrugDecode.
        webrelease = drug_decode.df.loc[drug_involved].WEBRELEASE
        drug_inv_public = sum(webrelease == 'Y')
        drug_inv_prop = sum(webrelease != 'Y')

        summary.append([tcga, total_hits,
                        drug_inv_prop, info['N_prop'],
                        drug_inv_public, info['N_public']])

    # Passing the columns at construction also works with no analysis
    columns = ['Analysis name',
               'Number of hits',
               'Number of involved proprietary compounds',
               'out of',
               'Number of involved public',
               'out of']
    df = pd.DataFrame(summary, columns=columns)

    self.html_page = ReportMAIN(directory=main_directory,
                                filename='index.html',
                                template_filename='datapack_summary.html')

    self.html_table = HTMLTable(df)
    self.html_table.add_href('Analysis name', newtab=True, url=None,
                             suffix='/index.html')
    #html_table.add_bgcolor('Number of hits')

    self.html_page.jinja['data_table'] = self.html_table.to_html()
    self.html_page.write()
    return df
def test_anova_brca():
    """Regression test of the breast ANOVA: column sums and diagnostics.

    The ``breast`` directory is removed at the end even when an assertion
    of the report section fails.
    """
    import shutil

    an1 = ANOVA(gdsctools_data('IC50_v17.csv.gz'))
    an1.set_cancer_type('breast')
    an = ANOVA(an1.ic50, gdsctools_data('GF_BRCA_v17.csv.gz'))
    dfori = an.anova_all()

    # Sum of each column, without the non-numerical ones
    df = dfori.df.sum()
    df = df.drop(['DRUG_TARGET', 'DRUG_NAME', 'DRUG_ID', 'FEATURE'])
    df = df.fillna(0)
    totest = df.to_dict()

    exact = {
        'ANOVA_FEATURE_FDR': 1133416.7761055394,
        'ANOVA_FEATURE_pval': 5824.8201538614458,
        'FEATURE_IC50_T_pval': 5824.8201538614449,
        'FEATURE_IC50_effect_size': 4408.511449781573,
        'FEATURE_delta_MEAN_IC50': 261.11373729866705,
        'FEATURE_neg_Glass_delta': 4487.7401723134735,
        'FEATURE_neg_IC50_sd': 14701.868130868914,
        'FEATURE_neg_logIC50_MEAN': 28701.510736736222,
        'FEATURE_pos_Glass_delta': 6536.8938399490198,
        'FEATURE_pos_IC50_sd': 13362.588398939894,
        'FEATURE_pos_logIC50_MEAN': 28962.624474034845,
        'ANOVA_MSI_pval': 0.0,
        'N_FEATURE_neg': 439196.0,
        'N_FEATURE_pos': 92140.0,
        'ANOVA_TISSUE_pval': 0.0,
        'ASSOC_ID': 68509365.0,
        'index': 68497660.0,
    }
    for k, v in totest.items():
        if k in ['ANOVA_MEDIA_pval']:
            continue
        assert_almost_equal(v, exact[k])

    # test part of the report (summary section)
    try:
        r = ANOVAReport(an, dfori)
        totest = r.diagnostics().to_dict()
        exact = {
            'text': {
                0: 'Type of analysis',
                1: 'Total number of possible drug/feature associations',
                2: 'Total number of ANOVA tests performed',
                3: 'Percentage of tests performed',
                4: '',
                5: 'Total number of tested drugs',
                6: 'Total number of genomic features used',
                7: 'Total number of screened cell lines',
                8: 'MicroSatellite instability included as factor',
                9: '',
                10: 'Total number of significant associations',
                11: ' - sensitive',
                12: ' - resistant',
                13: 'p-value significance threshold',
                14: 'FDR significance threshold',
                15: 'Range of significant p-values',
                16: 'Range of significant % FDRs',
            },
            'value': {
                0: 'breast',
                1: 13780,
                2: 11705,
                3: 84.94,
                4: '',
                5: 265,
                6: 52,
                7: 51,
                8: False,
                9: '',
                10: 27,
                11: 17,
                12: 10,
                13: 0.001,
                14: 25,
                15: '[2.098e-09, 0.0004356]',
                16: '[0.002456 18.89]',
            },
        }
        assert totest == exact
    finally:
        # NOTE(review): the report presumably writes into ./breast; remove
        # it on failure too, without masking the original error.
        shutil.rmtree('breast', ignore_errors=True)
def __init__(self, filename='index.html', directory='report',
             overwrite=True, verbose=True, template_filename='index.html',
             mode=None):
    """.. rubric:: Constructor

    :param filename: default to **index.html**
    :param directory: defaults to **report**
    :param overwrite: default to True (accepted but not used here)
    :param verbose: default to True (accepted but not used here)
    :param template_filename: name of the jinja2 template looked up in
        the gdsctools ``templates`` directory. Defaults to **index.html**
    :param str mode: if None, report have a structure that contains the
        following directories: OUTPUT, INPUT, js, css, images, code.
        Otherwise, if mode is set to 'summary', only the following
        directories are created: js, css, images
    :raises ValueError: if *mode* is neither None nor 'summary'
    """
    #: name of the analysis added in the title
    self.analysis = 'anova'
    self.pkgname = 'gdsctools'

    from gdsctools import version
    #: version added in the sub title
    self.version = version

    self._directory = directory
    self._filename = filename

    # This contains the sections and their names when
    # method add_section is used
    self.sections = []
    self.section_names = []

    #: flag to add dependencies
    self.add_dependencies = False

    self.title = 'ANOVA analysis summary'
    self.analysis_type = "PANCAN"

    # For jinja2 inheritance, we need to use the environment
    # to indicate where are the parents' templates
    template_directory = gdsctools_data('templates')
    self.env = Environment()
    self.env.loader = FileSystemLoader(template_directory)

    # use template provided inside gdsctools
    self.template = self.env.get_template(template_filename)

    self.jinja = {
        'time_now': self.get_time_now(),
        "analysis": self.analysis,
        "version": self.version,
        "title": self.title,
        "analysis_domain": self.analysis_type,
        'dependencies': self.get_table_dependencies().to_html(),
    }

    if mode is None:
        self._to_create = ['OUTPUT', 'INPUT', 'images', 'css', 'js', 'code']
    elif mode == 'summary':
        self._to_create = ['images', 'css', 'js']
    else:
        # An unknown mode used to leave _to_create undefined, which only
        # surfaced later as an obscure failure in _init_report.
        raise ValueError(
            "mode must be None or 'summary'; got {!r}".format(mode))

    self._init_report()