def test_func(names):
    """Queue one ``keggfunc`` job per name and run them all.

    :param names: iterable of names; each one is handed to ``keggfunc``
        as its single argument.
    :return: the :class:`MultiProcessing` instance, after ``run()`` has
        returned.
    """
    from easydev import MultiProcessing

    pool = MultiProcessing(maxcpu=8, verbose=False)
    for name in names:
        pool.add_job(keggfunc, name)
    pool.run()
    return pool
def multicore_anova(ic50, genomic_features, drug_decode=None, maxcpu=2, sampling=0):
    """Build an :class:`ANOVA` instance and analyse each drug in its own job.

    One ``analyse_one_drug`` job is queued per drug found in
    ``master.ic50.drugIds``; once all jobs have run, their results are
    stored in the ``individual_anova`` attribute of the ANOVA instance.

    Timing note from the original author: the entire analysis took
    15 minutes using 4 CPUs (16 Oct 2015).

    :param ic50: a filename or :class:`IC50` instance.
    :param genomic_features: forwarded to :class:`ANOVA` as
        ``genomic_features``.
    :param drug_decode: forwarded to :class:`ANOVA` as ``drug_decode``.
    :param int maxcpu: forwarded to :class:`MultiProcessing` as ``maxcpu``.
    :param sampling: assigned to the ``sampling`` attribute of the ANOVA
        instance before the jobs are queued.
    :return: the anova instance itself (not the results); see example below.

    Example (sketch adapted from the original docstring; the import
    locations should be confirmed against the package layout)::

        master = multicore_anova(ic50, genomic_features, maxcpu=2)
        results = master.anova_all()

        from gdsctools import ANOVAReport
        report = ANOVAReport(master, results)
        report.create_html_pages()

    .. warning:: experimental. Seems to work but sometimes hangs forever.
    """
    print("experimental code to run the analysis with several cores")
    print("May takes lots or resources and slow down your system")
    started = time.time()

    master = ANOVA(
        ic50,
        genomic_features=genomic_features,
        drug_decode=drug_decode,
        low_memory=True,
    )
    master.sampling = sampling
    drug_ids = master.ic50.drugIds

    # Queue all jobs (one per drug), then execute them.
    pool = MultiProcessing(maxcpu=maxcpu)
    for drug in drug_ids:
        pool.add_job(analyse_one_drug, master, drug)
    pool.run()

    # Each result entry holds the drug in position 0 and its analysis in
    # position 1; copy them back into the ANOVA instance.
    for entry in pool.results:
        master.individual_anova[entry[0]] = entry[1]

    elapsed = time.time() - started
    print("\nTook " + str(elapsed) + "seconds.")
    return master
def multicore_analysis(anova, drugs, maxcpu=2):
    """Analyse the not-yet-processed drugs of an :class:`ANOVA` in parallel.

    Used by :class:`ANOVA` to perform multiprocess analysis. Drugs that
    already have an entry in ``anova.individual_anova`` are skipped; every
    other drug gets its own ``analyse_one_drug`` job.

    :param anova: an instance of :class:`ANOVA`
    :param list drugs: list of drugs to analyse
    :param int maxcpu: number of CPU to use
    :return: the instance itself with the individual_anova attribute
        filled with all results
    """
    pool = MultiProcessing(maxcpu=maxcpu)

    for drug in drugs:
        if drug in anova.individual_anova:
            # Already analysed: do not queue a job for it.
            continue
        pool.add_job(analyse_one_drug, anova, drug)
    pool.run()

    # Each result entry holds the drug in position 0 and its analysis in
    # position 1; copy them back into the ANOVA instance.
    for entry in pool.results:
        anova.individual_anova[entry[0]] = entry[1]

    return anova
def main(args=None):
    """Command-line entry point: compute summary statistics on input files.

    The files selected by ``--file`` / ``--glob`` must all share the same
    extension. One job per file is dispatched to a :class:`MultiProcessing`
    pool (``get_fastq_stats``, ``get_bed_stats`` or ``get_bam_stats``
    depending on the extension) and the per-file results are gathered in a
    single table that is printed and returned.

    When the ``multiple`` option is set, a :class:`MultiSummary` is built
    instead and the process exits with status 0.

    :param args: command-line arguments, program name first. Defaults to a
        copy of ``sys.argv``.
    :return: a :class:`pandas.DataFrame`. For BED input it is indexed by
        ``filename``; otherwise it is the concatenation of the per-file
        results, re-indexed by filename when the lengths match.
    :raises AssertionError: if the input files do not all have the same
        extension.
    """
    from sequana import logger

    if args is None:
        args = sys.argv[:]

    user_options = Options(prog="sequana")

    # If --help or no options provided, show the help
    if len(args) == 1:
        # NOTE(review): presumably --help terminates the process here
        # (argparse-style); otherwise ``options`` would be undefined below.
        user_options.parse_args(["prog", "--help"])
    else:
        options = user_options.parse_args(args[1:])

    options.verbose = not options.quiet

    if options.multiple is True:
        from sequana.modules_report.multi_summary import MultiSummary

        summary_kwargs = {
            "output_filename": "multi_summary.html",
            "verbose": options.verbose,
        }
        if options.glob:
            summary_kwargs["pattern"] = options.glob
        # The instance itself is not used afterwards; it is only constructed.
        MultiSummary(**summary_kwargs)
        sys.exit(0)

    if options.file:
        options.glob = options.file

    # We put the import here to make the --help faster
    from easydev import MultiProcessing
    from sequana.snaketools import FileFactory

    ff = FileFactory(options.glob)
    assert len(set(ff.extensions)) == 1, "Input files must have the same extensions"
    extension = ff.all_extensions[0]

    logger.info("Found %s files:" % len(ff.realpaths))
    for this in ff.realpaths:
        logger.info(" - " + this)

    mc = MultiProcessing(options.thread, progress=True)
    if extension in ["fastq", "fastq.gz"]:
        for filename in ff.realpaths:
            mc.add_job(get_fastq_stats, filename, options.sample)
    elif extension.endswith("bed"):
        for filename in ff.realpaths:
            mc.add_job(get_bed_stats, filename)
    elif extension.endswith("bam"):
        for filename in ff.realpaths:
            mc.add_job(get_bam_stats, filename)
    mc.run()

    # For the BED file only
    if extension.endswith("bed"):
        results = []
        for i, this in enumerate(ff.filenames):
            df = pd.DataFrame(mc.results[i]).T
            df.index.name = this
            df = df.reset_index()
            df["filename"] = [this] * len(df)
            results.append(df)
        df = pd.concat(results).set_index("filename")
        print(df)
        return df

    # FastQ / BAM: label each per-file result with its filename and stack
    # them. DataFrame.append was removed in pandas 2.0, so pd.concat (already
    # used for the BED case above) does the stacking, in a single call.
    frames = []
    for i, this in enumerate(ff.filenames):
        frame = mc.results[i]
        frame.index.name = this
        frames.append(frame)
    # A single result is used as is, exactly as before (no copy).
    df = frames[0] if len(frames) == 1 else pd.concat(frames)

    # For FastQ only: best-effort relabelling of the rows with the filenames.
    # It fails (e.g. on a length mismatch) for other layouts, in which case
    # the existing index is kept. Unlike a bare ``except``, this does not
    # swallow KeyboardInterrupt / SystemExit.
    try:
        df.index = ff.filenames
    except Exception:
        pass

    print()
    print(df)
    return df