Ejemplo n.º 1
0
def test_func(names):
    """Queue one ``keggfunc`` job per entry of *names* and run them all.

    :param names: iterable of values, each passed as the single argument
        of a ``keggfunc`` job
    :return: the :class:`MultiProcessing` instance, after ``run()`` has
        been called on it
    """
    from easydev import MultiProcessing

    pool = MultiProcessing(maxcpu=8, verbose=False)
    for entry in names:
        pool.add_job(keggfunc, entry)
    pool.run()
    return pool
Ejemplo n.º 2
0
def test_func(names):
    """Run ``keggfunc`` on every item of *names* through MultiProcessing.

    :param names: iterable whose items are each handed to ``keggfunc``
        as one job
    :return: the :class:`MultiProcessing` object the jobs were added to,
        returned once its ``run()`` call has completed
    """
    from easydev import MultiProcessing

    runner = MultiProcessing(maxcpu=8, verbose=False)
    for item in names:
        runner.add_job(keggfunc, item)
    runner.run()
    return runner
Ejemplo n.º 3
0
def multicore_analysis(anova, drugs, maxcpu=2):
    """Analyse several drugs through a multiprocessing job queue.

    Helper used by :class:`ANOVA`.

    :param anova: an instance of :class:`ANOVA`
    :param list drugs: drugs to analyse; a drug already present in
        ``anova.individual_anova`` is skipped
    :param int maxcpu: number of CPU to use

    :return: the same ``anova`` instance, with its ``individual_anova``
        attribute updated from the job results
    """
    pool = MultiProcessing(maxcpu=maxcpu)
    for drug in drugs:
        # skip drugs that already have a stored result
        if drug in anova.individual_anova.keys():
            continue
        pool.add_job(analyse_one_drug, anova, drug)
    pool.run()

    # each result is read as (drug, analysis output); store it on the
    # ANOVA instance
    for item in pool.results:
        name, outcome = item[0], item[1]
        anova.individual_anova[name] = outcome
    return anova
Ejemplo n.º 4
0
def multicore_anova(ic50, genomic_features, drug_decode=None, maxcpu=2,
                    sampling=0):
    """Run the analysis of every drug using several processes.

    An :class:`ANOVA` instance is created from the inputs, one
    ``analyse_one_drug`` job per drug is submitted to
    :class:`MultiProcessing`, and each result is stored in the
    ``individual_anova`` attribute of the instance.

    For reference, the entire analysis took 15 minutes using 4 CPUs
    (16 Oct 2015).

    :param ic50: a filename or :class:`IC50` instance.
    :param genomic_features: forwarded to :class:`ANOVA` as
        ``genomic_features``.
    :param drug_decode: forwarded to :class:`ANOVA` as ``drug_decode``.
    :param int maxcpu: number of CPU to use
    :param sampling: value assigned to the ``sampling`` attribute of the
        :class:`ANOVA` instance.
    :return: the anova instance itself (not the results); see example below.

    ::

        master = multicore_anova(ic50, genomic_features, maxcpu=2)
        results = master.anova_all()

        from gdsctools import ANOVAReport
        report = ANOVAReport(master, results)
        report.create_html_pages()

    .. warning:: experimental. Seems to work but sometimes hangs forever.
    """
    print("experimental code to run the analysis with several cores")
    print("May takes lots or resources and slow down your system")
    t1 = time.time()
    master = ANOVA(ic50, genomic_features=genomic_features,
                   drug_decode=drug_decode, low_memory=True)
    master.sampling = sampling

    drugs = master.ic50.drugIds

    t = MultiProcessing(maxcpu=maxcpu)
    # add all jobs (one per drug)
    for drug in drugs:
        t.add_job(analyse_one_drug, master, drug)
    t.run()

    # populate the ANOVA instance with the results; each result is read
    # as (drug, analysis output)
    for this in t.results:
        drug = this[0]
        result = this[1]
        master.individual_anova[drug] = result

    elapsed = time.time() - t1
    print("\nTook " + str(elapsed) + " seconds.")
    return master
Ejemplo n.º 5
0
def multicore_analysis(anova, drugs, maxcpu=2):
    """Multiprocess helper of :class:`ANOVA`: analyse the given drugs.

    :param anova: an instance of :class:`ANOVA`
    :param list drugs: list of drugs to analyse; entries that are already
        keys of ``anova.individual_anova`` are not submitted again
    :param int maxcpu: number of CPU to use

    :return: ``anova`` itself, once ``individual_anova`` has been filled
        with the results of the submitted jobs
    """
    jobs = MultiProcessing(maxcpu=maxcpu)
    for drug in drugs:
        already_done = drug in anova.individual_anova.keys()
        if not already_done:
            jobs.add_job(analyse_one_drug, anova, drug)
    jobs.run()

    # copy every (drug, result) pair back onto the ANOVA instance
    for entry in jobs.results:
        key, value = entry[0], entry[1]
        anova.individual_anova[key] = value
    return anova
Ejemplo n.º 6
0
def main(args=None):
    """Command-line entry point: compute statistics on a set of files.

    With ``options.multiple`` set, a :class:`MultiSummary` report is
    created and the program exits with status 0. Otherwise the files
    selected by ``options.glob`` (or ``options.file``) are processed in
    parallel with :class:`MultiProcessing`, using ``get_fastq_stats``,
    ``get_bed_stats`` or ``get_bam_stats`` depending on the file
    extension, and the per-file results are combined, printed and
    returned.

    :param list args: command-line arguments with the program name as
        first item; defaults to a copy of ``sys.argv``.
    :return: the combined results (built with pandas) for all files.
    :raises AssertionError: if the input files do not share one extension.
    """
    from sequana import logger
    if args is None:
        args = sys.argv[:]

    user_options = Options(prog="sequana")

    # If --help or no options provided, show the help
    # NOTE(review): parse_args with --help is presumably expected to exit;
    # otherwise ``options`` would be undefined below -- confirm.
    if len(args) == 1:
        user_options.parse_args(["prog", "--help"])
    else:
        options = user_options.parse_args(args[1:])
    options.verbose = not options.quiet

    if options.multiple is True:
        from sequana.modules_report.multi_summary import MultiSummary
        summary_kwargs = {
            "output_filename": "multi_summary.html",
            "verbose": options.verbose,
        }
        if options.glob:
            summary_kwargs["pattern"] = options.glob
        # The instance itself is not needed; only the call matters here.
        MultiSummary(**summary_kwargs)
        sys.exit(0)

    if options.file:
        options.glob = options.file

    # We put the import here to make the --help faster
    from easydev import MultiProcessing
    from sequana.snaketools import FileFactory

    ff = FileFactory(options.glob)
    assert len(set(ff.extensions)) == 1, "Input files must have the same extensions"
    extension = ff.all_extensions[0]

    logger.info("Found %s files:" % len(ff.realpaths))
    for this in ff.realpaths:
        logger.info(" - " + this)

    # One job per input file; the job function depends on the extension.
    mc = MultiProcessing(options.thread, progress=True)
    if extension in ["fastq", "fastq.gz"]:
        for filename in ff.realpaths:
            mc.add_job(get_fastq_stats, filename, options.sample)

    elif extension.endswith("bed"):
        for filename in ff.realpaths:
            mc.add_job(get_bed_stats, filename)

    elif extension.endswith("bam"):
        for filename in ff.realpaths:
            mc.add_job(get_bam_stats, filename)
    mc.run()

    # For the BED file only: each result is turned into a transposed
    # DataFrame tagged with its filename, then all are concatenated.
    if extension.endswith("bed"):
        results = []
        for i, this in enumerate(ff.filenames):
            df = mc.results[i]
            df = pd.DataFrame(df)
            df = df.T
            df.index.name = this
            df = df.reset_index()
            df["filename"] = [this] * len(df)
            results.append(df)
        df = pd.concat(results).set_index("filename")
        print(df)
        return df

    # Other formats: stack the per-file results, in file order.
    frames = []
    for i, this in enumerate(ff.filenames):
        frame = mc.results[i]
        frame.index.name = this
        frames.append(frame)
    # DataFrame.append was removed in pandas 2.0; a single concat gives the
    # same stacking without re-copying the accumulated frame at each step.
    df = frames[0] if len(frames) == 1 else pd.concat(frames)

    # For FastQ only: label the rows with the filenames. Best effort --
    # pandas raises ValueError when the number of rows does not match the
    # number of files, in which case the index is left untouched.
    try:
        df.index = ff.filenames
    except ValueError:
        pass

    print()
    print(df)
    return df