Example #1
def run_main(config, config_file, fc_dir, run_info_yaml):
    work_dir = os.getcwd()
    fc_name, fc_date = get_flowcell_info(fc_dir)

    if run_info_yaml and os.path.exists(run_info_yaml):
        log.info("Found YAML samplesheet, using %s instead of Galaxy API" % run_info_yaml)
        with open(run_info_yaml) as in_handle:
            run_details = yaml.load(in_handle)
        run_info = dict(details=run_details, run_id="")
    else:
        log.info("Fetching run details from Galaxy instance")
        galaxy_api = GalaxyApiAccess(config['galaxy_url'], config['galaxy_api_key'])
        run_info = galaxy_api.run_details(fc_name, fc_date)
    fastq_dir = get_fastq_dir(fc_dir)
    run_items = _add_multiplex_across_lanes(run_info["details"], fastq_dir, fc_name)
    align_dir = os.path.join(work_dir, "alignments")

    # process each flowcell lane
    with utils.cpmap(config["algorithm"]["num_cores"]) as cpmap:
        for _ in cpmap(process_lane,
                       ((i, fastq_dir, fc_name, fc_date, align_dir, config, config_file)
                        for i in run_items)):
            pass
    # process samples, potentially multiplexed across multiple lanes
    sample_files, sample_fastq, sample_info = organize_samples(align_dir,
            fastq_dir, work_dir, fc_name, fc_date, run_items)
    with utils.cpmap(config["algorithm"]["num_cores"]) as cpmap:
        for _ in cpmap(process_sample, ((name, sample_fastq[name], sample_info[name],
                                         bam_files, work_dir, config, config_file)
                                        for name, bam_files in sample_files)):
            pass
    write_metrics(run_info, work_dir, fc_dir, fc_name, fc_date, fastq_dir)
Example #2
def main(config_file):
    with open(config_file) as in_handle:
        config = yaml.load(in_handle)
    create_dirs(config)
    with cpmap(config["algorithm"]["cores"]) as cur_map:
        for _ in cur_map(process_fastq, ((fastq, ref_index, config, config_file)
                                         for fastq, ref_index in fastq_to_process(config))):
            pass
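Every snippet on this page follows the same calling convention: cpmap (or utils.cpmap) is a context manager that yields a pool-backed map, and each work item is a tuple of arguments for the target function. Below is a minimal sketch of such a helper built on multiprocessing.Pool; it illustrates the pattern only and is not the library's actual implementation, which may unpack argument tuples for the worker and special-case single-core runs.

import contextlib
import multiprocessing

@contextlib.contextmanager
def cpmap(cores=1):
    # Yield an imap-style mapper backed by a process pool (sketch).
    if int(cores) == 1:
        yield map  # run work items serially in-process
    else:
        pool = multiprocessing.Pool(int(cores))
        try:
            yield pool.imap_unordered
        finally:
            pool.terminate()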
Example #3
def main(config_file):
    with open(config_file) as in_handle:
        config = yaml.load(in_handle)
    create_dirs(config)
    with cpmap(config["algorithm"]["cores"]) as cur_map:
        for _ in cur_map(process_fastq, ((fastq, ref_index, config, config_file)
                                         for fastq, ref_index in fastq_to_process(config))):
            pass
def run_main(config, config_file, fc_dir, run_info_yaml):
    work_dir = os.getcwd()
    fc_name, fc_date = get_flowcell_info(fc_dir)

    if run_info_yaml and os.path.exists(run_info_yaml):
        log.info("Found YAML samplesheet, using %s instead of Galaxy API" %
                 run_info_yaml)
        with open(run_info_yaml) as in_handle:
            run_details = yaml.load(in_handle)
        run_info = dict(details=run_details, run_id="")
    else:
        log.info("Fetching run details from Galaxy instance")
        galaxy_api = GalaxyApiAccess(config['galaxy_url'],
                                     config['galaxy_api_key'])
        run_info = galaxy_api.run_details(fc_name, fc_date)
    fastq_dir = get_fastq_dir(fc_dir)
    run_items = _add_multiplex_across_lanes(run_info["details"], fastq_dir,
                                            fc_name)
    align_dir = os.path.join(work_dir, "alignments")

    # process each flowcell lane
    with utils.cpmap(config["algorithm"]["num_cores"]) as cpmap:
        for _ in cpmap(process_lane,
                       ((i, fastq_dir, fc_name, fc_date, align_dir, config, config_file)
                        for i in run_items)):
            pass
    # process samples, potentially multiplexed across multiple lanes
    sample_files, sample_fastq, sample_info = organize_samples(
        align_dir, fastq_dir, work_dir, fc_name, fc_date, run_items)
    with utils.cpmap(config["algorithm"]["num_cores"]) as cpmap:
        for _ in cpmap(process_sample,
                       ((name, sample_fastq[name], sample_info[name],
                         bam_files, work_dir, config, config_file)
                        for name, bam_files in sample_files)):
            pass
    write_metrics(run_info, work_dir, fc_dir, fc_name, fc_date, fastq_dir)
Example #5
def _run_parallel(fn_name, items, dirs, config):
    """Process a supplied function: single, multi-processor or distributed.
    """
    parallel = config["algorithm"]["num_cores"]
    if str(parallel).lower() == "messaging":
        runner = messaging.runner(dirs, config)
        return runner(fn_name, items)
    else:
        out = []
        fn = globals()[fn_name]
        with utils.cpmap(int(parallel)) as cpmap:
            for data in cpmap(fn, items):
                if data:
                    out.extend(data)
        return out
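Note the dual-typed configuration value: num_cores holds either an integer (local multiprocessing) or the literal string "messaging" (hand off to a distributed queue runner), and the str() coercion lets one comparison cover both cases. A hypothetical config pair illustrating the convention; the real config is loaded from YAML and carries many more keys.

# Assumed shapes, for illustration only.
local_config = {"algorithm": {"num_cores": 4}}
distributed_config = {"algorithm": {"num_cores": "messaging"}}

for cfg in (local_config, distributed_config):
    parallel = cfg["algorithm"]["num_cores"]
    # str() is a no-op on strings and renders ints as digits,
    # so the test is safe for both types.
    print(str(parallel).lower() == "messaging")  # False, then True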
Example #6
def main(config_file):
    with open(config_file) as in_handle:
        config = yaml.load(in_handle)
    ref_index = novoalign.refindex(config["ref"], kmer_size=13, step_size=1)
    create_dirs(config)
    for cur in config["input"]:
        in_fastq = cur["fastq"]
        if cur.get("old_style_barcodes", False):
            in_fastq = convert_illumina_oldstyle(in_fastq)
        bc_files = demultiplex(in_fastq, cur["barcodes"],
                               config["dir"]["tmp"], config)
        with cpmap(config["algorithm"]["cores"]) as cur_map:
            for _ in cur_map(process_fastq, ((bc_file, ref_index, cur, config, config_file)
                                             for bc_file in bc_files)):
                pass
Example #7
 def run_parallel(fn_name, items):
     parallel = config["algorithm"]["num_cores"]
     if str(parallel).lower() == "messaging":
         task_module = "{base}.tasks".format(base=module)
         runner_fn = runner(task_module, dirs, config, config_file)
         return runner_fn(fn_name, items)
     else:
         out = []
         fn = getattr(__import__("{base}.multitasks".format(base=module),
                                 fromlist=["multitasks"]),
                      fn_name)
         with utils.cpmap(int(parallel)) as cpmap:
             for data in cpmap(fn, items):
                 if data:
                     out.extend(data)
     return out
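The run_parallel variants from here on appear to be closures: config, dirs, module, and config_file are captured from an enclosing scope rather than passed in, which is why the snippets are indented. The __import__("{base}.multitasks".format(base=module), fromlist=["multitasks"]) idiom returns the multitasks submodule itself, because a non-empty fromlist forces the leaf module rather than the top-level package. importlib expresses the same lookup more directly; a sketch with hypothetical names:

import importlib

def resolve_task(module, fn_name):
    # Equivalent to the __import__(..., fromlist=[...]) idiom above:
    # import_module returns the leaf module directly.
    multitasks = importlib.import_module("%s.multitasks" % module)
    return getattr(multitasks, fn_name)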
Example #8
 def run_parallel(fn_name, items, metadata=None):
     parallel = config["algorithm"]["num_cores"]
     if str(parallel).lower() == "messaging":
         task_module = "{base}.tasks".format(base=module)
         runner_fn = runner(task_module, dirs, config, config_file)
         return runner_fn(fn_name, items)
     else:
         out = []
         fn = getattr(
             __import__("{base}.multitasks".format(base=module),
                        fromlist=["multitasks"]), fn_name)
         cores = cores_including_resources(int(parallel), metadata, config)
         with utils.cpmap(cores) as cpmap:
             for data in cpmap(fn, filter(lambda x: x is not None, items)):
                 if data:
                     out.extend(data)
         return out
Example #9
 def run_parallel(fn_name, items, metadata=None):
     parallel = config["algorithm"]["num_cores"]
     if str(parallel).lower() == "messaging":
         task_module = "{base}.tasks".format(base=module)
         runner_fn = runner(task_module, dirs, config, config_file)
         return runner_fn(fn_name, items)
     else:
         out = []
         fn = getattr(__import__("{base}.multitasks".format(base=module),
                                 fromlist=["multitasks"]),
                      fn_name)
         cores = cores_including_resources(int(parallel), metadata, config)
         with utils.cpmap(cores) as cpmap:
             for data in cpmap(fn, filter(lambda x: x is not None, items)):
                 if data:
                     out.extend(data)
         return out
Example #10
 def run_parallel(fn_name, items, metadata=None):
     if parallel["type"].startswith("messaging"):
         task_module = "{base}.tasks".format(base=parallel["module"])
         runner_fn = runner(task_module, dirs, config, config_file)
         return runner_fn(fn_name, items)
     elif parallel["type"] == "ipython":
         return ipython.runner(parallel, fn_name, items, dirs["work"], config)
     else:
         out = []
         fn = getattr(__import__("{base}.multitasks".format(base=parallel["module"]),
                                 fromlist=["multitasks"]),
                      fn_name)
         cores = cores_including_resources(int(parallel["cores"]), metadata, config)
         with utils.cpmap(cores) as cpmap:
             for data in cpmap(fn, filter(lambda x: x is not None, items)):
                 if data:
                     out.extend(data)
         return out
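Later revisions replace the overloaded num_cores value with an explicit parallel descriptor that names the backend, the task module, and the core count. Hypothetical descriptors matching the three branches above; field values beyond the keys the code actually reads are assumptions.

# Falls through to the local multiprocessing branch:
parallel_local = {"type": "local", "module": "mypipeline", "cores": 8}
# Dispatches to the message-queue runner:
parallel_mq = {"type": "messaging", "module": "mypipeline", "cores": 8}
# Dispatches to the IPython-parallel runner:
parallel_ipy = {"type": "ipython", "module": "mypipeline", "cores": 8}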
Example #11
def process_ref(in_file, ref, config):
    db_info = {"blast" : (prepare_blast_db, blast_search),
               "blat" : (prepare_blat_db, blat_search)}
    prepare_db, do_search = db_info[ref.get("aligner", "blast")]
    ref_file = prepare_ref_file(ref, config)
    blast_db = prepare_db(ref_file, "nucl")
    out_file = "%s-%s.tsv" % (os.path.splitext(in_file)[0], ref["name"])
    if not os.path.exists(out_file):
        with open(in_file) as in_handle:
            with open(out_file, "w") as out_handle:
                writer = csv.writer(out_handle)
                writer.writerow(["query", "length", "hit", "hitlength", "hitidentities"])
                with utils.cpmap(config["algorithm"]["cores"]) as cpmap:
                    results = cpmap(do_search,
                                    ((rec, blast_db, config["dir"]["work"])
                                     for rec in SeqIO.parse(in_handle, "fasta")))
                    for info in results:
                        writer.writerow(info)
    return out_file
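Because cpmap is fed a generator expression, FASTA records are parsed lazily as pool workers pull items, so the whole input file never has to sit in memory; each do_search result is then written as one CSV row matching the header above. A stand-in worker, hypothetical and only to show the tuple-in/row-out contract:

def fake_search(args):
    # Stand-in for do_search: consumes one (record, db, work_dir)
    # tuple and returns one row for the CSV writer.
    rec, blast_db, work_dir = args
    return [rec.id, len(rec.seq), "no-hit", 0, 0]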
Example #12
 def run_parallel(fn_name, items, metadata=None):
     if parallel["type"].startswith("messaging"):
         task_module = "{base}.tasks".format(base=parallel["module"])
         runner_fn = runner(task_module, dirs, config, config_file)
         return runner_fn(fn_name, items)
     elif parallel["type"] == "ipython":
         return ipython.runner(parallel, fn_name, items, dirs["work"],
                               config)
     else:
         out = []
         fn = getattr(
             __import__("{base}.multitasks".format(base=parallel["module"]),
                        fromlist=["multitasks"]), fn_name)
         cores = cores_including_resources(int(parallel["cores"]), metadata,
                                           config)
         with utils.cpmap(cores) as cpmap:
             for data in cpmap(fn, filter(lambda x: x is not None, items)):
                 if data:
                     out.extend(data)
         return out
Example #13
 def run_parallel(fn_name, items, metadata=None):
     items = [x for x in items if x is not None]
     items = diagnostics.track_parallel(items, fn_name)
     if parallel["type"].startswith("messaging"):
         task_module = "{base}.tasks".format(base=parallel["module"])
         runner_fn = runner(task_module, dirs, config, config_file)
         return runner_fn(fn_name, items)
     elif parallel["type"] == "ipython":
         return ipython.runner(parallel, fn_name, items, dirs["work"], config)
     else:
         logger.info("multiprocessing: %s" % fn_name)
         out = []
         fn = getattr(__import__("{base}.multitasks".format(base=parallel["module"]),
                                 fromlist=["multitasks"]),
                      fn_name)
         num_jobs, cores_per_job = ipython.find_cores_per_job(fn, parallel, items, config)
         items = [ipython.add_cores_to_config(x, cores_per_job) for x in items]
         num_jobs = cores_including_resources(num_jobs, metadata, config)
         with utils.cpmap(num_jobs) as cpmap:
             for data in cpmap(fn, items):
                 if data:
                     out.extend(data)
         return out
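Example #13 refines core allocation: instead of one process per core, ipython.find_cores_per_job partitions the available cores into num_jobs concurrent jobs of cores_per_job each, and add_cores_to_config stamps the per-job allowance into every work item so multithreaded tools downstream know their budget. A toy version of that split, assuming the real helper also weighs per-program requirements from config:

def find_cores_per_job_sketch(total_cores, max_multicore):
    # Toy partition of total cores into (num_jobs, cores_per_job).
    cores_per_job = max(1, min(max_multicore, total_cores))
    num_jobs = max(1, total_cores // cores_per_job)
    return num_jobs, cores_per_job

print(find_cores_per_job_sketch(16, 4))  # -> (4, 4)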
Example #14
def process_ref(in_file, ref, config):
    db_info = {
        "blast": (prepare_blast_db, blast_search),
        "blat": (prepare_blat_db, blat_search)
    }
    prepare_db, do_search = db_info[ref.get("aligner", "blast")]
    ref_file = prepare_ref_file(ref, config)
    blast_db = prepare_db(ref_file, "nucl")
    out_file = "%s-%s.tsv" % (os.path.splitext(in_file)[0], ref["name"])
    if not os.path.exists(out_file):
        with open(in_file) as in_handle:
            with open(out_file, "w") as out_handle:
                writer = csv.writer(out_handle)
                writer.writerow(
                    ["query", "length", "hit", "hitlength", "hitidentities"])
                with utils.cpmap(config["algorithm"]["cores"]) as cpmap:
                    results = cpmap(
                        do_search,
                        ((rec, blast_db, config["dir"]["work"])
                         for rec in SeqIO.parse(in_handle, "fasta")))
                    for info in results:
                        writer.writerow(info)
    return out_file