Exemple #1
0
def main(config_file, fc_dir, analysis_dir, run_info_yaml=None):
    with open(config_file) as in_handle:
        config = yaml.load(in_handle)
    fc_name, fc_date = get_flowcell_info(fc_dir)
    galaxy_api = GalaxyApiAccess(config["galaxy_url"], config["galaxy_api_key"])

    # run_info will override some galaxy details, if present
    if run_info_yaml:
        with open(run_info_yaml) as in_handle:
            run_details = yaml.load(in_handle)
        run_info = dict(details=run_details, run_id="")
    else:
        run_info = galaxy_api.run_details(fc_name, fc_date)

    base_folder_name = "%s_%s" % (fc_date, fc_name)
    run_details = lims_run_details(run_info, fc_name, base_folder_name)
    for (library_name, access_role, dbkey, lane, bc_id, name, desc, local_name) in run_details:
        library_id = get_galaxy_library(library_name, galaxy_api) if library_name else None
        upload_files = list(select_upload_files(local_name, bc_id, fc_dir, analysis_dir, config))

        if len(upload_files) > 0:
            print lane, bc_id, name, desc, library_name
            print "Creating storage directory"
            if library_id:
                folder, cur_galaxy_files = get_galaxy_folder(library_id, base_folder_name, name, desc, galaxy_api)
            else:
                cur_galaxy_files = []
            store_dir = move_to_storage(lane, bc_id, base_folder_name, upload_files, cur_galaxy_files, config)
            if store_dir and library_id:
                print "Uploading directory of files to Galaxy"
                print galaxy_api.upload_directory(library_id, folder["id"], store_dir, dbkey, access_role)
    if galaxy_api:
        add_run_summary_metrics(analysis_dir, galaxy_api)
def run_main(config, config_file, fc_dir, run_info_yaml):
    """Process every lane and then every sample of a flowcell.

    Run details are read from ``run_info_yaml`` when that file exists and are
    otherwise fetched through the Galaxy API. Lanes are dispatched to
    ``process_lane`` and samples to ``process_sample`` via ``utils.cpmap``
    sized by ``config["algorithm"]["num_cores"]``; ``write_metrics`` is called
    at the end.
    """
    work_dir = os.getcwd()
    fc_name, fc_date = get_flowcell_info(fc_dir)

    have_yaml = run_info_yaml and os.path.exists(run_info_yaml)
    if not have_yaml:
        log.info("Fetching run details from Galaxy instance")
        api = GalaxyApiAccess(config['galaxy_url'], config['galaxy_api_key'])
        run_info = api.run_details(fc_name, fc_date)
    else:
        log.info("Found YAML samplesheet, using %s instead of Galaxy API" % run_info_yaml)
        # NOTE(review): yaml.load is unsafe on untrusted input.
        with open(run_info_yaml) as yaml_handle:
            run_info = {"details": yaml.load(yaml_handle), "run_id": ""}
    fastq_dir = get_fastq_dir(fc_dir)
    run_items = _add_multiplex_across_lanes(run_info["details"], fastq_dir, fc_name)
    align_dir = os.path.join(work_dir, "alignments")

    # Stage 1: one job per flowcell lane
    with utils.cpmap(config["algorithm"]["num_cores"]) as lane_map:
        lane_jobs = ((item, fastq_dir, fc_name, fc_date, align_dir, config,
                      config_file) for item in run_items)
        # iterate only to drive the jobs; results are discarded
        for _ in lane_map(process_lane, lane_jobs):
            pass

    # Stage 2: one job per sample, which may span several lanes
    sample_files, sample_fastq, sample_info = organize_samples(
        align_dir, fastq_dir, work_dir, fc_name, fc_date, run_items)
    with utils.cpmap(config["algorithm"]["num_cores"]) as sample_map:
        sample_jobs = ((name, sample_fastq[name], sample_info[name], bam_files,
                        work_dir, config, config_file)
                       for name, bam_files in sample_files)
        for _ in sample_map(process_sample, sample_jobs):
            pass
    write_metrics(run_info, work_dir, fc_dir, fc_name, fc_date, fastq_dir)
Exemple #3
0
def main(config_file, fc_dir, analysis_dir):
    with open(config_file) as in_handle:
        config = yaml.load(in_handle)
    galaxy_api = GalaxyApiAccess(config["galaxy_url"], config["galaxy_api_key"])

    fc_name, fc_date = get_flowcell_info(fc_dir)
    folder_name = "%s_%s" % (fc_date, fc_name)
    run_info = lims_run_details(galaxy_api, fc_name, folder_name)
    for (dl_folder, access_role, dbkey, lane, bc_id, name, desc) in run_info:
        print folder_name, lane, bc_id, name, desc, dl_folder
        library_id = get_galaxy_library(dl_folder, galaxy_api)
        folder, cur_galaxy_files = get_galaxy_folder(library_id, folder_name, name, desc, galaxy_api)
        print "Creating storage directory"
        base_select = "%s_%s" % (lane, folder_name)
        store_dir = move_to_storage(
            lane,
            bc_id,
            folder_name,
            select_upload_files(base_select, bc_id, fc_dir, analysis_dir),
            cur_galaxy_files,
            config,
        )
        if store_dir:
            print "Uploading directory of files to Galaxy"
            print galaxy_api.upload_directory(library_id, folder["id"], store_dir, dbkey, access_role)
    add_run_summary_metrics(analysis_dir, galaxy_api)
Exemple #4
0
def main(config_file, fc_dir):
    """Process every lane of a flowcell, optionally in parallel.

    config_file -- YAML file providing ``galaxy_url``, ``galaxy_api_key`` and
      ``algorithm.num_cores``.
    fc_dir -- flowcell directory, passed to ``get_flowcell_info`` and
      ``get_fastq_dir``.

    Run details come from the Galaxy API. Each entry of
    ``run_info["details"]`` is handed to ``_process_wrapper``, through a
    ``multiprocessing`` pool when more than one core is configured.
    ``write_metrics`` is called once all lanes are done.
    """
    work_dir = os.getcwd()
    # NOTE(review): yaml.load is unsafe on untrusted input.
    with open(config_file) as in_handle:
        config = yaml.load(in_handle)
    galaxy_api = GalaxyApiAccess(config['galaxy_url'], config['galaxy_api_key'])
    fc_name, fc_date = get_flowcell_info(fc_dir)
    run_info = galaxy_api.run_details(fc_name)
    fastq_dir = get_fastq_dir(fc_dir)
    #print "Generating fastq files"
    #all_lanes = [i['lane'] for i in run_info["details"]]
    #short_fc_name = "%s_%s" % (fc_date, fc_name)
    #fastq_dir = generate_fastq(fc_dir, short_fc_name, all_lanes)
    lane_args = [(i, fastq_dir, fc_name, fc_date, config, config_file)
                 for i in run_info["details"]]
    num_cores = config["algorithm"]["num_cores"]
    if num_cores > 1:
        pool = Pool(num_cores)
        try:
            pool.map(_process_wrapper, lane_args)
            # release the workers once all lanes finished successfully
            pool.close()
        except:
            # deliberately broad: also stop workers on KeyboardInterrupt,
            # then let the original exception propagate
            pool.terminate()
            raise
        finally:
            pool.join()
    else:
        # explicit loop: the calls are made for their side effects only
        for args in lane_args:
            _process_wrapper(args)
    write_metrics(run_info, work_dir, fc_dir, fastq_dir)
Exemple #5
0
def get_runinfo(galaxy_url, galaxy_apikey, run_folder, storedir):
    """Fetch flattened nglims run details for a processed run folder.

    Returns the raw Galaxy response when it contains an ``"error"`` key,
    otherwise the list produced by ``_flatten_lane_details`` where selected
    items carry an ``"upload"`` dictionary describing the Galaxy target.
    Raises ValueError when the reported run name matches neither the flowcell
    date prefix nor the flowcell name suffix.
    """
    api = GalaxyApiAccess(galaxy_url, galaxy_apikey)
    fc_name, fc_date = flowcell.parse_dirname(run_folder)
    galaxy_info = api.run_details(fc_name, fc_date)
    if "error" in galaxy_info:
        return galaxy_info
    # NOTE(review): only a mismatch of BOTH date and name is rejected --
    # confirm that a partial match is meant to be accepted.
    run_name = galaxy_info["run_name"]
    if not (run_name.startswith(fc_date) or run_name.endswith(fc_name)):
        raise ValueError("Galaxy NGLIMS information %s does not match flowcell %s %s" %
                         (galaxy_info["run_name"], fc_date, fc_name))
    # keys moved from the item itself into its upload description
    moved_keys = ("lab_association", "private_libs", "researcher",
                  "researcher_id", "sample_id", "galaxy_library", "galaxy_role")
    out = []
    for item in _flatten_lane_details(galaxy_info):
        # NOTE(review): an item is skipped only when description AND
        # project_name are both "control" -- confirm this is intended.
        is_control = (item["description"] == "control"
                      and item["project_name"] == "control")
        if not is_control:
            item["upload"] = upload = {
                "method": "galaxy", "run_id": galaxy_info["run_id"],
                "fc_name": fc_name, "fc_date": fc_date,
                "dir": storedir,
                "galaxy_url": galaxy_url, "galaxy_api_key": galaxy_apikey}
            for key in moved_keys:
                upload[key] = item.pop(key, "")
        out.append(item)
    return out
Exemple #6
0
def organize(dirs, config, run_info_yaml):
    """Build the per-item run structure from a YAML file or the Galaxy API.

    When ``run_info_yaml`` exists it is read via ``_run_info_from_yaml``;
    otherwise details are fetched from Galaxy and each item is tagged with an
    ``"upload"`` dictionary. Every item then receives ``"config"``, ``"dirs"``
    and a default ``"name"`` before going through
    ``_add_reference_resources``. Returns the list of resulting items.
    """
    if not (run_info_yaml and os.path.exists(run_info_yaml)):
        logger.info("Fetching run details from Galaxy instance")
        fc_name, fc_date = get_flowcell_info(dirs["flowcell"])
        api = GalaxyApiAccess(config['galaxy_url'], config['galaxy_api_key'])
        run_details = []
        galaxy_info = api.run_details(fc_name, fc_date)
        for lane_item in galaxy_info["details"]:
            lane_item["upload"] = dict(method="galaxy",
                                       run_id=galaxy_info["run_id"],
                                       fc_name=fc_name,
                                       fc_date=fc_date)
            run_details.append(lane_item)
    else:
        logger.info("Using input YAML configuration: %s" % run_info_yaml)
        run_details = _run_info_from_yaml(dirs["flowcell"], run_info_yaml,
                                          config)

    organized = []
    for entry in run_details:
        entry["config"] = config_utils.update_w_custom(config, entry)
        entry["dirs"] = dirs
        # fall back to the description when no explicit name was given
        if "name" not in entry:
            entry["name"] = ["", entry["description"]]
        organized.append(_add_reference_resources(entry))
    return organized
Exemple #7
0
def main(config_file, month, year):
    """Write a CSV report of sequencing results for one billing period.

    The period runs from the 15th of the month before ``month`` (00:00:00) to
    the 14th of ``month`` (23:59:59). Rows come from
    ``galaxy_api.sqn_report`` and are written to
    ``<StartMon>_<EndMon>-<Year>-sequencing.csv`` in the current directory.
    """
    # NOTE(review): yaml.load is unsafe on untrusted input.
    with open(config_file) as config_handle:
        config = yaml.load(config_handle)
    api = GalaxyApiAccess(config["galaxy_url"], config["galaxy_api_key"])

    # January wraps around to December of the previous year
    if month > 1:
        smonth, syear = month - 1, year
    else:
        smonth, syear = 12, year - 1
    start_date = datetime(syear, smonth, 15, 0, 0, 0)
    # A first-to-last-day period would instead need the last day of the
    # month, e.g. (_, last_day) = calendar.monthrange(year, month)
    end_date = datetime(year, month, 14, 23, 59, 59)
    out_file = "%s_%s" % (start_date.strftime("%b"),
                          end_date.strftime("%b-%Y-sequencing.csv"))
    header = ["Date", "Product", "Payment", "Researcher", "Lab", "Email",
              "Project", "Sample", "Description", "Genome", "Flowcell", "Lane",
              "Notes"]
    with open(out_file, "w") as out_handle:
        writer = csv.writer(out_handle)
        writer.writerow(header)
        for s in api.sqn_report(start_date.isoformat(), end_date.isoformat()):
            # flowcell label: first and last "_"-separated parts of the folder
            folder_parts = s["sqn_run"]["run_folder"].split("_")
            flowcell_id = "_".join([folder_parts[0], folder_parts[-1]])
            row = [
                s["sqn_run"]["date"],
                s["sqn_type"],
                s["project"]["payment_(fund_number)"],
                s["project"]["researcher"],
                s["project"]["lab_association"],
                s["project"]["email"],
                s["project"]["project_name"],
                s["name"],
                s["description"],
                s["genome_build"],
                flowcell_id,
                s["sqn_run"]["lane"],
                s["sqn_run"]["results_notes"],
            ]
            writer.writerow(row)
Exemple #8
0
def get_runinfo(galaxy_url, galaxy_apikey, run_folder, storedir):
    """Return flattened Galaxy nglims run details for ``run_folder``.

    The Galaxy response is returned unchanged when it holds an ``"error"``
    key. Otherwise the items from ``_flatten_lane_details`` are returned, with
    an ``"upload"`` dictionary attached to every item that is not marked as a
    control in both ``description`` and ``project_name``. Raises ValueError if
    the run name neither starts with the flowcell date nor ends with the
    flowcell name.
    """
    fc_client = GalaxyApiAccess(galaxy_url, galaxy_apikey)
    fc_name, fc_date = flowcell.parse_dirname(run_folder)
    galaxy_info = fc_client.run_details(fc_name, fc_date)
    if "error" in galaxy_info:
        return galaxy_info

    reported_name = galaxy_info["run_name"]
    date_matches = reported_name.startswith(fc_date)
    # NOTE(review): the run is rejected only if neither part matches --
    # verify that this leniency is intended.
    if not date_matches and not reported_name.endswith(fc_name):
        raise ValueError("Galaxy NGLIMS information %s does not match flowcell %s %s" %
                         (galaxy_info["run_name"], fc_date, fc_name))

    out = []
    for lane_item in _flatten_lane_details(galaxy_info):
        # Controls (flagged in both fields) are passed through without upload
        if lane_item["description"] != "control" or lane_item["project_name"] != "control":
            target = {"method": "galaxy", "run_id": galaxy_info["run_id"],
                      "fc_name": fc_name, "fc_date": fc_date,
                      "dir": storedir,
                      "galaxy_url": galaxy_url, "galaxy_api_key": galaxy_apikey}
            lane_item["upload"] = target
            # relocate researcher/library metadata into the upload details
            for field in ["lab_association", "private_libs", "researcher",
                          "researcher_id", "sample_id", "galaxy_library",
                          "galaxy_role"]:
                target[field] = lane_item.pop(field, "")
        out.append(lane_item)
    return out
Exemple #9
0
def organize(dirs, config, run_info_yaml):
    """Assemble run items from a YAML file or, failing that, the Galaxy API.

    Each returned item has its ``"config"`` set from
    ``config_utils.update_w_custom``, its ``"algorithm"`` key removed,
    ``"dirs"`` attached, a default ``"name"`` filled in and has been passed
    through ``add_reference_resources``.
    """
    use_yaml = run_info_yaml and os.path.exists(run_info_yaml)
    if use_yaml:
        logger.info("Using input YAML configuration: %s" % run_info_yaml)
        run_details = _run_info_from_yaml(dirs["flowcell"], run_info_yaml, config)
    else:
        logger.info("Fetching run details from Galaxy instance")
        fc_name, fc_date = flowcell.parse_dirname(dirs["flowcell"])
        api = GalaxyApiAccess(config['galaxy_url'], config['galaxy_api_key'])
        run_details = []
        galaxy_info = api.run_details(fc_name, fc_date)
        for lane_item in galaxy_info["details"]:
            lane_item["upload"] = dict(method="galaxy",
                                       run_id=galaxy_info["run_id"],
                                       fc_name=fc_name, fc_date=fc_date)
            run_details.append(lane_item)

    prepared = []
    for entry in run_details:
        # the merged configuration takes over the item-level algorithm
        # settings, so the separate copy is dropped afterwards
        entry["config"] = config_utils.update_w_custom(config, entry)
        entry.pop("algorithm", None)
        entry["dirs"] = dirs
        if "name" not in entry:
            entry["name"] = ["", entry["description"]]
        prepared.append(add_reference_resources(entry))
    return prepared
Exemple #10
0
def get_run_info(fc_dir, config, run_info_yaml):
    """Load run information from a YAML samplesheet or the Galaxy API.

    Returns a ``(fc_name, fc_date, runs)`` tuple where ``runs`` is the result
    of ``_organize_runs_by_lane`` applied to the loaded run information.
    """
    have_samplesheet = run_info_yaml and os.path.exists(run_info_yaml)
    if not have_samplesheet:
        logger.info("Fetching run details from Galaxy instance")
        fc_name, fc_date = get_flowcell_info(fc_dir)
        api = GalaxyApiAccess(config['galaxy_url'], config['galaxy_api_key'])
        run_info = api.run_details(fc_name, fc_date)
    else:
        logger.info("Found YAML samplesheet, using %s instead of Galaxy API" % run_info_yaml)
        fc_name, fc_date, run_info = _run_info_from_yaml(fc_dir, run_info_yaml)
    by_lane = _organize_runs_by_lane(run_info)
    return fc_name, fc_date, by_lane
Exemple #11
0
def get_run_info(fc_dir, config, run_info_yaml):
    """Return flowcell name, flowcell date and lane-organized run details.

    An existing ``run_info_yaml`` file is preferred; without one the details
    are requested from the Galaxy instance named in ``config``.
    """
    if run_info_yaml and os.path.exists(run_info_yaml):
        logger.info("Found YAML samplesheet, using %s instead of Galaxy API" % run_info_yaml)
        yaml_result = _run_info_from_yaml(fc_dir, run_info_yaml)
        fc_name, fc_date, run_info = yaml_result
    else:
        logger.info("Fetching run details from Galaxy instance")
        fc_name, fc_date = get_flowcell_info(fc_dir)
        run_info = GalaxyApiAccess(
            config['galaxy_url'], config['galaxy_api_key']
        ).run_details(fc_name, fc_date)
    return fc_name, fc_date, _organize_runs_by_lane(run_info)
def _get_run_info(fc_name, fc_date, config, run_info_yaml):
    """Return run information from a YAML samplesheet or the Galaxy API.

    With an existing ``run_info_yaml`` the result is a dictionary holding the
    parsed file under ``"details"`` and an empty ``"run_id"``; otherwise it is
    whatever ``GalaxyApiAccess.run_details`` returns.
    """
    if not (run_info_yaml and os.path.exists(run_info_yaml)):
        log.info("Fetching run details from Galaxy instance")
        api = GalaxyApiAccess(config['galaxy_url'], config['galaxy_api_key'])
        return api.run_details(fc_name, fc_date)

    log.info("Found YAML samplesheet, using %s instead of Galaxy API" % run_info_yaml)
    # NOTE(review): yaml.load is unsafe on untrusted input.
    with open(run_info_yaml) as yaml_handle:
        parsed = yaml.load(yaml_handle)
    return {"details": parsed, "run_id": ""}
Exemple #13
0
def _get_run_info(fc_name, fc_date, config, run_info_yaml):
    """Fetch run details, preferring a local YAML samplesheet over Galaxy.

    Returns ``dict(details=<parsed YAML>, run_id="")`` when ``run_info_yaml``
    exists, else the result of ``GalaxyApiAccess.run_details``.
    """
    samplesheet_found = run_info_yaml and os.path.exists(run_info_yaml)
    if samplesheet_found:
        logger.info("Found YAML samplesheet, using %s instead of Galaxy API" % run_info_yaml)
        # NOTE(review): yaml.load is unsafe on untrusted input.
        with open(run_info_yaml) as sheet_handle:
            sheet = yaml.load(sheet_handle)
        return {"details": sheet, "run_id": ""}

    logger.info("Fetching run details from Galaxy instance")
    return GalaxyApiAccess(
        config['galaxy_url'], config['galaxy_api_key']
    ).run_details(fc_name, fc_date)
def main(config_file, fc_dir, analysis_dir, run_info_yaml=None):
    config = load_config(config_file)
    galaxy_api = (GalaxyApiAccess(config['galaxy_url'],
                                  config['galaxy_api_key'])
                  if config.has_key("galaxy_api_key") else None)
    fc_name, fc_date, run_info = get_run_info(fc_dir, config, run_info_yaml)

    base_folder_name = "%s_%s" % (fc_date, fc_name)
    run_details = lims_run_details(run_info, base_folder_name)
    for (library_name, access_role, dbkey, lane, bc_id, name, desc, local_name,
         fname_out) in run_details:
        library_id = (get_galaxy_library(library_name, galaxy_api)
                      if library_name else None)
        upload_files = list(
            select_upload_files(local_name, bc_id, fc_dir, analysis_dir,
                                config, fname_out))
        if len(upload_files) > 0:
            print lane, bc_id, name, desc, library_name
            print "Creating storage directory"
            if library_id:
                folder, cur_galaxy_files = get_galaxy_folder(
                    library_id, base_folder_name, name, desc, galaxy_api)
            else:
                cur_galaxy_files = []
            store_dir = move_to_storage(lane, bc_id, base_folder_name,
                                        upload_files, cur_galaxy_files, config,
                                        config_file, fname_out)
            if store_dir and library_id:
                print "Uploading directory of files to Galaxy"
                print galaxy_api.upload_directory(library_id, folder['id'],
                                                  store_dir, dbkey,
                                                  access_role)
    if galaxy_api and not run_info_yaml:
        add_run_summary_metrics(analysis_dir, galaxy_api)
def main(config_file, fc_dir):
    """Process every lane of a flowcell, optionally in parallel.

    config_file -- configuration file read through ``load_config``; must
      provide ``galaxy_url``, ``galaxy_api_key`` and ``algorithm.num_cores``.
    fc_dir -- flowcell directory, passed to ``flowcell.parse_dirname`` and
      ``flowcell.get_fastq_dir``.

    Each entry of the Galaxy ``run_info["details"]`` is handed to
    ``_process_wrapper``, through a ``multiprocessing`` pool when more than
    one core is configured.
    """
    config = load_config(config_file)
    galaxy_api = GalaxyApiAccess(config["galaxy_url"], config["galaxy_api_key"])
    fc_name, fc_date = flowcell.parse_dirname(fc_dir)
    run_info = galaxy_api.run_details(fc_name)
    fastq_dir = flowcell.get_fastq_dir(fc_dir)
    lane_args = [(i, fastq_dir, fc_name, fc_date, config, config_file)
                 for i in run_info["details"]]
    num_cores = config["algorithm"]["num_cores"]
    if num_cores > 1:
        pool = Pool(num_cores)
        try:
            pool.map(_process_wrapper, lane_args)
            # release the workers once all lanes finished successfully
            pool.close()
        except:
            # deliberately broad: also stop workers on KeyboardInterrupt,
            # then let the original exception propagate
            pool.terminate()
            raise
        finally:
            pool.join()
    else:
        # explicit loop: the calls are made for their side effects only
        for args in lane_args:
            _process_wrapper(args)
Exemple #16
0
def main(config_file, fc_dir):
    """Process every lane of a flowcell, optionally in parallel.

    config_file -- configuration file read through ``load_config``; must
      provide ``galaxy_url``, ``galaxy_api_key`` and ``algorithm.num_cores``.
    fc_dir -- flowcell directory, passed to ``get_flowcell_info`` and
      ``get_fastq_dir``.

    Each entry of the Galaxy ``run_info["details"]`` is handed to
    ``_process_wrapper``, through a ``multiprocessing`` pool when more than
    one core is configured.
    """
    config = load_config(config_file)
    galaxy_api = GalaxyApiAccess(config['galaxy_url'], config['galaxy_api_key'])
    fc_name, fc_date = get_flowcell_info(fc_dir)
    run_info = galaxy_api.run_details(fc_name)
    fastq_dir = get_fastq_dir(fc_dir)
    lane_args = [(i, fastq_dir, fc_name, fc_date, config, config_file)
                 for i in run_info["details"]]
    num_cores = config["algorithm"]["num_cores"]
    if num_cores > 1:
        pool = Pool(num_cores)
        try:
            pool.map(_process_wrapper, lane_args)
            # release the workers once all lanes finished successfully
            pool.close()
        except:
            # deliberately broad: also stop workers on KeyboardInterrupt,
            # then let the original exception propagate
            pool.terminate()
            raise
        finally:
            pool.join()
    else:
        # explicit loop: the calls are made for their side effects only
        for args in lane_args:
            _process_wrapper(args)
def main(config_file, fc_dir):
    """Process every lane of a flowcell, optionally in parallel.

    config_file -- YAML file providing ``galaxy_url``, ``galaxy_api_key`` and
      ``algorithm.num_cores``.
    fc_dir -- flowcell directory, passed to ``get_flowcell_info`` and
      ``get_fastq_dir``.

    Each entry of the Galaxy ``run_info["details"]`` is handed to
    ``_process_wrapper``, through a ``multiprocessing`` pool when more than
    one core is configured.
    """
    # NOTE(review): yaml.load is unsafe on untrusted input.
    with open(config_file) as in_handle:
        config = yaml.load(in_handle)
    galaxy_api = GalaxyApiAccess(config['galaxy_url'], config['galaxy_api_key'])
    fc_name, fc_date = get_flowcell_info(fc_dir)
    run_info = galaxy_api.run_details(fc_name)
    fastq_dir = get_fastq_dir(fc_dir)
    lane_args = [(i, fastq_dir, fc_name, fc_date, config, config_file)
                 for i in run_info["details"]]
    num_cores = config["algorithm"]["num_cores"]
    if num_cores > 1:
        pool = Pool(num_cores)
        try:
            pool.map(_process_wrapper, lane_args)
            # release the workers once all lanes finished successfully
            pool.close()
        except:
            # deliberately broad: also stop workers on KeyboardInterrupt,
            # then let the original exception propagate
            pool.terminate()
            raise
        finally:
            pool.join()
    else:
        # explicit loop: the calls are made for their side effects only
        for args in lane_args:
            _process_wrapper(args)
def add_to_galaxy_datalibs(prepped_files, config):
    """Register organized files in synchronized Galaxy data libraries.

    For every ``(key, vals)`` pair of ``prepped_files`` a library name of the
    form ``"SCDE: <key[0]> -- <remaining non-empty key parts>"`` is built and
    passed with ``vals`` to ``_add_data_library``. Per the original notes the
    overall task involves three actions:
      - create a data library for each top level item
      - create folders and sub-folders for the subsequent levels
      - add links to the data in the final folders
    """
    api = GalaxyApiAccess(config["galaxy_url"], config["galaxy_apikey"])
    for key, vals in prepped_files.iteritems():
        top_level = key[0]
        # empty trailing key parts are left out of the library name
        sub_levels = ", ".join(part for part in key[1:] if part)
        dl_name = "SCDE: %s -- %s" % (top_level, sub_levels)
        _add_data_library(api, dl_name, vals)
Exemple #19
0
def main(config_file, fc_dir, analysis_dir, run_info_yaml=None):
    with open(config_file) as in_handle:
        config = yaml.load(in_handle)
    fc_name, fc_date = get_flowcell_info(fc_dir)
    if run_info_yaml:
        with open(run_info_yaml) as in_handle:
            run_details = yaml.load(in_handle)
        run_info = dict(details=run_details, run_id="")
        galaxy_api = None
    else:
        galaxy_api = GalaxyApiAccess(config['galaxy_url'],
                                     config['galaxy_api_key'])
        run_info = galaxy_api.run_details(fc_name, fc_date)

    base_folder_name = "%s_%s" % (fc_date, fc_name)
    run_details = lims_run_details(run_info, fc_name, base_folder_name)
    for (library_name, access_role, dbkey, lane, bc_id, name, desc,
         local_name) in run_details:
        library_id = (get_galaxy_library(library_name, galaxy_api)
                      if library_name else None)
        upload_files = list(
            select_upload_files(local_name, bc_id, fc_dir, analysis_dir,
                                config))
        if len(upload_files) > 0:
            print lane, bc_id, name, desc, library_name
            print "Creating storage directory"
            if library_id:
                folder, cur_galaxy_files = get_galaxy_folder(
                    library_id, base_folder_name, name, desc, galaxy_api)
            else:
                cur_galaxy_files = []
            store_dir = move_to_storage(lane, bc_id, base_folder_name,
                                        upload_files, cur_galaxy_files, config)
            if store_dir and library_id:
                print "Uploading directory of files to Galaxy"
                print galaxy_api.upload_directory(library_id, folder['id'],
                                                  store_dir, dbkey,
                                                  access_role)
    if galaxy_api:
        add_run_summary_metrics(analysis_dir, galaxy_api)
def main(config_file, fc_dir, run_info_yaml=None):
    """Process every lane and then every sample of a flowcell.

    config_file -- YAML file providing ``algorithm.num_cores`` and, when no
      run info file is given, ``galaxy_url`` and ``galaxy_api_key``.
    fc_dir -- flowcell directory, passed to ``get_flowcell_info`` and
      ``get_fastq_dir``.
    run_info_yaml -- optional YAML file with the run details; without it the
      details are fetched through the Galaxy API.

    Lanes go through ``_process_lane_wrapper`` and samples through
    ``_process_sample_wrapper``, using a ``multiprocessing`` pool when more
    than one core is configured. ``write_metrics`` is called at the end.
    """
    work_dir = os.getcwd()
    # NOTE(review): yaml.load is unsafe on untrusted input.
    with open(config_file) as in_handle:
        config = yaml.load(in_handle)
    # Resolve the flowcell identifiers first: the Galaxy lookup below needs
    # fc_name, which was previously referenced before being assigned.
    fc_name, fc_date = get_flowcell_info(fc_dir)
    if run_info_yaml:
        with open(run_info_yaml) as in_handle:
            run_details = yaml.load(in_handle)
        run_info = dict(details=run_details, run_id="")
    else:
        galaxy_api = GalaxyApiAccess(config['galaxy_url'], config['galaxy_api_key'])
        run_info = galaxy_api.run_details(fc_name)
    run_items = _add_multiplex_to_control(run_info["details"])
    fastq_dir = get_fastq_dir(fc_dir)
    align_dir = os.path.join(work_dir, "alignments")

    num_cores = config["algorithm"]["num_cores"]
    pool = Pool(num_cores) if num_cores > 1 else None
    map_fn = pool.map if pool else map
    try:
        # process each flowcell lane
        map_fn(_process_lane_wrapper,
               ((i, fastq_dir, fc_name, fc_date, align_dir, config, config_file)
                for i in run_items))
        # process samples, potentially multiplexed across multiple lanes
        sample_files, sample_fastq, sample_info = organize_samples(
            align_dir, fastq_dir, work_dir, fc_name, fc_date, run_items)
        map_fn(_process_sample_wrapper,
               ((name, sample_fastq[name], sample_info[name], bam_files,
                 work_dir, config, config_file)
                for name, bam_files in sample_files))
        if pool:
            # release the workers once everything finished successfully
            pool.close()
    except:
        # deliberately broad: also stop workers on KeyboardInterrupt,
        # then let the original exception propagate
        if pool:
            pool.terminate()
        raise
    finally:
        if pool:
            pool.join()
    write_metrics(run_info, work_dir, fc_dir, fc_name, fc_date, fastq_dir)
def main(config_file, month, year):
    """Write a CSV report of sequencing results for one billing period.

    The period runs from the 15th of the month before ``month`` (00:00:00) to
    the 14th of ``month`` (23:59:59). Rows come from
    ``galaxy_api.sqn_report``; the "Received" column is filled by
    ``_received_date`` from each sample's events. Output goes to
    ``<StartMon>_<EndMon>-<Year>-sequencing.csv`` in the current directory.
    """
    with open(config_file) as config_handle:
        config = yaml.safe_load(config_handle)
    api = GalaxyApiAccess(config["galaxy_url"], config["galaxy_apikey"])

    # January wraps around to December of the previous year
    if month > 1:
        smonth, syear = month - 1, year
    else:
        smonth, syear = 12, year - 1
    start_date = datetime(syear, smonth, 15, 0, 0, 0)
    # A first-to-last-day period would instead need the last day of the
    # month, e.g. (_, last_day) = calendar.monthrange(year, month)
    end_date = datetime(year, month, 14, 23, 59, 59)
    out_file = "%s_%s" % (start_date.strftime("%b"),
                          end_date.strftime("%b-%Y-sequencing.csv"))
    header = ["Date", "Product", "Payment", "Researcher", "Lab", "Email",
              "Project", "Sample", "Description", "Genome", "Flowcell",
              "Lane", "Received", "Notes"]
    with open(out_file, "w") as out_handle:
        writer = csv.writer(out_handle)
        writer.writerow(header)
        for s in api.sqn_report(start_date.isoformat(), end_date.isoformat()):
            # flowcell label: first and last "_"-separated parts of the folder
            folder_parts = s["sqn_run"]["run_folder"].split("_")
            flowcell_id = "_".join([folder_parts[0], folder_parts[-1]])
            row = [
                s["sqn_run"]["date"],
                s["sqn_type"],
                s["project"]["payment_(fund_number)"],
                s["project"]["researcher"],
                s["project"]["lab_association"],
                s["project"]["email"],
                s["project"]["project_name"],
                s["name"],
                s["description"],
                s["genome_build"],
                flowcell_id,
                s["sqn_run"]["lane"],
                _received_date(s["events"]),
                s["sqn_run"]["results_notes"],
            ]
            writer.writerow(row)
def run_main(config, config_file, fc_dir, run_info_yaml):
    """Run lane-level and then sample-level processing for a flowcell.

    Run details come from ``run_info_yaml`` if that file exists, else from
    the Galaxy API. Work is distributed with ``utils.cpmap`` using
    ``config["algorithm"]["num_cores"]``, and ``write_metrics`` is called
    once both stages have completed.
    """
    work_dir = os.getcwd()
    fc_name, fc_date = get_flowcell_info(fc_dir)

    if not (run_info_yaml and os.path.exists(run_info_yaml)):
        log.info("Fetching run details from Galaxy instance")
        api = GalaxyApiAccess(config['galaxy_url'], config['galaxy_api_key'])
        run_info = api.run_details(fc_name, fc_date)
    else:
        log.info("Found YAML samplesheet, using %s instead of Galaxy API" %
                 run_info_yaml)
        # NOTE(review): yaml.load is unsafe on untrusted input.
        with open(run_info_yaml) as sheet_handle:
            sheet = yaml.load(sheet_handle)
        run_info = {"details": sheet, "run_id": ""}
    fastq_dir = get_fastq_dir(fc_dir)
    run_items = _add_multiplex_across_lanes(run_info["details"], fastq_dir,
                                            fc_name)
    align_dir = os.path.join(work_dir, "alignments")

    # First pass: every flowcell lane
    with utils.cpmap(config["algorithm"]["num_cores"]) as run_parallel:
        per_lane = ((lane_item, fastq_dir, fc_name, fc_date, align_dir, config,
                     config_file) for lane_item in run_items)
        # results are not needed; iterating drives the processing
        for _ in run_parallel(process_lane, per_lane):
            pass

    # Second pass: samples, which may be multiplexed across several lanes
    sample_files, sample_fastq, sample_info = organize_samples(
        align_dir, fastq_dir, work_dir, fc_name, fc_date, run_items)
    with utils.cpmap(config["algorithm"]["num_cores"]) as run_parallel:
        per_sample = ((name, sample_fastq[name], sample_info[name], bam_files,
                       work_dir, config, config_file)
                      for name, bam_files in sample_files)
        for _ in run_parallel(process_sample, per_sample):
            pass
    write_metrics(run_info, work_dir, fc_dir, fc_name, fc_date, fastq_dir)
Exemple #23
0
def get_runinfo(galaxy_url, galaxy_apikey, run_folder):
    """Return the Galaxy nglims run details for a processed run folder.

    The flowcell name and date are parsed from ``run_folder`` with
    ``flowcell.parse_dirname`` and passed to ``GalaxyApiAccess.run_details``,
    whose result is returned unchanged.
    """
    api = GalaxyApiAccess(galaxy_url, galaxy_apikey)
    parsed = flowcell.parse_dirname(run_folder)
    fc_name, fc_date = parsed
    details = api.run_details(fc_name, fc_date)
    return details