Example #1
0
def get_runinfo(galaxy_url, galaxy_apikey, run_folder, storedir):
    """Fetch flattened per-lane run details for a run folder from the Galaxy nglims API.

    The flowcell name and date are parsed from ``run_folder`` and used to query
    Galaxy. If the response contains an "error" key it is returned unchanged.
    Otherwise each flattened lane item is returned in a list; items get an
    "upload" dictionary unless both their "description" and "project_name"
    equal "control".

    Raises:
        ValueError: when the Galaxy run name neither starts with the flowcell
            date nor ends with the flowcell name.
    """
    api = GalaxyApiAccess(galaxy_url, galaxy_apikey)
    fc_name, fc_date = flowcell.parse_dirname(run_folder)
    galaxy_info = api.run_details(fc_name, fc_date)
    if "error" in galaxy_info:
        return galaxy_info
    run_name = galaxy_info["run_name"]
    if not (run_name.startswith(fc_date) or run_name.endswith(fc_name)):
        raise ValueError("Galaxy NGLIMS information %s does not match flowcell %s %s" %
                         (run_name, fc_date, fc_name))
    # Keys moved out of the lane item and into its upload configuration.
    moved_keys = ("lab_association", "private_libs", "researcher", "researcher_id", "sample_id",
                  "galaxy_library", "galaxy_role")
    out = []
    for item in _flatten_lane_details(galaxy_info):
        is_control = item["description"] == "control" and item["project_name"] == "control"
        if not is_control:
            upload = {"method": "galaxy", "run_id": galaxy_info["run_id"],
                      "fc_name": fc_name, "fc_date": fc_date,
                      "dir": storedir,
                      "galaxy_url": galaxy_url, "galaxy_api_key": galaxy_apikey}
            for key in moved_keys:
                # Missing keys are recorded as empty strings.
                upload[key] = item.pop(key, "")
            item["upload"] = upload
        out.append(item)
    return out
Example #2
0
def organize(dirs, config, run_info_yaml):
    """Build the per-sample run structure from a YAML file or from the Galaxy API.

    When ``run_info_yaml`` is given and exists on disk, run details are read
    from it; otherwise they are fetched from the Galaxy instance configured in
    ``config``. Each item is then given "config", "dirs" and (if absent) "name"
    entries and passed through ``add_reference_resources``.

    Returns:
        List of the processed items.
    """
    if not (run_info_yaml and os.path.exists(run_info_yaml)):
        logger.info("Fetching run details from Galaxy instance")
        fc_name, fc_date = flowcell.parse_dirname(dirs["flowcell"])
        api = GalaxyApiAccess(config['galaxy_url'], config['galaxy_api_key'])
        galaxy_info = api.run_details(fc_name, fc_date)
        run_details = []
        for lane in galaxy_info["details"]:
            lane["upload"] = {"method": "galaxy", "run_id": galaxy_info["run_id"],
                              "fc_name": fc_name, "fc_date": fc_date}
            run_details.append(lane)
    else:
        logger.info("Using input YAML configuration: %s" % run_info_yaml)
        run_details = _run_info_from_yaml(dirs["flowcell"], run_info_yaml, config)

    organized = []
    for sample in run_details:
        # Fold algorithm details into the configuration, then drop the
        # top-level copy so they are not specified twice.
        sample["config"] = config_utils.update_w_custom(config, sample)
        sample.pop("algorithm", None)
        sample["dirs"] = dirs
        if "name" not in sample:
            sample["name"] = ["", sample["description"]]
        organized.append(add_reference_resources(sample))
    return organized
Example #3
0
def get_runinfo(galaxy_url, galaxy_apikey, run_folder, storedir):
    """Look up a processed run folder in the Galaxy nglims API and flatten its lanes.

    Returns the raw Galaxy response when it carries an "error" key. Otherwise
    returns a list of flattened lane items, where every item that is not a
    control (both "description" and "project_name" equal to "control") gets an
    "upload" dictionary describing the Galaxy upload.

    Raises:
        ValueError: when the Galaxy run name neither starts with the flowcell
            date nor ends with the flowcell name.
    """
    galaxy_api = GalaxyApiAccess(galaxy_url, galaxy_apikey)
    fc_name, fc_date = flowcell.parse_dirname(run_folder)
    galaxy_info = galaxy_api.run_details(fc_name, fc_date)
    if "error" in galaxy_info:
        return galaxy_info

    run_name = galaxy_info["run_name"]
    date_matches = run_name.startswith(fc_date)
    if not date_matches and not run_name.endswith(fc_name):
        raise ValueError("Galaxy NGLIMS information %s does not match flowcell %s %s" %
                         (run_name, fc_date, fc_name))

    results = []
    for lane in _flatten_lane_details(galaxy_info):
        if lane["description"] == "control" and lane["project_name"] == "control":
            # Controls are passed through without upload information.
            results.append(lane)
            continue
        lane["upload"] = {"method": "galaxy", "run_id": galaxy_info["run_id"],
                          "fc_name": fc_name, "fc_date": fc_date,
                          "dir": storedir,
                          "galaxy_url": galaxy_url, "galaxy_api_key": galaxy_apikey}
        # Relocate researcher/library details from the lane into the upload block,
        # defaulting to an empty string when a key is absent.
        for name in ["lab_association", "private_libs", "researcher", "researcher_id", "sample_id",
                     "galaxy_library", "galaxy_role"]:
            lane["upload"][name] = lane.pop(name, "")
        results.append(lane)
    return results
Example #4
0
def _run_info_from_yaml(fc_dir, run_info_yaml, config):
    """Load per-sample run details from a YAML file.

    The YAML may be either a bare list of samples or a dictionary with a
    "details" list plus global settings ("fc_name", "fc_date", "globals",
    "upload", "test_run"). Each sample is normalized in turn and the resulting
    list is checked with ``_check_sample_config`` before being returned.

    Raises:
        ValueError: when a sample lacks a "description" and ``_item_is_bam``
            does not accept it.
    """
    with open(run_info_yaml) as in_handle:
        # NOTE(review): yaml.load without a safe loader can construct arbitrary
        # Python objects; consider yaml.safe_load if inputs may be untrusted.
        parsed = yaml.load(in_handle)

    fc_name = fc_date = None
    if fc_dir:
        try:
            fc_name, fc_date = flowcell.parse_dirname(fc_dir)
        except ValueError:
            # Directory name could not be parsed; leave name and date unset.
            pass

    if isinstance(parsed, dict):
        global_config = copy.deepcopy(parsed)
        del global_config["details"]
        if "fc_name" in parsed and "fc_date" in parsed:
            fc_name = parsed["fc_name"].replace(" ", "_")
            fc_date = str(parsed["fc_date"]).replace(" ", "_")
        global_vars = global_config.pop("globals", {})
        samples = parsed["details"]
    else:
        global_config, global_vars = {}, {}
        samples = parsed

    run_details = []
    for idx, item in enumerate(samples):
        input_num = idx + 1
        item = _normalize_files(item, fc_dir)
        # Lanes default to the 1-based position of the sample in the file.
        item["lane"] = _clean_characters(str(item.get("lane", input_num)))
        if "description" not in item:
            if not _item_is_bam(item):
                raise ValueError(
                    "No `description` sample name provided for input #%s" %
                    input_num)
            item["description"] = get_sample_name(item["files"][0])
        item["description"] = _clean_characters(str(item["description"]))
        if "upload" not in item:
            upload = global_config.get("upload", {})
            # A plain string is shorthand for a local upload directory.
            if isinstance(upload, basestring):
                upload = {"dir": upload}
            if fc_name and fc_date:
                upload.update({"fc_name": fc_name, "fc_date": fc_date})
            upload["run_id"] = ""
            item["upload"] = upload
        algorithm = _replace_global_vars(item["algorithm"], global_vars)
        algorithm = genome.abs_file_paths(
            algorithm, ignore_keys=ALGORITHM_NOPATH_KEYS)
        item["genome_build"] = str(item.get("genome_build", ""))
        item["algorithm"] = _add_algorithm_defaults(algorithm)
        item["rgnames"] = prep_rg_names(item, config, fc_name, fc_date)
        item["test_run"] = global_config.get("test_run", False)
        run_details.append(_clean_metadata(item))
    _check_sample_config(run_details, run_info_yaml)
    return run_details
Example #5
0
def _run_info_from_yaml(fc_dir, run_info_yaml, config):
    """Parse a sample YAML file into a list of normalized run items.

    Accepts a YAML document that is either a list of samples or a dictionary
    holding the samples under "details" together with global entries
    ("fc_name", "fc_date", "globals", "upload", "test_run"). Every sample is
    normalized, cleaned via ``_clean_metadata`` and ``_clean_algorithm``, and
    the full list is validated by ``_check_sample_config``.

    Raises:
        ValueError: when a sample has no "description" and ``_item_is_bam``
            does not accept it.
    """
    with open(run_info_yaml) as in_handle:
        # NOTE(review): yaml.load without a safe loader can construct arbitrary
        # Python objects; consider yaml.safe_load if inputs may be untrusted.
        content = yaml.load(in_handle)

    fc_name, fc_date = None, None
    if fc_dir:
        try:
            fc_name, fc_date = flowcell.parse_dirname(fc_dir)
        except ValueError:
            # Unparseable directory name: continue without flowcell details.
            pass

    global_config, global_vars = {}, {}
    entries = content
    if isinstance(content, dict):
        global_config = copy.deepcopy(content)
        del global_config["details"]
        has_flowcell_keys = "fc_name" in content and "fc_date" in content
        if has_flowcell_keys:
            fc_name = content["fc_name"].replace(" ", "_")
            fc_date = str(content["fc_date"]).replace(" ", "_")
        global_vars = global_config.pop("globals", {})
        entries = content["details"]

    run_details = []
    for position, item in enumerate(entries, 1):
        item = _normalize_files(item, fc_dir)
        if "lane" not in item:
            # Default the lane to the sample's 1-based position.
            item["lane"] = str(position)
        item["lane"] = _clean_characters(str(item["lane"]))
        if "description" not in item:
            if not _item_is_bam(item):
                raise ValueError("No `description` sample name provided for input #%s" % position)
            item["description"] = get_sample_name(item["files"][0])
        item["description"] = _clean_characters(str(item["description"]))
        if "upload" not in item:
            upload = global_config.get("upload", {})
            # A bare string in `upload` names a local directory.
            if isinstance(upload, basestring):
                upload = {"dir": upload}
            if fc_name and fc_date:
                upload["fc_name"], upload["fc_date"] = fc_name, fc_date
            upload["run_id"] = ""
            item["upload"] = upload
        algorithm = _replace_global_vars(item["algorithm"], global_vars)
        algorithm = genome.abs_file_paths(algorithm, ignore_keys=ALGORITHM_NOPATH_KEYS)
        item["genome_build"] = str(item.get("genome_build", ""))
        item["algorithm"] = _add_algorithm_defaults(algorithm)
        item["rgnames"] = prep_rg_names(item, config, fc_name, fc_date)
        item["test_run"] = global_config.get("test_run", False)
        item = _clean_algorithm(_clean_metadata(item))
        run_details.append(item)
    _check_sample_config(run_details, run_info_yaml)
    return run_details
Example #6
0
def _write_sample_config(run_folder, ldetails):
    """Generate a bcbio-nextgen YAML configuration file for processing a sample.

    The file is written inside ``run_folder`` and named after the folder's
    basename with a ``.yaml`` extension. It contains the prepared samples
    (sorted by "name" then "description") plus the flowcell name and date
    parsed from ``run_folder``.

    Returns:
        Path to the written YAML file.
    """
    out_file = os.path.join(run_folder, "%s.yaml" % os.path.basename(run_folder))
    # Assemble the full configuration before opening the output file, so that
    # a failure while parsing the flowcell name or preparing a sample does not
    # leave an empty, truncated configuration file behind.
    fc_name, fc_date = flowcell.parse_dirname(run_folder)
    samples = sorted((_prepare_sample(x, run_folder) for x in ldetails),
                     key=operator.itemgetter("name", "description"))
    out = {"details": samples,
           "fc_name": fc_name,
           "fc_date": fc_date}
    with open(out_file, "w") as out_handle:
        yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False)
    return out_file
Example #7
0
def _write_sample_config(run_folder, ldetails):
    """Generate a bcbio-nextgen YAML configuration file for processing a sample.

    Writes ``<run_folder>/<basename of run_folder>.yaml`` containing the
    prepared sample details, sorted by "name" and then "description", together
    with the flowcell name and date parsed from ``run_folder``.

    Returns:
        Path to the written YAML file.
    """
    out_file = os.path.join(run_folder, "%s.yaml" % os.path.basename(run_folder))
    # Compute everything that can fail first; only then open (and truncate)
    # the output file, so errors never leave a partial configuration on disk.
    fc_name, fc_date = flowcell.parse_dirname(run_folder)
    details = sorted((_prepare_sample(x, run_folder) for x in ldetails),
                     key=operator.itemgetter("name", "description"))
    out = {"details": details,
           "fc_name": fc_name,
           "fc_date": fc_date}
    with open(out_file, "w") as out_handle:
        yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False)
    return out_file
def main(config_file, fc_dir):
    """Run ``_process_wrapper`` on every lane detail of a flowcell.

    Loads the configuration, queries Galaxy for the run details of the
    flowcell parsed from ``fc_dir`` and processes each entry of
    ``run_info["details"]``. A multiprocessing pool is used when the
    configured ``algorithm.num_cores`` is greater than one; otherwise the
    entries are processed sequentially in this process.
    """
    config = load_config(config_file)
    galaxy_api = GalaxyApiAccess(config["galaxy_url"], config["galaxy_api_key"])
    fc_name, fc_date = flowcell.parse_dirname(fc_dir)
    run_info = galaxy_api.run_details(fc_name)
    fastq_dir = flowcell.get_fastq_dir(fc_dir)
    num_cores = config["algorithm"]["num_cores"]
    work_items = [(i, fastq_dir, fc_name, fc_date, config, config_file)
                  for i in run_info["details"]]
    if num_cores > 1:
        pool = Pool(num_cores)
        try:
            pool.map(_process_wrapper, work_items)
        except BaseException:
            # Stop the workers on any failure (including interrupts), then re-raise.
            pool.terminate()
            raise
        # Release worker processes once all work has completed.
        pool.close()
        pool.join()
    else:
        # Explicit loop: map() is lazy on Python 3 and would run nothing.
        for args in work_items:
            _process_wrapper(args)
Example #9
0
def main(config_file, fc_dir):
    """Process every lane detail of a flowcell with ``_process_wrapper``.

    The configuration is loaded from ``config_file``, run details are fetched
    from Galaxy for the flowcell parsed from ``fc_dir``, and each entry of
    ``run_info["details"]`` is processed. Work is spread over a
    multiprocessing pool when ``algorithm.num_cores`` exceeds one, and run
    sequentially otherwise.
    """
    config = load_config(config_file)
    galaxy_api = GalaxyApiAccess(config['galaxy_url'],
                                 config['galaxy_api_key'])
    fc_name, fc_date = flowcell.parse_dirname(fc_dir)
    run_info = galaxy_api.run_details(fc_name)
    fastq_dir = flowcell.get_fastq_dir(fc_dir)
    num_cores = config["algorithm"]["num_cores"]
    work_items = [(i, fastq_dir, fc_name, fc_date, config, config_file)
                  for i in run_info["details"]]
    if num_cores > 1:
        pool = Pool(num_cores)
        try:
            pool.map(_process_wrapper, work_items)
        except BaseException:
            # Kill outstanding workers on any error or interrupt before re-raising.
            pool.terminate()
            raise
        # All work finished: shut the pool down cleanly instead of leaking it.
        pool.close()
        pool.join()
    else:
        # A plain loop runs eagerly on both Python 2 and 3, unlike map().
        for args in work_items:
            _process_wrapper(args)
Example #10
0
def run_has_samplesheet(fc_dir, config, require_single=True):
    """Checks if there's a suitable SampleSheet.csv present for the run

    Scans every existing directory listed under
    ``config["samplesheet_directories"]`` for ``*.csv`` files, collects the
    flowcell ids reported for each by ``_get_flowcell_id`` and fuzzy-matches
    them against the flowcell name parsed from ``fc_dir``.

    Returns:
        Path to the best matching samplesheet, or None when nothing matches.
    """
    fc_name, _ = flowcell.parse_dirname(fc_dir)
    # Tolerate a missing key as well as an explicit empty/None setting.
    sheet_dirs = config.get("samplesheet_directories", []) or []
    fcid_sheet = {}
    for ss_dir in (s for s in sheet_dirs if os.path.exists(s)):
        with utils.chdir(ss_dir):
            for ss in glob.glob("*.csv"):
                for fcid in _get_flowcell_id(ss, require_single):
                    if fcid:
                        # Later sheets with the same flowcell id replace earlier ones.
                        fcid_sheet[fcid] = os.path.join(ss_dir, ss)
    # difflib handles human errors while entering data on the SampleSheet.
    # Only one best candidate is returned (if any). Per the original author,
    # the 0.85 cutoff allows for a maximum of 2 mismatches in fcid.
    potential_fcids = difflib.get_close_matches(fc_name, list(fcid_sheet), 1, 0.85)
    if potential_fcids:
        # Matches are drawn from the dictionary's own keys, so the lookup is safe.
        return fcid_sheet[potential_fcids[0]]
    return None
Example #11
0
def _run_info_from_yaml(dirs,
                        run_info_yaml,
                        config,
                        sample_names=None,
                        integrations=None):
    """Read run information from a passed YAML file.

    Args:
        dirs: mapping of directories; the "flowcell" and "work" entries are read.
        run_info_yaml: path to the sample YAML file.
        config: configuration handed to the helpers; also checked for settings
            keyed by integration name.
        sample_names: optional collection of descriptions; when given, only
            samples whose "description" is in it are kept.
        integrations: optional mapping of integration name to retriever.
            ``None`` (the default) is treated as an empty mapping.

    Returns:
        List of normalized per-sample dictionaries.

    Raises:
        ValueError: when a sample has no "description" and ``_item_is_bam``
            does not accept it.
    """
    validate_yaml(run_info_yaml, run_info_yaml)
    with open(run_info_yaml) as in_handle:
        # NOTE(review): yaml.load without a safe loader can construct arbitrary
        # Python objects; consider yaml.safe_load if inputs may be untrusted.
        loaded = yaml.load(in_handle)
    fc_name, fc_date = None, None
    if dirs.get("flowcell"):
        try:
            fc_name, fc_date = flowcell.parse_dirname(dirs.get("flowcell"))
        except ValueError:
            # Flowcell directory name is not parseable; continue without it.
            pass
    global_config = {}
    global_vars = {}
    resources = {}
    integration_config = {}
    if isinstance(loaded, dict):
        global_config = copy.deepcopy(loaded)
        del global_config["details"]
        if "fc_name" in loaded:
            fc_name = loaded["fc_name"].replace(" ", "_")
        if "fc_date" in loaded:
            fc_date = str(loaded["fc_date"]).replace(" ", "_")
        global_vars = global_config.pop("globals", {})
        resources = global_config.pop("resources", {})
        for iname in ["arvados"]:
            integration_config[iname] = global_config.pop(iname, {})
        loaded = loaded["details"]
    if sample_names:
        loaded = [x for x in loaded if x["description"] in sample_names]

    if integrations:
        for iname, retriever in integrations.items():
            if iname in config:
                loaded = retriever.add_remotes(loaded, config[iname])

    # Downloads are enabled only when no integration value is truthy. The value
    # is the same for every sample, so compute it once; ``integrations`` may be
    # None (its default), which previously raised AttributeError here.
    do_download = all(not x for x in (integrations or {}).values())

    run_details = []
    for i, item in enumerate(loaded):
        item = _normalize_files(item, dirs.get("flowcell"))
        if "lane" not in item:
            # Default the lane to the sample's 1-based position.
            item["lane"] = str(i + 1)
        item["lane"] = _clean_characters(str(item["lane"]))
        if "description" not in item:
            if _item_is_bam(item):
                item["description"] = get_sample_name(item["files"][0])
            else:
                raise ValueError(
                    "No `description` sample name provided for input #%s" %
                    (i + 1))
        item["description"] = _clean_characters(str(item["description"]))
        if "upload" not in item:
            upload = global_config.get("upload", {})
            # Handle specifying a local directory directly in upload
            if isinstance(upload, basestring):
                upload = {"dir": upload}
            if fc_name:
                upload["fc_name"] = fc_name
            if fc_date:
                upload["fc_date"] = fc_date
            upload["run_id"] = ""
            if upload.get("dir"):
                upload["dir"] = _file_to_abs(upload["dir"], [dirs.get("work")],
                                             makedir=True)
            item["upload"] = upload
        item["algorithm"] = _replace_global_vars(item["algorithm"],
                                                 global_vars)
        item["algorithm"] = genome.abs_file_paths(
            item["algorithm"],
            ignore_keys=ALGORITHM_NOPATH_KEYS,
            fileonly_keys=ALGORITHM_FILEONLY_KEYS,
            do_download=do_download)
        item["genome_build"] = str(item.get("genome_build", ""))
        item["algorithm"] = _add_algorithm_defaults(item["algorithm"])
        item["metadata"] = add_metadata_defaults(item.get("metadata", {}))
        item["rgnames"] = prep_rg_names(item, config, fc_name, fc_date)
        if item.get("files"):
            item["files"] = [
                genome.abs_file_paths(f, do_download=do_download)
                for f in item["files"]
            ]
        elif "files" in item:
            # Drop empty file specifications entirely.
            del item["files"]
        if item.get("vrn_file") and isinstance(item["vrn_file"], basestring):
            inputs_dir = utils.safe_makedir(
                os.path.join(dirs.get("work", os.getcwd()), "inputs",
                             item["description"]))
            vrn_file = genome.abs_file_paths(item["vrn_file"],
                                             do_download=do_download)
            item["vrn_file"] = vcfutils.bgzip_and_index(vrn_file,
                                                        config,
                                                        remove_orig=False,
                                                        out_dir=inputs_dir)
        item = _clean_metadata(item)
        item = _clean_algorithm(item)
        # Add any global resource specifications
        if "resources" not in item:
            item["resources"] = {}
        for prog, pkvs in resources.items():
            if prog not in item["resources"]:
                item["resources"][prog] = {}
            if pkvs is not None:
                for key, val in pkvs.items():
                    item["resources"][prog][key] = val
        # Copy global integration settings onto the sample.
        for iname, ivals in integration_config.items():
            if ivals:
                if iname not in item:
                    item[iname] = {}
                for k, v in ivals.items():
                    item[iname][k] = v

        run_details.append(item)
    _check_sample_config(run_details, run_info_yaml, config)
    return run_details
Example #12
0
def _run_info_from_yaml(dirs,
                        run_info_yaml,
                        config,
                        sample_names=None,
                        is_cwl=False,
                        integrations=None):
    """Read run information from a passed YAML file.

    Args:
        dirs: mapping of directories; the "flowcell" and "work" entries are read.
        run_info_yaml: path to the sample YAML file.
        config: configuration handed to the helpers; entries keyed by
            integration name are read and replaced with the result of the
            retriever's ``set_cache``.
        sample_names: optional collection of descriptions; when given, only
            samples whose "description" is in it are kept.
        is_cwl: forwarded to ``_add_algorithm_defaults``.
        integrations: optional mapping of integration name to retriever.
            ``None`` (the default) is treated as an empty mapping.

    Returns:
        List of normalized per-sample dictionaries.

    Raises:
        ValueError: when a sample has no "description" and ``_item_is_bam``
            does not accept it, or when a string "vrn_file" is given with
            ``algorithm.validate`` set but no ``metadata.batch``.
    """
    validate_yaml(run_info_yaml, run_info_yaml)
    with open(run_info_yaml) as in_handle:
        # NOTE(review): yaml.load without a safe loader can construct arbitrary
        # Python objects; consider yaml.safe_load if inputs may be untrusted.
        loaded = yaml.load(in_handle)
    fc_name, fc_date = None, None
    if dirs.get("flowcell"):
        try:
            fc_name, fc_date = flowcell.parse_dirname(dirs.get("flowcell"))
        except ValueError:
            # Flowcell directory name is not parseable; continue without it.
            pass
    global_config = {}
    global_vars = {}
    resources = {}
    integration_config = {}
    if isinstance(loaded, dict):
        global_config = copy.deepcopy(loaded)
        del global_config["details"]
        if "fc_name" in loaded:
            fc_name = loaded["fc_name"].replace(" ", "_")
        if "fc_date" in loaded:
            fc_date = str(loaded["fc_date"]).replace(" ", "_")
        global_vars = global_config.pop("globals", {})
        resources = global_config.pop("resources", {})
        for iname in ["arvados"]:
            integration_config[iname] = global_config.pop(iname, {})
        loaded = loaded["details"]
    if sample_names:
        loaded = [x for x in loaded if x["description"] in sample_names]

    if integrations:
        for iname, retriever in integrations.items():
            if iname in config:
                config[iname] = retriever.set_cache(config[iname])
                loaded = retriever.add_remotes(loaded, config[iname])

    # Downloads are enabled only when no integration value is truthy. The value
    # is the same for every sample, so compute it once; ``integrations`` may be
    # None (its default), which previously raised AttributeError here.
    do_download = all(not x for x in (integrations or {}).values())

    run_details = []
    for i, item in enumerate(loaded):
        item = _normalize_files(item, dirs.get("flowcell"))
        if "lane" not in item:
            # Default the lane to the sample's 1-based position.
            item["lane"] = str(i + 1)
        item["lane"] = _clean_characters(str(item["lane"]))
        if "description" not in item:
            if _item_is_bam(item):
                item["description"] = get_sample_name(item["files"][0])
            else:
                raise ValueError(
                    "No `description` sample name provided for input #%s" %
                    (i + 1))
        description = _clean_characters(str(item["description"]))
        item["description"] = description
        # make names R safe if we are likely to use R downstream
        if item["analysis"].lower() in R_DOWNSTREAM_ANALYSIS:
            if description[0].isdigit():
                valid = "X" + description
                logger.info("%s is not a valid R name, converting to %s." %
                            (description, valid))
                item["description"] = valid
        if "upload" not in item:
            upload = global_config.get("upload", {})
            # Handle specifying a local directory directly in upload
            if isinstance(upload, basestring):
                upload = {"dir": upload}
            if not upload:
                # No upload configuration at all: fall back to ../final.
                upload["dir"] = "../final"
            if fc_name:
                upload["fc_name"] = fc_name
            if fc_date:
                upload["fc_date"] = fc_date
            upload["run_id"] = ""
            if upload.get("dir"):
                upload["dir"] = _file_to_abs(upload["dir"], [dirs.get("work")],
                                             makedir=True)
            item["upload"] = upload
        item["algorithm"] = _replace_global_vars(item["algorithm"],
                                                 global_vars)
        item["algorithm"] = genome.abs_file_paths(
            item["algorithm"],
            ignore_keys=ALGORITHM_NOPATH_KEYS,
            fileonly_keys=ALGORITHM_FILEONLY_KEYS,
            do_download=do_download)
        item["genome_build"] = str(item.get("genome_build", ""))
        item["algorithm"] = _add_algorithm_defaults(item["algorithm"],
                                                    item.get("analysis", ""),
                                                    is_cwl)
        item["metadata"] = add_metadata_defaults(item.get("metadata", {}))
        item["rgnames"] = prep_rg_names(item, config, fc_name, fc_date)
        if item.get("files"):
            item["files"] = [
                genome.abs_file_paths(f, do_download=do_download)
                for f in item["files"]
            ]
        elif "files" in item:
            # Drop empty file specifications entirely.
            del item["files"]
        if item.get("vrn_file") and isinstance(item["vrn_file"], basestring):
            inputs_dir = utils.safe_makedir(
                os.path.join(dirs.get("work", os.getcwd()), "inputs",
                             item["description"]))
            item["vrn_file"] = genome.abs_file_paths(
                item["vrn_file"], do_download=do_download)
            if os.path.isfile(item["vrn_file"]):
                # Try to prepare in place (or use ready to go inputs)
                try:
                    item["vrn_file"] = vcfutils.bgzip_and_index(
                        item["vrn_file"], config, remove_orig=False)
                # In case of permission errors, fix in inputs directory
                except IOError:
                    item["vrn_file"] = vcfutils.bgzip_and_index(
                        item["vrn_file"],
                        config,
                        remove_orig=False,
                        out_dir=inputs_dir)
            if not tz.get_in(("metadata", "batch"), item) and tz.get_in(
                ["algorithm", "validate"], item):
                raise ValueError(
                    "%s: Please specify a metadata batch for variant file (vrn_file) input.\n"
                    % (item["description"]) +
                    "Batching with a standard sample provides callable regions for validation."
                )
        item = _clean_metadata(item)
        item = _clean_algorithm(item)
        item = _clean_background(item)
        # Add any global resource specifications
        if "resources" not in item:
            item["resources"] = {}
        for prog, pkvs in resources.items():
            if prog not in item["resources"]:
                item["resources"][prog] = {}
            if pkvs is not None:
                for key, val in pkvs.items():
                    item["resources"][prog][key] = val
        # Copy global integration settings onto the sample.
        for iname, ivals in integration_config.items():
            if ivals:
                if iname not in item:
                    item[iname] = {}
                for k, v in ivals.items():
                    item[iname][k] = v

        run_details.append(item)
    _check_sample_config(run_details, run_info_yaml, config)
    return run_details
Example #13
0
def _run_info_from_yaml(dirs, run_info_yaml, config, sample_names=None):
    """Read run information from a passed YAML file.

    Args:
        dirs: mapping of directories; the "flowcell" and "work" entries are read.
        run_info_yaml: path to the sample YAML file.
        config: configuration handed to the helpers.
        sample_names: optional collection of descriptions; when given, only
            samples whose "description" is in it are kept.

    Returns:
        List of normalized per-sample dictionaries.

    Raises:
        ValueError: when a sample has no "description" and ``_item_is_bam``
            does not accept it.
    """
    with open(run_info_yaml) as in_handle:
        # NOTE(review): yaml.load without a safe loader can construct arbitrary
        # Python objects; consider yaml.safe_load if inputs may be untrusted.
        loaded = yaml.load(in_handle)
    fc_name, fc_date = None, None
    if dirs.get("flowcell"):
        try:
            fc_name, fc_date = flowcell.parse_dirname(dirs.get("flowcell"))
        except ValueError:
            # Flowcell directory name is not parseable; continue without it.
            pass
    global_config = {}
    global_vars = {}
    resources = {}
    if isinstance(loaded, dict):
        global_config = copy.deepcopy(loaded)
        del global_config["details"]
        if "fc_name" in loaded and "fc_date" in loaded:
            fc_name = loaded["fc_name"].replace(" ", "_")
            fc_date = str(loaded["fc_date"]).replace(" ", "_")
        global_vars = global_config.pop("globals", {})
        resources = global_config.pop("resources", {})
        loaded = loaded["details"]
    if sample_names:
        loaded = [x for x in loaded if x["description"] in sample_names]

    run_details = []
    for i, item in enumerate(loaded):
        item = _normalize_files(item, dirs.get("flowcell"))
        if "lane" not in item:
            # Default the lane to the sample's 1-based position.
            item["lane"] = str(i + 1)
        item["lane"] = _clean_characters(str(item["lane"]))
        if "description" not in item:
            if _item_is_bam(item):
                item["description"] = get_sample_name(item["files"][0])
            else:
                raise ValueError(
                    "No `description` sample name provided for input #%s" %
                    (i + 1))
        item["description"] = _clean_characters(str(item["description"]))
        if "upload" not in item:
            upload = global_config.get("upload", {})
            # Handle specifying a local directory directly in upload
            if isinstance(upload, basestring):
                upload = {"dir": upload}
            if fc_name and fc_date:
                upload["fc_name"] = fc_name
                upload["fc_date"] = fc_date
            upload["run_id"] = ""
            if upload.get("dir"):
                upload["dir"] = _file_to_abs(upload["dir"], [dirs.get("work")],
                                             makedir=True)
            item["upload"] = upload
        item["algorithm"] = _replace_global_vars(item["algorithm"],
                                                 global_vars)
        item["algorithm"] = genome.abs_file_paths(
            item["algorithm"], ignore_keys=ALGORITHM_NOPATH_KEYS)
        item["genome_build"] = str(item.get("genome_build", ""))
        item["algorithm"] = _add_algorithm_defaults(item["algorithm"])
        item["rgnames"] = prep_rg_names(item, config, fc_name, fc_date)
        item["test_run"] = global_config.get("test_run", False)
        if item.get("files"):
            item["files"] = [genome.abs_file_paths(f) for f in item["files"]]
        elif "files" in item:
            # Drop empty file specifications entirely.
            del item["files"]
        if item.get("vrn_file") and isinstance(item["vrn_file"], basestring):
            item["vrn_file"] = vcfutils.bgzip_and_index(
                genome.abs_file_paths(item["vrn_file"]), config)
        item = _clean_metadata(item)
        item = _clean_algorithm(item)
        # Add any global resource specifications
        if "resources" not in item:
            item["resources"] = {}
        for prog, pkvs in resources.items():
            if prog not in item["resources"]:
                item["resources"][prog] = {}
            # A program listed with no settings loads as None from YAML;
            # keep the empty entry instead of failing on it.
            if pkvs is not None:
                for key, val in pkvs.items():
                    item["resources"][prog][key] = val
        run_details.append(item)
    _check_sample_config(run_details, run_info_yaml, config)
    return run_details