def organize(dirs, config, run_info_yaml, sample_names=None, is_cwl=False,
             integrations=None):
    """Build the per-sample run structure from an input sample YAML file.

    Reads sample definitions via ``_run_info_from_yaml``, lets any configured
    integration attach remote inputs, then fills in naming, resources,
    configuration and temporary directory details for each sample.

    Arguments:
      dirs -- directory information; stored on every sample under "dirs".
      config -- global configuration; merged with per-sample settings and
        checked for keys matching integration names.
      run_info_yaml -- path to the input sample YAML file, which must exist.
      sample_names -- optional list of samples to include from the overall
        file, for running multiple pipelines from one configuration file.
      is_cwl -- forwarded to ``_run_info_from_yaml``; its negation is handed
        to ``_add_provenance`` as the final argument.
      integrations -- optional mapping of integration name to a retriever
        object providing ``add_remotes``; defaults to no integrations.

    Returns the list of prepared samples as returned by ``_add_provenance``.

    Fails with an AssertionError when run_info_yaml is empty or missing on
    disk (only while assertions are enabled).
    """
    from bcbio.pipeline import qcsummary
    if integrations is None:
        integrations = {}
    logger.info("Using input YAML configuration: %s" % run_info_yaml)
    assert run_info_yaml and os.path.exists(run_info_yaml), \
        "Did not find input sample YAML file: %s" % run_info_yaml
    run_details = _run_info_from_yaml(dirs, run_info_yaml, config, sample_names,
                                      is_cwl=is_cwl, integrations=integrations)
    # Only integrations with a section in the configuration take part; the
    # last matching one is kept for resolving reference resources below.
    remote_retriever = None
    for integration_name, retriever in integrations.items():
        if integration_name in config:
            run_details = retriever.add_remotes(run_details, config[integration_name])
            remote_retriever = retriever
    samples = []
    for sample in run_details:
        sample["dirs"] = dirs
        if "name" in sample:
            # A plain string name is expanded to [name, "name-description"],
            # and the combined value also replaces the description.
            if isinstance(sample["name"], basestring):
                description = "%s-%s" % (sample["name"],
                                         clean_name(sample["description"]))
                sample["name"] = [sample["name"], description]
                sample["description"] = description
        else:
            sample["name"] = ["", sample["description"]]
        # Fold algorithm settings into the configuration and drop the
        # top-level copy so they are not specified twice.
        sample["resources"] = _add_remote_resources(sample["resources"])
        sample["config"] = config_utils.update_w_custom(config, sample)
        sample.pop("algorithm", None)
        sample = add_reference_resources(sample, remote_retriever)
        algorithm = sample["config"]["algorithm"]
        algorithm["qc"] = qcsummary.get_qc_tools(sample)
        algorithm["vcfanno"] = vcfanno.find_annotations(sample)
        # Temporary directory: create it and make it absolute right away
        # unless it contains environment variables, in which case it is left
        # untouched here (normalized later in distributed.transaction).
        tmp_dir = tz.get_in(["config", "resources", "tmp", "dir"], sample)
        if tmp_dir:
            expanded = os.path.expandvars(tmp_dir)
            if expanded == tmp_dir:
                tmp_dir = utils.safe_makedir(expanded)
                tmp_dir = genome.abs_file_paths(tmp_dir,
                                                do_download=not integrations)
            sample["config"]["resources"]["tmp"]["dir"] = tmp_dir
        samples.append(sample)
    return _add_provenance(samples, dirs, config, not is_cwl)
def organize(dirs, config, run_info_yaml, sample_names=None, add_provenance=True,
             integrations=None):
    """Build the per-sample run structure from an input sample YAML file.

    Reads sample definitions via ``_run_info_from_yaml``, lets any configured
    integration attach remote inputs, then fills in naming, resources,
    configuration and temporary directory details for each sample.

    Arguments:
      dirs -- directory information; stored on every sample under "dirs".
      config -- global configuration; merged with per-sample settings and
        checked for keys matching integration names.
      run_info_yaml -- path to the input sample YAML file, which must exist.
      sample_names -- optional list of samples to include from the overall
        file, for running multiple pipelines from one configuration file.
      add_provenance -- handed unchanged to ``_add_provenance`` as its final
        argument.
      integrations -- optional mapping of integration name to a retriever
        object providing ``add_remotes``; defaults to no integrations.

    Returns the list of prepared samples as returned by ``_add_provenance``.

    Fails with an AssertionError when run_info_yaml is empty or missing on
    disk (only while assertions are enabled).
    """
    from bcbio.pipeline import qcsummary
    if integrations is None:
        integrations = {}
    logger.info("Using input YAML configuration: %s" % run_info_yaml)
    assert run_info_yaml and os.path.exists(run_info_yaml), \
        "Did not find input sample YAML file: %s" % run_info_yaml
    run_details = _run_info_from_yaml(dirs, run_info_yaml, config, sample_names,
                                      integrations=integrations)
    # Only integrations with a section in the configuration take part; the
    # last matching one is kept for resolving reference resources below.
    remote_retriever = None
    for integration_name, retriever in integrations.items():
        if integration_name in config:
            run_details = retriever.add_remotes(run_details, config[integration_name])
            remote_retriever = retriever
    samples = []
    for sample in run_details:
        sample["dirs"] = dirs
        if "name" in sample:
            # A plain string name is expanded to [name, "name-description"],
            # and the combined value also replaces the description.
            if isinstance(sample["name"], basestring):
                description = "%s-%s" % (sample["name"],
                                         clean_name(sample["description"]))
                sample["name"] = [sample["name"], description]
                sample["description"] = description
        else:
            sample["name"] = ["", sample["description"]]
        # Fold algorithm settings into the configuration and drop the
        # top-level copy so they are not specified twice.
        sample["resources"] = _add_remote_resources(sample["resources"])
        sample["config"] = config_utils.update_w_custom(config, sample)
        sample.pop("algorithm", None)
        sample = add_reference_resources(sample, remote_retriever)
        sample["config"]["algorithm"]["qc"] = qcsummary.get_qc_tools(sample)
        # Temporary directory: create it and make it absolute right away
        # unless it contains environment variables, in which case it is left
        # untouched here (normalized later in distributed.transaction).
        tmp_dir = tz.get_in(["config", "resources", "tmp", "dir"], sample)
        if tmp_dir:
            expanded = os.path.expandvars(tmp_dir)
            if expanded == tmp_dir:
                tmp_dir = utils.safe_makedir(expanded)
                tmp_dir = genome.abs_file_paths(tmp_dir,
                                                do_download=not integrations)
            sample["config"]["resources"]["tmp"]["dir"] = tmp_dir
        samples.append(sample)
    return _add_provenance(samples, dirs, config, add_provenance)