def main(config_file, fc_dir=None, run_info_yaml=None, numcores=None,
         paralleltype=None, queue=None, scheduler=None, upgrade=None,
         profile=None, workflow=None, inputs=None, resources="",
         timeout=15, retries=None):
    work_dir = os.getcwd()
    config, config_file = load_system_config(config_file)
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(work_dir, "log")
    paralleltype, numcores = _get_cores_and_type(config, fc_dir, run_info_yaml,
                                                 numcores, paralleltype)
    parallel = {"type": paralleltype, "cores": numcores,
                "scheduler": scheduler, "queue": queue,
                "profile": profile, "module": "bcbio.distributed",
                "resources": resources, "timeout": timeout,
                "retries": retries}
    if parallel["type"] in ["local", "messaging-main"]:
        if numcores is None:
            config["algorithm"]["num_cores"] = numcores
        run_main(config, config_file, work_dir, parallel,
                 fc_dir, run_info_yaml)
    elif parallel["type"] == "messaging":
        parallel["task_module"] = "bcbio.distributed.tasks"
        args = [config_file, fc_dir]
        if run_info_yaml:
            args.append(run_info_yaml)
        messaging.run_and_monitor(config, config_file, args, parallel)
    elif parallel["type"] == "ipython":
        assert parallel["queue"] is not None, "IPython parallel requires a specified queue (-q)"
        assert parallel["scheduler"] is not None, "IPython parallel requires a specified scheduler (-s)"
        run_main(config, config_file, work_dir, parallel,
                 fc_dir, run_info_yaml)
    else:
        raise ValueError("Unexpected type of parallel run: %s" % parallel["type"])
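Nearly all of the examples below repeat the same two-step core: load the system configuration, then default log_dir under the working directory. A minimal sketch of that shared pattern, assuming a standard bcbio install where load_system_config comes from bcbio.pipeline.config_utils (imported as config_utils in several of these snippets); the helper name is ours, not bcbio's:

import os
from bcbio.pipeline.config_utils import load_system_config

def load_with_default_log_dir(config_file, work_dir):
    # load_system_config returns the parsed configuration dict along with
    # the resolved path of the file it actually loaded.
    config, config_file = load_system_config(config_file, work_dir)
    # Default log_dir to <work_dir>/log when the system config omits it,
    # mirroring the callers in these examples.
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(work_dir, "log")
    return config, config_file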
def run_main(workdir, config_file=None, fc_dir=None, run_info_yaml=None,
             parallel=None, workflow=None):
    """Run variant analysis, handling command line options.
    """
    # Set environment to standard to use periods for decimals and avoid localization
    os.environ["LC_ALL"] = "C"
    os.environ["LC"] = "C"
    os.environ["LANG"] = "C"
    workdir = utils.safe_makedir(os.path.abspath(workdir))
    os.chdir(workdir)
    config, config_file = config_utils.load_system_config(config_file, workdir)
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(workdir, DEFAULT_LOG_DIR)
    if parallel["type"] in ["local", "clusterk"]:
        _setup_resources()
        _run_toplevel(config, config_file, workdir, parallel,
                      fc_dir, run_info_yaml)
    elif parallel["type"] == "ipython":
        assert parallel["scheduler"] is not None, \
            "IPython parallel requires a specified scheduler (-s)"
        if parallel["scheduler"] != "sge":
            assert parallel["queue"] is not None, \
                "IPython parallel requires a specified queue (-q)"
        elif not parallel["queue"]:
            parallel["queue"] = ""
        _run_toplevel(config, config_file, workdir, parallel,
                      fc_dir, run_info_yaml)
    else:
        raise ValueError("Unexpected type of parallel run: %s" % parallel["type"])
def run_main(workdir, config_file=None, fc_dir=None, run_info_yaml=None,
             numcores=None, paralleltype=None, queue=None, scheduler=None,
             upgrade=None, tag=None, workflow=None, inputs=None,
             resources="", timeout=15, retries=None):
    """Run variant analysis, handling command line options.
    """
    os.chdir(workdir)
    config, config_file = load_system_config(config_file, workdir)
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(workdir, "log")
    paralleltype, numcores = _get_cores_and_type(numcores, paralleltype, scheduler)
    parallel = {"type": paralleltype, "cores": numcores,
                "scheduler": scheduler, "queue": queue, "tag": tag,
                "module": "bcbio.distributed",
                "resources": resources, "timeout": timeout,
                "retries": retries}
    if parallel["type"] in ["local"]:
        _setup_resources()
        _run_toplevel(config, config_file, workdir, parallel,
                      fc_dir, run_info_yaml)
    elif parallel["type"] == "ipython":
        assert parallel["queue"] is not None, "IPython parallel requires a specified queue (-q)"
        assert parallel["scheduler"] is not None, "IPython parallel requires a specified scheduler (-s)"
        _run_toplevel(config, config_file, workdir, parallel,
                      fc_dir, run_info_yaml)
    else:
        raise ValueError("Unexpected type of parallel run: %s" % parallel["type"])
def _get_system_config(work_dir, system):
    try:
        _, system = load_system_config(config_file="bcbio_system.yaml",
                                       work_dir=work_dir)
    except ValueError:
        system = None
    return system
def get_post_process_yaml(data_dir, workdir):
    """Prepare a bcbio_system YAML file pointing to test data.
    """
    try:
        from bcbiovm.docker.defaults import get_datadir
        datadir = data_dir or get_datadir()
        sys_conf_file = os.path.join(datadir, "galaxy", "bcbio_system.yaml")
        system = sys_conf_file if datadir else None
    except ImportError:
        system = None
    if system is None or not os.path.exists(system):
        try:
            _, system = load_system_config(config_file="bcbio_system.yaml",
                                           work_dir=workdir)
        except ValueError:
            system = None
    if system is None or not os.path.exists(system):
        system = os.path.join(data_dir, "post_process-sample.yaml")
    # create local config pointing to reduced genomes
    test_system = os.path.join(workdir, "bcbio_system.yaml")
    with open(system) as in_handle:
        config = yaml.load(in_handle)
    config["galaxy_config"] = os.path.join(data_dir, "universe_wsgi.ini")
    with open(test_system, "w") as out_handle:
        yaml.dump(config, out_handle)
    return test_system
def run_main(work_dir, config_file=None, fc_dir=None, run_info_yaml=None,
             numcores=None, paralleltype=None, queue=None, scheduler=None,
             upgrade=None, profile=None, workflow=None, inputs=None,
             resources="", timeout=15, retries=None):
    """Run variant analysis, handling command line options.
    """
    config, config_file = load_system_config(config_file)
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(work_dir, "log")
    paralleltype, numcores = _get_cores_and_type(numcores, paralleltype, scheduler)
    parallel = {"type": paralleltype, "cores": numcores,
                "scheduler": scheduler, "queue": queue,
                "profile": profile, "module": "bcbio.distributed",
                "resources": resources, "timeout": timeout,
                "retries": retries}
    if parallel["type"] in ["local"]:
        _run_toplevel(config, config_file, work_dir, parallel,
                      fc_dir, run_info_yaml)
    elif parallel["type"] == "ipython":
        assert parallel["queue"] is not None, "IPython parallel requires a specified queue (-q)"
        assert parallel["scheduler"] is not None, "IPython parallel requires a specified scheduler (-s)"
        _run_toplevel(config, config_file, work_dir, parallel,
                      fc_dir, run_info_yaml)
    else:
        raise ValueError("Unexpected type of parallel run: %s" % parallel["type"])
def get_post_process_yaml(data_dir, workdir):
    try:
        from bcbiovm.docker.defaults import get_datadir
        datadir = get_datadir()
        system = os.path.join(datadir, "galaxy", "bcbio_system.yaml") if datadir else None
    except ImportError:
        system = None
    if system is None or not os.path.exists(system):
        try:
            _, system = load_system_config("bcbio_system.yaml")
        except ValueError:
            system = None
    sample = os.path.join(data_dir, "post_process-sample.yaml")
    std = os.path.join(data_dir, "post_process.yaml")
    if os.path.exists(std):
        return std
    elif system and os.path.exists(system):
        # create local config pointing to reduced genomes
        test_system = os.path.join(workdir, os.path.basename(system))
        with open(system) as in_handle:
            config = yaml.load(in_handle)
        config["galaxy_config"] = os.path.join(data_dir, "universe_wsgi.ini")
        with open(test_system, "w") as out_handle:
            yaml.dump(config, out_handle)
        return test_system
    else:
        return sample
def run_main(workdir, config_file=None, fc_dir=None, run_info_yaml=None,
             parallel=None, workflow=None):
    """Run variant analysis, handling command line options.
    """
    os.chdir(workdir)
    config, config_file = load_system_config(config_file, workdir)
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(workdir, "log")
    if parallel["type"] in ["local", "clusterk"]:
        _setup_resources()
        _run_toplevel(config, config_file, workdir, parallel,
                      fc_dir, run_info_yaml)
    elif parallel["type"] == "ipython":
        assert parallel["scheduler"] is not None, \
            "IPython parallel requires a specified scheduler (-s)"
        if parallel["scheduler"] != "sge":
            assert parallel["queue"] is not None, \
                "IPython parallel requires a specified queue (-q)"
        elif not parallel["queue"]:
            parallel["queue"] = ""
        _run_toplevel(config, config_file, workdir, parallel,
                      fc_dir, run_info_yaml)
    else:
        raise ValueError("Unexpected type of parallel run: %s" % parallel["type"])
def create_sample_config(data_dir, work_dir, disambiguate=False):
    system_config, system_file = config_utils.load_system_config(work_dir=work_dir)
    system_config["dirs"] = run_info.setup_directories(work_dir, work_dir,
                                                       system_config, system_file)
    c = ConfigCreator(data_dir, work_dir, system_config)
    if disambiguate:
        return c.config_with_disambiguate()
    else:
        return c.config_without_disambiguate()
def organize_samples(run_info_yaml, bcbio_system, work_dir, fc_dir, config):
    """Externally callable function to read and organize configurations for samples.
    """
    config, config_file = config_utils.load_system_config(bcbio_system, work_dir)
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(work_dir, "log")
    dirs = setup_directories(work_dir, fc_dir, config, config_file)
    return organize(dirs, config, run_info_yaml)
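A hypothetical call to organize_samples for illustration; the paths are placeholders, and note that the trailing config argument is immediately replaced inside the function by the loaded system configuration:

# Placeholder paths; the final argument is overwritten by load_system_config.
organized = organize_samples("config/project.yaml", "bcbio_system.yaml",
                             "/path/to/work", None, {})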
def setup(args):
    template, template_txt = name_to_config(args.template)
    run_info.validate_yaml(template_txt, args.template)
    base_item = template["details"][0]
    project_name, metadata, global_vars, md_file = _pname_and_metadata(args.metadata)
    remotes = _retrieve_remote([args.metadata, args.template])
    inputs = args.input_files + remotes.get("inputs", []) + \
             [fr for fr in metadata if objectstore.is_remote(fr)]
    if hasattr(args, "systemconfig") and args.systemconfig and hasattr(args, "integrations"):
        config, _ = config_utils.load_system_config(args.systemconfig)
        for iname, retriever in args.integrations.items():
            if iname in config:
                inputs += retriever.get_files(metadata, config[iname])
    raw_items = [_add_metadata(item, metadata, remotes, args.only_metadata)
                 for item in _prep_items_from_base(base_item, inputs, args.force_single)]
    items = [x for x in raw_items if x]
    _check_all_metadata_found(metadata, items)
    out_dir = os.path.join(os.getcwd(), project_name)
    work_dir = utils.safe_makedir(os.path.join(out_dir, "work"))
    if hasattr(args, "relpaths") and args.relpaths:
        items = [_convert_to_relpaths(x, work_dir) for x in items]
    out_config_file = _write_template_config(template_txt, project_name, out_dir)
    if md_file:
        shutil.copyfile(md_file, os.path.join(out_dir, "config", os.path.basename(md_file)))
    items = _copy_to_configdir(items, out_dir)
    if len(items) == 0:
        print()
        print("Template configuration file created at: %s" % out_config_file)
        print("Edit to finalize custom options, then prepare full sample config with:")
        print("  bcbio_nextgen.py -w template %s %s sample1.bam sample2.fq" %
              (out_config_file, project_name))
    else:
        out_config_file = _write_config_file(items, global_vars, template,
                                             project_name, out_dir, remotes)
        print()
        print("Configuration file created at: %s" % out_config_file)
        print("Edit to finalize and run with:")
        print("  cd %s" % work_dir)
        print("  bcbio_nextgen.py ../config/%s" % os.path.basename(out_config_file))
        if remotes.get("base"):
            remote_path = os.path.join(remotes["base"], os.path.basename(out_config_file))
            s3.upload_file_boto(out_config_file, remote_path)
            print("Also uploaded to AWS S3 in %s" % remotes["base"])
            print("Run directly with bcbio_vm.py run %s" % remote_path)
def setup_directories(work_dir, fc_dir, config, config_file):
    fastq_dir, galaxy_dir, config_dir = _get_full_paths(
        flowcell.get_fastq_dir(fc_dir) if fc_dir else None, config, config_file)
    # check default install for tool data if not found locally
    if not os.path.exists(os.path.join(galaxy_dir, "tool-data")):
        _, config_file = config_utils.load_system_config(work_dir=work_dir)
        if os.path.exists(os.path.join(os.path.dirname(config_file), "tool-data")):
            galaxy_dir = os.path.dirname(config_file)
    return {"fastq": fastq_dir, "galaxy": galaxy_dir,
            "work": work_dir, "flowcell": fc_dir, "config": config_dir}
def prep_system(run_info_yaml, bcbio_system=None):
    """Prepare system configuration information from an input configuration file.

    This does the work of parsing the system input file and setting up
    directories for use in 'organize'.
    """
    work_dir = os.getcwd()
    config, config_file = config_utils.load_system_config(bcbio_system, work_dir)
    dirs = setup_directories(work_dir,
                             os.path.normpath(os.path.dirname(os.path.dirname(run_info_yaml))),
                             config, config_file)
    return [dirs, config, run_info_yaml]
def setup_directories(work_dir, fc_dir, config, config_file):
    fastq_dir, galaxy_dir, config_dir = _get_full_paths(
        flowcell.get_fastq_dir(fc_dir) if fc_dir else None, config, config_file)
    # check default install for tool data if not found locally
    if not os.path.exists(os.path.join(galaxy_dir, "tool-data")):
        _, config_file = config_utils.load_system_config(work_dir=work_dir,
                                                         allow_missing=True)
        if config_file and os.path.exists(os.path.join(os.path.dirname(config_file), "tool-data")):
            galaxy_dir = os.path.dirname(config_file)
    return {"fastq": fastq_dir, "galaxy": galaxy_dir,
            "work": work_dir, "flowcell": fc_dir, "config": config_dir}
def setup(args):
    template, template_txt = name_to_config(args.template)
    run_info.validate_yaml(template_txt, args.template)
    base_item = template["details"][0]
    project_name, metadata, global_vars, md_file = _pname_and_metadata(args.metadata)
    remotes = _retrieve_remote([args.metadata, args.template])
    inputs = args.input_files + remotes.get("inputs", []) + _find_remote_inputs(metadata)
    remote_retriever = None
    remote_config = None
    if hasattr(args, "systemconfig") and args.systemconfig and hasattr(args, "integrations"):
        config, _ = config_utils.load_system_config(args.systemconfig)
        for iname, retriever in args.integrations.items():
            if iname in config:
                remote_retriever = retriever
                remote_config = remote_retriever.set_cache(config[iname])
                inputs += remote_retriever.get_files(metadata, remote_config)
    raw_items = [_add_metadata(item, metadata, remotes, args.only_metadata)
                 for item in _prep_items_from_base(base_item, inputs, metadata,
                                                   args.separators.split(","),
                                                   args.force_single)]
    items = [x for x in raw_items if x]
    _check_all_metadata_found(metadata, items)
    if remote_retriever and remote_config:
        items = remote_retriever.add_remotes(items, remote_config)
    out_dir = os.path.join(os.getcwd(), project_name)
    work_dir = utils.safe_makedir(os.path.join(out_dir, "work"))
    if hasattr(args, "relpaths") and args.relpaths:
        items = [_convert_to_relpaths(x, work_dir) for x in items]
    out_config_file = _write_template_config(template_txt, project_name, out_dir)
    if md_file:
        shutil.copyfile(md_file, os.path.join(out_dir, "config", os.path.basename(md_file)))
    items = _copy_to_configdir(items, out_dir)
    if len(items) == 0:
        print()
        print("Template configuration file created at: %s" % out_config_file)
        print("Edit to finalize custom options, then prepare full sample config with:")
        print("  bcbio_nextgen.py -w template %s %s sample1.bam sample2.fq" %
              (out_config_file, project_name))
    else:
        out_config_file = _write_config_file(items, global_vars, template,
                                             project_name, out_dir, remotes)
        print()
        print("Configuration file created at: %s" % out_config_file)
        print("Edit to finalize and run with:")
        print("  cd %s" % work_dir)
        print("  bcbio_nextgen.py ../config/%s" % os.path.basename(out_config_file))
        if remotes.get("base"):
            remote_path = os.path.join(remotes["base"], os.path.basename(out_config_file))
            s3.upload_file_boto(out_config_file, remote_path)
            print("Also uploaded to AWS S3 in %s" % remotes["base"])
            print("Run directly with bcbio_vm.py run %s" % remote_path)
def run_main(workdir, config_file=None, fc_dir=None, run_info_yaml=None,
             parallel=None, workflow=None):
    """Run variant analysis, handling command line options.
    """
    os.chdir(workdir)
    config, config_file = load_system_config(config_file, workdir)
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(workdir, "log")
    if parallel["type"] in ["local"]:
        _setup_resources()
        _run_toplevel(config, config_file, workdir, parallel,
                      fc_dir, run_info_yaml)
    elif parallel["type"] == "ipython":
        assert parallel["queue"] is not None, "IPython parallel requires a specified queue (-q)"
        assert parallel["scheduler"] is not None, "IPython parallel requires a specified scheduler (-s)"
        _run_toplevel(config, config_file, workdir, parallel,
                      fc_dir, run_info_yaml)
    else:
        raise ValueError("Unexpected type of parallel run: %s" % parallel["type"])
def _merge_system_configs(host_config, container_config, work_dir):
    """Create a merged system configuration from external and internal specification.
    """
    out_file = os.path.join(work_dir, "web-bcbio_system.yaml")
    out, _ = config_utils.load_system_config(container_config)
    for k, v in host_config.items():
        if k in set(["galaxy_config"]):
            out[k] = v
        elif k == "resources":
            for pname, resources in v.items():
                for rname, rval in resources.items():
                    if rname in set(["cores", "jvm_opts", "memory"]):
                        if pname not in out[k]:
                            out[k][pname] = {}
                        out[k][pname][rname] = rval
    # Ensure final file is relocatable by mapping back to reference directory
    if "bcbio_system" in out and ("galaxy_config" not in out or not os.path.isabs(out["galaxy_config"])):
        out["galaxy_config"] = os.path.normpath(
            os.path.join(os.path.dirname(out["bcbio_system"]), os.pardir,
                         "galaxy", "universe_wsgi.ini"))
    with open(out_file, "w") as out_handle:
        yaml.dump(out, out_handle, default_flow_style=False, allow_unicode=False)
    return out_file
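A hypothetical input pair illustrating the merge semantics above: from the host side, only galaxy_config and the whitelisted per-program resource keys (cores, jvm_opts, memory) are copied over the container configuration, while other host keys (such as a dir resource here) are dropped. All paths are placeholders:

# Placeholder data; assumes the container bcbio_system.yaml defines a
# top-level "resources" section, which out[k] indexing relies on.
host = {"galaxy_config": "/host/galaxy/universe_wsgi.ini",
        "resources": {"gatk": {"cores": 8, "memory": "4g",
                               "dir": "/host/only/ignored"}}}
merged_yaml = _merge_system_configs(host, "/container/bcbio_system.yaml",
                                    "/tmp/work")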
def setUp(self):
    self._config_file = os.path.join(os.path.dirname(__file__),
                                     '../../config', 'bcbio_system.yaml')
    config, config_file = config_utils.load_system_config(self._config_file)
    self._picard = broad.runner_from_config(config)