def _run_delly(bam_files, chrom, ref_file, work_dir, items): """Run delly, calling structural variations for the specified type. """ batch = sshared.get_cur_batch(items) ext = "-%s-svs" % batch if batch else "-svs" out_file = os.path.join( work_dir, "%s%s-%s.bcf" % (os.path.splitext(os.path.basename(bam_files[0]))[0], ext, chrom)) final_file = "%s.vcf.gz" % (utils.splitext_plus(out_file)[0]) cores = min( utils.get_in(items[0], ("config", "algorithm", "num_cores"), 1), len(bam_files)) if not utils.file_exists(out_file) and not utils.file_exists(final_file): with file_transaction(items[0], out_file) as tx_out_file: if sshared.has_variant_regions(items, out_file, chrom): exclude = ["-x", _delly_exclude_file(items, out_file, chrom)] cmd = ["delly", "call", "-g", ref_file, "-o", tx_out_file ] + exclude + bam_files locale_to_use = utils.get_locale() multi_cmd = "export OMP_NUM_THREADS=%s && export LC_ALL=%s && " % ( cores, locale_to_use) try: do.run(multi_cmd + " ".join(cmd), "delly structural variant") except subprocess.CalledProcessError as msg: # Small input samples, write an empty vcf if "Sample has not enough data to estimate library parameters" in str( msg): pass # delly returns an error exit code if there are no variants elif "No structural variants found" not in str(msg): raise return [_bgzip_and_clean(out_file, items)]
def _setup_logging(args): # Set environment to standard to use periods for decimals and avoid localization locale_to_use = utils.get_locale() os.environ["LC_ALL"] = locale_to_use os.environ["LC"] = locale_to_use os.environ["LANG"] = locale_to_use config = None if len(args) == 1 and isinstance(args[0], (list, tuple)): args = args[0] for arg in args: if config_utils.is_nested_config_arg(arg): config = arg["config"] break elif config_utils.is_std_config_arg(arg): config = arg break elif isinstance(arg, (list, tuple)) and config_utils.is_nested_config_arg( arg[0]): config = arg[0]["config"] break if config is None: raise NotImplementedError("No config found in arguments: %s" % args[0]) handler = setup_local_logging(config, config.get("parallel", {})) try: yield config except: logger.exception("Unexpected error") raise finally: if hasattr(handler, "close"): handler.close()
def run_main(workdir, config_file=None, fc_dir=None, run_info_yaml=None, parallel=None, workflow=None): """Run variant analysis, handling command line options. """ # Set environment to standard to use periods for decimals and avoid localization locale_to_use = utils.get_locale() os.environ["LC_ALL"] = locale_to_use os.environ["LC"] = locale_to_use os.environ["LANG"] = locale_to_use workdir = utils.safe_makedir(os.path.abspath(workdir)) os.chdir(workdir) config, config_file = config_utils.load_system_config(config_file, workdir) parallel = log.create_base_logger(config, parallel) log.setup_local_logging(config, parallel) logger.info(f"System YAML configuration: {os.path.abspath(config_file)}.") logger.info(f"Locale set to {locale_to_use}.") if config.get("log_dir", None) is None: config["log_dir"] = os.path.join(workdir, DEFAULT_LOG_DIR) if parallel["type"] in ["local", "clusterk"]: _setup_resources() _run_toplevel(config, config_file, workdir, parallel, fc_dir, run_info_yaml) elif parallel["type"] == "ipython": assert parallel["scheduler"] is not None, "IPython parallel requires a specified scheduler (-s)" if parallel["scheduler"] != "sge": assert parallel["queue"] is not None, "IPython parallel requires a specified queue (-q)" elif not parallel["queue"]: parallel["queue"] = "" _run_toplevel(config, config_file, workdir, parallel, fc_dir, run_info_yaml) else: raise ValueError("Unexpected type of parallel run: %s" % parallel["type"])
def process(args): """Run the function in args.name given arguments in args.argfile. """ # Set environment to standard to use periods for decimals and avoid localization locale_to_use = utils.get_locale() os.environ["LC_ALL"] = locale_to_use os.environ["LC"] = locale_to_use os.environ["LANG"] = locale_to_use setpath.prepend_bcbiopath() try: fn = getattr(multitasks, args.name) except AttributeError: raise AttributeError( "Did not find exposed function in bcbio.distributed.multitasks named '%s'" % args.name) if args.moreargs or args.raw: fnargs = [args.argfile] + args.moreargs work_dir = None argfile = None else: with open(args.argfile) as in_handle: fnargs = yaml.safe_load(in_handle) work_dir = os.path.dirname(args.argfile) fnargs = config_utils.merge_resources(fnargs) argfile = args.outfile if args.outfile else "%s-out%s" % os.path.splitext( args.argfile) if not work_dir: work_dir = os.getcwd() if len(fnargs) > 0 and fnargs[0] == "cwl": fnargs, parallel, out_keys, input_files = _world_from_cwl( args.name, fnargs[1:], work_dir) # Can remove this awkward Docker merge when we do not need custom GATK3 installs fnargs = config_utils.merge_resources(fnargs) argfile = os.path.join(work_dir, "cwl.output.json") else: parallel, out_keys, input_files = None, {}, [] with utils.chdir(work_dir): with contextlib.closing( log.setup_local_logging(parallel={"wrapper": "runfn"})): try: out = fn(*fnargs) except: logger.exception() raise finally: # Clean up any copied and unpacked workflow inputs, avoiding extra disk usage wf_input_dir = os.path.join(work_dir, "wf-inputs") if os.path.exists(wf_input_dir) and os.path.isdir( wf_input_dir): shutil.rmtree(wf_input_dir) if argfile: try: _write_out_argfile(argfile, out, fnargs, parallel, out_keys, input_files, work_dir) except: logger.exception() raise