def run_gatk(self, params, tmp_dir=None, log_error=True,
             data=None, region=None, memscale=None, parallel_gc=False,
             ld_preload=False):
    """Top level interface to running a GATK command.

    Builds the command line with ``self.cl_gatk``, takes the program name
    from the value following ``-T`` (or ``--analysis_type``) in ``params``
    and executes the command through ``do.run``.

    params -- list of GATK command line arguments. It must contain ``-T``
      or ``--analysis_type`` followed by the analysis name; otherwise
      ``list.index`` raises ValueError.
    tmp_dir -- temporary directory handed to ``cl_gatk``. Defaults to the
      directory yielded by ``tx_tmpdir``.
    log_error, data, region -- passed through to ``do.run``.
    memscale, parallel_gc -- passed through to ``cl_gatk``.
    ld_preload -- injects required libraries for Java JNI calls:
      https://gatkforums.broadinstitute.org/gatk/discussion/8810/something-about-create-pon-workflow

    For GATK versions before 3.6 ``setpath.remove_bcbiopath()`` is called
    first and is always undone with ``setpath.prepend_bcbiopath()``, also
    when building or running the command raises.
    """
    needs_java7 = LooseVersion(self.get_gatk_version()) < LooseVersion("3.6")
    # For old Java requirements use global java 7
    if needs_java7:
        setpath.remove_bcbiopath()
    try:
        with tx_tmpdir(self._config) as local_tmp_dir:
            if tmp_dir is None:
                tmp_dir = local_tmp_dir
            cl = self.cl_gatk(params, tmp_dir, memscale=memscale, parallel_gc=parallel_gc)
            # The analysis name is the argument right after the type flag.
            atype_flag = "-T" if "-T" in params else "--analysis_type"
            prog = params[params.index(atype_flag) + 1]
            cl = fix_missing_spark_user(cl, prog, params)
            if ld_preload:
                cl = "export LD_PRELOAD=%s/lib/libopenblas.so && %s" % (
                    os.path.dirname(utils.get_bcbio_bin()), cl)
            do.run(cl, "GATK: {0}".format(prog), data, region=region,
                   log_error=log_error)
    finally:
        # Undo the path change even on failure so later commands started by
        # this process are not left with the modified setting.
        if needs_java7:
            setpath.prepend_bcbiopath()
def run_gatk(self, params, tmp_dir=None, log_error=True,
             data=None, region=None, memscale=None, parallel_gc=False,
             ld_preload=False):
    """Top level interface to running a GATK command.

    Builds the command line with ``self.cl_gatk``, takes the program name
    from the value following ``-T`` (or ``--analysis_type``) in ``params``
    and executes the command through ``do.run``.

    params -- list of GATK command line arguments. It must contain ``-T``
      or ``--analysis_type`` followed by the analysis name; otherwise
      ``list.index`` raises ValueError.
    tmp_dir -- temporary directory handed to ``cl_gatk``. Defaults to the
      directory yielded by ``tx_tmpdir``.
    log_error, data, region -- passed through to ``do.run``.
    memscale, parallel_gc -- passed through to ``cl_gatk``.
    ld_preload -- injects required libraries for Java JNI calls:
      https://gatkforums.broadinstitute.org/gatk/discussion/8810/something-about-create-pon-workflow

    For GATK versions before 3.6 ``setpath.remove_bcbiopath()`` is called
    first and is always undone with ``setpath.prepend_bcbiopath()``, also
    when building or running the command raises.
    """
    needs_java7 = LooseVersion(self.get_gatk_version()) < LooseVersion("3.6")
    # For old Java requirements use global java 7
    if needs_java7:
        setpath.remove_bcbiopath()
    try:
        with tx_tmpdir(self._config) as local_tmp_dir:
            if tmp_dir is None:
                tmp_dir = local_tmp_dir
            cl = self.cl_gatk(params, tmp_dir, memscale=memscale, parallel_gc=parallel_gc)
            # The analysis name is the argument right after the type flag.
            atype_flag = "-T" if "-T" in params else "--analysis_type"
            prog = params[params.index(atype_flag) + 1]
            cl = fix_missing_spark_user(cl, prog, params)
            if ld_preload:
                cl = "export LD_PRELOAD=%s/lib/libopenblas.so && %s" % (
                    os.path.dirname(utils.get_bcbio_bin()), cl)
            do.run(cl, "GATK: {0}".format(prog), data, region=region,
                   log_error=log_error)
    finally:
        # Undo the path change even on failure so later commands started by
        # this process are not left with the modified setting.
        if needs_java7:
            setpath.prepend_bcbiopath()
def run_gatk(self, params, tmp_dir=None, log_error=True,
             data=None, region=None, memscale=None, parallel_gc=False):
    """Top level interface to running a GATK command.

    Builds the command line with ``self.cl_gatk``, takes the program name
    from the value following ``-T`` (or ``--analysis_type``) in ``params``
    and executes the command through ``do.run``.

    params -- list of GATK command line arguments. It must contain ``-T``
      or ``--analysis_type`` followed by the analysis name; otherwise
      ``list.index`` raises ValueError.
    tmp_dir -- temporary directory handed to ``cl_gatk``. Defaults to the
      directory yielded by ``tx_tmpdir``.
    log_error, data, region -- passed through to ``do.run``.
    memscale, parallel_gc -- passed through to ``cl_gatk``.

    For GATK versions before 3.6 ``setpath.remove_bcbiopath()`` is called
    first and is always undone with ``setpath.prepend_bcbiopath()``, also
    when building or running the command raises.
    """
    needs_java7 = LooseVersion(self.get_gatk_version()) < LooseVersion("3.6")
    # For old Java requirements use global java 7
    if needs_java7:
        setpath.remove_bcbiopath()
    try:
        with tx_tmpdir(self._config) as local_tmp_dir:
            if tmp_dir is None:
                tmp_dir = local_tmp_dir
            cl = self.cl_gatk(params, tmp_dir, memscale=memscale, parallel_gc=parallel_gc)
            # The analysis name is the argument right after the type flag.
            atype_flag = "-T" if "-T" in params else "--analysis_type"
            prog = params[params.index(atype_flag) + 1]
            cl = fix_missing_spark_user(cl, prog, params)
            do.run(cl, "GATK: {0}".format(prog), data, region=region,
                   log_error=log_error)
    finally:
        # Undo the path change even on failure so later commands started by
        # this process are not left with the modified setting.
        if needs_java7:
            setpath.prepend_bcbiopath()
def run(fn_name, items):
    """Map a function over ``items`` through the IPython view.

    fn_name -- a callable, used as is and reported under its ``__name__``,
      or a name that is resolved with ``_get_ipython_fn``.
    items -- argument sets to run; ``None`` entries are dropped.

    ``parallel`` and ``view`` come from the enclosing scope. Returns a flat
    list built from the unzipped, non-empty results of ``view.map_sync``.
    """
    setpath.prepend_bcbiopath()
    out = []
    if callable(fn_name):
        fn, fn_name = fn_name, fn_name.__name__
    else:
        fn = _get_ipython_fn(fn_name, parallel)
    jobs = diagnostics.track_parallel([x for x in items if x is not None], fn_name)
    logger.info("ipython: %s" % fn_name)
    if len(jobs) == 0:
        return out
    jobs = [config_utils.add_cores_to_config(job, parallel["cores_per_job"], parallel)
            for job in jobs]
    if "wrapper" in parallel:
        # Only this subset of the parallel settings travels with a wrapped call.
        passthrough = set(["fresources"])
        wrap_parallel = dict((k, v) for k, v in parallel.items() if k in passthrough)
        jobs = [[fn_name] + parallel.get("wrapper_args", []) + [wrap_parallel] + list(job)
                for job in jobs]
    for result in view.map_sync(fn, zip_args(list(jobs)), track=False):
        if not result:
            continue
        out.extend(unzip_args(result))
    return out
def process(args):
    """Run the function in args.name given arguments in args.argfile.

    Reads ``args.name``, ``args.argfile``, ``args.moreargs``, ``args.raw``
    and ``args.outfile``. With ``moreargs`` or ``raw`` set, the function is
    called with ``[args.argfile] + args.moreargs`` and no output file is
    written. Otherwise the arguments are loaded from the YAML file
    ``args.argfile`` and the result is passed to ``_write_out_argfile``.

    Raises AttributeError when ``multitasks`` has no attribute named
    ``args.name``. Exceptions from the called function or from writing the
    output are logged and re-raised.
    """
    # Set environment to standard to use periods for decimals and avoid localization
    locale_to_use = utils.get_locale()
    for env_name in ("LC_ALL", "LC", "LANG"):
        os.environ[env_name] = locale_to_use
    setpath.prepend_bcbiopath()
    try:
        fn = getattr(multitasks, args.name)
    except AttributeError:
        raise AttributeError(
            "Did not find exposed function in bcbio.distributed.multitasks named '%s'"
            % args.name)
    if not (args.moreargs or args.raw):
        with open(args.argfile) as in_handle:
            fnargs = yaml.safe_load(in_handle)
        work_dir = os.path.dirname(args.argfile)
        fnargs = config_utils.merge_resources(fnargs)
        argfile = args.outfile or "%s-out%s" % os.path.splitext(args.argfile)
    else:
        fnargs = [args.argfile] + args.moreargs
        work_dir = None
        argfile = None
    work_dir = work_dir or os.getcwd()
    is_cwl = len(fnargs) > 0 and fnargs[0] == "cwl"
    if is_cwl:
        fnargs, parallel, out_keys, input_files = _world_from_cwl(
            args.name, fnargs[1:], work_dir)
        # Can remove this awkward Docker merge when we do not need custom GATK3 installs
        fnargs = config_utils.merge_resources(fnargs)
        argfile = os.path.join(work_dir, "cwl.output.json")
    else:
        parallel, out_keys, input_files = None, {}, []
    with utils.chdir(work_dir):
        local_logging = log.setup_local_logging(parallel={"wrapper": "runfn"})
        with contextlib.closing(local_logging):
            try:
                out = fn(*fnargs)
            except:
                logger.exception()
                raise
            finally:
                # Clean up any copied and unpacked workflow inputs, avoiding extra disk usage
                wf_input_dir = os.path.join(work_dir, "wf-inputs")
                if os.path.exists(wf_input_dir) and os.path.isdir(wf_input_dir):
                    shutil.rmtree(wf_input_dir)
    # NOTE(review): the output step is placed after the chdir/logging
    # contexts have closed -- confirm against the original layout.
    if not argfile:
        return
    try:
        _write_out_argfile(argfile, out, fnargs, parallel, out_keys,
                           input_files, work_dir)
    except:
        logger.exception()
        raise
def process(args):
    """Run the function in args.name given arguments in args.argfile.

    Reads ``args.name``, ``args.argfile``, ``args.moreargs``, ``args.raw``
    and ``args.outfile``. With ``moreargs`` or ``raw`` set, the function is
    called with ``[args.argfile] + args.moreargs`` and no output file is
    written. Otherwise the arguments are loaded from the YAML file
    ``args.argfile`` and the result is passed to ``_write_out_argfile``.

    Raises AttributeError when ``multitasks`` has no attribute named
    ``args.name``. Exceptions from the called function or from writing the
    output are logged and re-raised.
    """
    # Set environment to standard to use periods for decimals and avoid localization
    for env_name in ("LC_ALL", "LC", "LANG"):
        os.environ[env_name] = "C"
    setpath.prepend_bcbiopath()
    try:
        fn = getattr(multitasks, args.name)
    except AttributeError:
        raise AttributeError(
            "Did not find exposed function in bcbio.distributed.multitasks named '%s'"
            % args.name)
    if not (args.moreargs or args.raw):
        with open(args.argfile) as in_handle:
            fnargs = yaml.safe_load(in_handle)
        work_dir = os.path.dirname(args.argfile)
        fnargs = config_utils.merge_resources(fnargs)
        argfile = args.outfile or "%s-out%s" % os.path.splitext(args.argfile)
    else:
        fnargs = [args.argfile] + args.moreargs
        work_dir = None
        argfile = None
    work_dir = work_dir or os.getcwd()
    is_cwl = len(fnargs) > 0 and fnargs[0] == "cwl"
    if is_cwl:
        fnargs, parallel, out_keys, input_files = _world_from_cwl(
            args.name, fnargs[1:], work_dir)
        # Can remove this awkward Docker merge when we do not need custom GATK3 installs
        fnargs = config_utils.merge_resources(fnargs)
        argfile = os.path.join(work_dir, "cwl.output.json")
    else:
        parallel, out_keys, input_files = None, {}, []
    with utils.chdir(work_dir):
        local_logging = log.setup_local_logging(parallel={"wrapper": "runfn"})
        with contextlib.closing(local_logging):
            try:
                out = fn(*fnargs)
            except:
                logger.exception()
                raise
            finally:
                # Clean up any copied and unpacked workflow inputs, avoiding extra disk usage
                wf_input_dir = os.path.join(work_dir, "wf-inputs")
                if os.path.exists(wf_input_dir) and os.path.isdir(wf_input_dir):
                    shutil.rmtree(wf_input_dir)
    # NOTE(review): the output step is placed after the chdir/logging
    # contexts have closed -- confirm against the original layout.
    if not argfile:
        return
    try:
        _write_out_argfile(argfile, out, fnargs, parallel, out_keys,
                           input_files, work_dir)
    except:
        logger.exception()
        raise
def run(fn_name, items):
    """Map a function over ``items`` through the IPython view.

    fn_name -- a callable, used as is and reported under its ``__name__``,
      or a name that is resolved with ``_get_ipython_fn``.
    items -- argument sets to run; ``None`` entries are dropped.

    ``parallel`` and ``view`` come from the enclosing scope. Returns a flat
    list built from the unzipped, non-empty results of ``view.map_sync``.
    """
    setpath.prepend_bcbiopath()
    out = []
    if callable(fn_name):
        fn, fn_name = fn_name, fn_name.__name__
    else:
        fn = _get_ipython_fn(fn_name, parallel)
    jobs = diagnostics.track_parallel([x for x in items if x is not None], fn_name)
    logger.info("ipython: %s" % fn_name)
    if len(jobs) == 0:
        return out
    jobs = [config_utils.add_cores_to_config(job, parallel["cores_per_job"], parallel)
            for job in jobs]
    if "wrapper" in parallel:
        # Only this subset of the parallel settings travels with a wrapped call.
        passthrough = set(["fresources"])
        wrap_parallel = dict((k, v) for k, v in parallel.items() if k in passthrough)
        jobs = [[fn_name] + parallel.get("wrapper_args", []) + [wrap_parallel] + list(job)
                for job in jobs]
    for result in view.map_sync(fn, zip_args(list(jobs)), track=False):
        if not result:
            continue
        out.extend(unzip_args(result))
    return out
Usage: bcbio_nextgen.py <config_file> [<fc_dir>] [<run_info_yaml>] -t type of parallelization to use: - local: Non-distributed, possibly multiple if n > 1 (default) - ipython: IPython distributed processing -n total number of processes to use -s scheduler for ipython parallelization (lsf, sge, slurm, torque, pbspro) -q queue to submit jobs for ipython parallelization """ import os import argparse import sys from bcbio.setpath import prepend_bcbiopath prepend_bcbiopath() from bcbio import install, utils, workflow from bcbio.illumina import machine from bcbio.distributed import runfn, clargs from bcbio.pipeline.main import run_main from bcbio.server import main as server_main from bcbio.graph import graph from bcbio.provenance import programs from bcbio.pipeline import version def main(**kwargs): run_main(**kwargs)
Usage: bcbio_nextgen.py <config_file> [<fc_dir>] [<run_info_yaml>] -t type of parallelization to use: - local: Non-distributed, possibly multiple if n > 1 (default) - ipython: IPython distributed processing -n total number of processes to use -s scheduler for ipython parallelization (lsf, sge, slurm, torque, pbspro) -q queue to submit jobs for ipython parallelization """ from __future__ import print_function import os import argparse import sys from bcbio.setpath import prepend_bcbiopath prepend_bcbiopath() from bcbio import install, utils, workflow from bcbio.illumina import machine from bcbio.distributed import runfn, clargs from bcbio.pipeline.main import run_main from bcbio.graph import graph from bcbio.provenance import programs from bcbio.pipeline import version def main(**kwargs): run_main(**kwargs) def parse_cl_args(in_args): """Parse input commandline arguments, handling multiple cases.