Code example #1
File: __init__.py Project: lbeltrame/bcbio-nextgen
    def run_gatk(self, params, tmp_dir=None, log_error=True,
                 data=None, region=None, memscale=None, parallel_gc=False, ld_preload=False):
        """Top level interface to running a GATK command.

        ld_preload injects required libraries for Java JNI calls:
        https://gatkforums.broadinstitute.org/gatk/discussion/8810/something-about-create-pon-workflow
        """
        needs_java7 = LooseVersion(self.get_gatk_version()) < LooseVersion("3.6")
        # For old Java requirements use global java 7
        if needs_java7:
            setpath.remove_bcbiopath()
        with tx_tmpdir(self._config) as local_tmp_dir:
            if tmp_dir is None:
                tmp_dir = local_tmp_dir
            cl = self.cl_gatk(params, tmp_dir, memscale=memscale, parallel_gc=parallel_gc)
            atype_index = params.index("-T") if params.count("-T") > 0 \
                          else params.index("--analysis_type")
            prog = params[atype_index + 1]
            cl = fix_missing_spark_user(cl, prog, params)
            if ld_preload:
                cl = "export LD_PRELOAD=%s/lib/libopenblas.so && %s" % (os.path.dirname(utils.get_bcbio_bin()), cl)
            do.run(cl, "GATK: {0}".format(prog), data, region=region,
                   log_error=log_error)
        if needs_java7:
            setpath.prepend_bcbiopath()
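
The remove_bcbiopath / prepend_bcbiopath pair brackets the GATK call: bcbio's bundled tool directory is taken off PATH so a global Java 7 is found for pre-3.6 GATK releases, then restored afterwards. The bcbio.setpath module itself is not shown on this page; a minimal sketch of the idea, assuming the helpers simply edit os.environ["PATH"] around a hypothetical BCBIO_BIN directory:

import os

BCBIO_BIN = "/usr/local/share/bcbio/anaconda/bin"  # hypothetical install path

def prepend_bcbiopath():
    # Put the bundled tool directory first so bcbio's own java, samtools,
    # etc. shadow any system copies.
    parts = [p for p in os.environ.get("PATH", "").split(os.pathsep)
             if p and p != BCBIO_BIN]
    os.environ["PATH"] = os.pathsep.join([BCBIO_BIN] + parts)

def remove_bcbiopath():
    # Drop the bundled directory so lookups fall back to system tools,
    # here the global java 7 needed by old GATK releases.
    parts = [p for p in os.environ.get("PATH", "").split(os.pathsep)
             if p and p != BCBIO_BIN]
    os.environ["PATH"] = os.pathsep.join(parts)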
Code example #2
 def run_gatk(self,
              params,
              tmp_dir=None,
              log_error=True,
              data=None,
              region=None,
              memscale=None,
              parallel_gc=False):
     needs_java7 = LooseVersion(
         self.get_gatk_version()) < LooseVersion("3.6")
     # For old Java requirements use global java 7
     if needs_java7:
         setpath.remove_bcbiopath()
     with tx_tmpdir(self._config) as local_tmp_dir:
         if tmp_dir is None:
             tmp_dir = local_tmp_dir
         cl = self.cl_gatk(params,
                           tmp_dir,
                           memscale=memscale,
                           parallel_gc=parallel_gc)
         atype_index = params.index("-T") if params.count("-T") > 0 \
                       else params.index("--analysis_type")
         prog = params[atype_index + 1]
         cl = fix_missing_spark_user(cl, prog, params)
         do.run(cl,
                "GATK: {0}".format(prog),
                data,
                region=region,
                log_error=log_error)
     if needs_java7:
         setpath.prepend_bcbiopath()
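
This older variant predates the ld_preload option but keeps the same Java 7 gate, which hinges on LooseVersion (from distutils.version, deprecated in recent Pythons) comparing GATK version strings numerically on their leading components:

from distutils.version import LooseVersion

assert LooseVersion("3.5-0") < LooseVersion("3.6")       # needs java 7
assert not LooseVersion("3.8-1") < LooseVersion("3.6")   # java 8 is fine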
Code example #3
 def run(fn_name, items):
     setpath.prepend_bcbiopath()
     out = []
     fn, fn_name = (fn_name, fn_name.__name__) if callable(fn_name) else (
         _get_ipython_fn(fn_name, parallel), fn_name)
     items = [x for x in items if x is not None]
     items = diagnostics.track_parallel(items, fn_name)
     logger.info("ipython: %s" % fn_name)
     if len(items) > 0:
         items = [
             config_utils.add_cores_to_config(x, parallel["cores_per_job"],
                                              parallel) for x in items
         ]
         if "wrapper" in parallel:
             wrap_parallel = {
                 k: v
                 for k, v in parallel.items() if k in set(["fresources"])
             }
             items = [[fn_name] + parallel.get("wrapper_args", []) +
                      [wrap_parallel] + list(x) for x in items]
         items = zip_args([args for args in items])
         for data in view.map_sync(fn, items, track=False):
             if data:
                 out.extend(unzip_args(data))
     return out
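
When a wrapper is configured, each work item is repacked as [fn_name, *wrapper_args, wrap_parallel, *original_args] before zip_args prepares it for transport to the engines. An illustration of that repacking, with hypothetical parallel settings and work item; only the list shuffling mirrors the "wrapper" branch above:

parallel = {"wrapper": "runfn", "wrapper_args": [{"type": "ipython"}],
            "fresources": {"memory": 4}, "cores_per_job": 1}
wrap_parallel = {k: v for k, v in parallel.items() if k in set(["fresources"])}
item = ({"description": "sample1"},)   # one work item: a tuple of args
packed = ["process_alignment"] + parallel.get("wrapper_args", []) + \
         [wrap_parallel] + list(item)
# packed == ['process_alignment', {'type': 'ipython'},
#            {'fresources': {'memory': 4}}, {'description': 'sample1'}]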
Code example #4
def process(args):
    """Run the function in args.name given arguments in args.argfile.
    """
    # Set environment to standard to use periods for decimals and avoid localization
    locale_to_use = utils.get_locale()
    os.environ["LC_ALL"] = locale_to_use
    os.environ["LC"] = locale_to_use
    os.environ["LANG"] = locale_to_use
    setpath.prepend_bcbiopath()
    try:
        fn = getattr(multitasks, args.name)
    except AttributeError:
        raise AttributeError(
            "Did not find exposed function in bcbio.distributed.multitasks named '%s'"
            % args.name)
    if args.moreargs or args.raw:
        fnargs = [args.argfile] + args.moreargs
        work_dir = None
        argfile = None
    else:
        with open(args.argfile) as in_handle:
            fnargs = yaml.safe_load(in_handle)
        work_dir = os.path.dirname(args.argfile)
        fnargs = config_utils.merge_resources(fnargs)
        argfile = args.outfile if args.outfile else "%s-out%s" % os.path.splitext(
            args.argfile)
    if not work_dir:
        work_dir = os.getcwd()
    if len(fnargs) > 0 and fnargs[0] == "cwl":
        fnargs, parallel, out_keys, input_files = _world_from_cwl(
            args.name, fnargs[1:], work_dir)
        # Can remove this awkward Docker merge when we do not need custom GATK3 installs
        fnargs = config_utils.merge_resources(fnargs)
        argfile = os.path.join(work_dir, "cwl.output.json")
    else:
        parallel, out_keys, input_files = None, {}, []
    with utils.chdir(work_dir):
        with contextlib.closing(
                log.setup_local_logging(parallel={"wrapper": "runfn"})):
            try:
                out = fn(*fnargs)
            except:
                logger.exception()
                raise
            finally:
                # Clean up any copied and unpacked workflow inputs, avoiding extra disk usage
                wf_input_dir = os.path.join(work_dir, "wf-inputs")
                if os.path.exists(wf_input_dir) and os.path.isdir(
                        wf_input_dir):
                    shutil.rmtree(wf_input_dir)
    if argfile:
        try:
            _write_out_argfile(argfile, out, fnargs, parallel, out_keys,
                               input_files, work_dir)
        except:
            logger.exception()
            raise
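
The default argfile name comes from splitting the input file's extension and interleaving -out, so results land next to the arguments they came from:

import os

print("%s-out%s" % os.path.splitext("work/align-args.yaml"))
# -> work/align-args-out.yaml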
Code example #5
File: runfn.py Project: chapmanb/bcbio-nextgen
def process(args):
    """Run the function in args.name given arguments in args.argfile.
    """
    # Set environment to standard to use periods for decimals and avoid localization
    os.environ["LC_ALL"] = "C"
    os.environ["LC"] = "C"
    os.environ["LANG"] = "C"
    setpath.prepend_bcbiopath()
    try:
        fn = getattr(multitasks, args.name)
    except AttributeError:
        raise AttributeError("Did not find exposed function in bcbio.distributed.multitasks named '%s'" % args.name)
    if args.moreargs or args.raw:
        fnargs = [args.argfile] + args.moreargs
        work_dir = None
        argfile = None
    else:
        with open(args.argfile) as in_handle:
            fnargs = yaml.safe_load(in_handle)
        work_dir = os.path.dirname(args.argfile)
        fnargs = config_utils.merge_resources(fnargs)
        argfile = args.outfile if args.outfile else "%s-out%s" % os.path.splitext(args.argfile)
    if not work_dir:
        work_dir = os.getcwd()
    if len(fnargs) > 0 and fnargs[0] == "cwl":
        fnargs, parallel, out_keys, input_files = _world_from_cwl(args.name, fnargs[1:], work_dir)
        # Can remove this awkward Docker merge when we do not need custom GATK3 installs
        fnargs = config_utils.merge_resources(fnargs)
        argfile = os.path.join(work_dir, "cwl.output.json")
    else:
        parallel, out_keys, input_files = None, {}, []
    with utils.chdir(work_dir):
        with contextlib.closing(log.setup_local_logging(parallel={"wrapper": "runfn"})):
            try:
                out = fn(*fnargs)
            except:
                logger.exception()
                raise
            finally:
                # Clean up any copied and unpacked workflow inputs, avoiding extra disk usage
                wf_input_dir = os.path.join(work_dir, "wf-inputs")
                if os.path.exists(wf_input_dir) and os.path.isdir(wf_input_dir):
                    shutil.rmtree(wf_input_dir)
    if argfile:
        try:
            _write_out_argfile(argfile, out, fnargs, parallel, out_keys, input_files, work_dir)
        except:
            logger.exception()
            raise
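
This variant hard-codes the "C" locale where the newer one above asks utils.get_locale() for a usable locale; either way the goal is that external tools inherit these variables and print decimals with periods rather than localized commas. A standard-library check of that effect:

import locale
import os

os.environ["LC_ALL"] = "C"            # inherited by child processes
locale.setlocale(locale.LC_ALL, "C")  # applied in-process as well
assert locale.localeconv()["decimal_point"] == "."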
Code example #6
File: ipython.py Project: chapmanb/bcbio-nextgen
 def run(fn_name, items):
     setpath.prepend_bcbiopath()
     out = []
     fn, fn_name = (fn_name, fn_name.__name__) if callable(fn_name) else (_get_ipython_fn(fn_name, parallel), fn_name)
     items = [x for x in items if x is not None]
     items = diagnostics.track_parallel(items, fn_name)
     logger.info("ipython: %s" % fn_name)
     if len(items) > 0:
         items = [config_utils.add_cores_to_config(x, parallel["cores_per_job"], parallel) for x in items]
         if "wrapper" in parallel:
             wrap_parallel = {k: v for k, v in parallel.items() if k in set(["fresources"])}
             items = [[fn_name] + parallel.get("wrapper_args", []) + [wrap_parallel] + list(x) for x in items]
         items = zip_args([args for args in items])
         for data in view.map_sync(fn, items, track=False):
             if data:
                 out.extend(unzip_args(data))
     return out
Code example #7
Usage:
  bcbio_nextgen.py <config_file> [<fc_dir>] [<run_info_yaml>]
     -t type of parallelization to use:
          - local: Non-distributed, possibly multiple if n > 1 (default)
          - ipython: IPython distributed processing
     -n total number of processes to use
     -s scheduler for ipython parallelization (lsf, sge, slurm, torque, pbspro)
     -q queue to submit jobs for ipython parallelization
"""
import os
import argparse
import sys

from bcbio.setpath import prepend_bcbiopath

prepend_bcbiopath()

from bcbio import install, utils, workflow
from bcbio.illumina import machine
from bcbio.distributed import runfn, clargs
from bcbio.pipeline.main import run_main
from bcbio.server import main as server_main
from bcbio.graph import graph
from bcbio.provenance import programs
from bcbio.pipeline import version


def main(**kwargs):
    run_main(**kwargs)
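
prepend_bcbiopath() deliberately runs before the remaining bcbio imports, so anything resolved at import time already sees the bundled tools. Following the usage text above, a distributed run might be launched as (hypothetical file names and cluster settings):

  bcbio_nextgen.py bcbio_system.yaml fc_dir run_info.yaml -t ipython -n 64 -s slurm -q general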

Code example #8
Usage:
  bcbio_nextgen.py <config_file> [<fc_dir>] [<run_info_yaml>]
     -t type of parallelization to use:
          - local: Non-distributed, possibly multiple if n > 1 (default)
          - ipython: IPython distributed processing
     -n total number of processes to use
     -s scheduler for ipython parallelization (lsf, sge, slurm, torque, pbspro)
     -q queue to submit jobs for ipython parallelization
"""
from __future__ import print_function
import os
import argparse
import sys

from bcbio.setpath import prepend_bcbiopath
prepend_bcbiopath()
  
from bcbio import install, utils, workflow
from bcbio.illumina import machine
from bcbio.distributed import runfn, clargs
from bcbio.pipeline.main import run_main
from bcbio.graph import graph
from bcbio.provenance import programs
from bcbio.pipeline import version

def main(**kwargs):
    run_main(**kwargs)

def parse_cl_args(in_args):
    """Parse input commandline arguments, handling multiple cases.