@tornado.gen.coroutine  # required so the yield-based gen.Task call runs to completion
def get(self):
    # yaml.safe_load accepts a plain string; the Python 2 StringIO.StringIO wrapper is unnecessary
    rargs = yaml.safe_load(str(self.get_argument("args", "{}")))
    system_config = args.config or "bcbio_system.yaml"
    if "system_config" in rargs:
        system_config = os.path.join(rargs["work_dir"], "web-system_config.yaml")
        with open(system_config, "w") as out_handle:
            yaml.dump(rargs["system_config"], out_handle,
                      default_flow_style=False, allow_unicode=False)
    if "sample_config" in rargs:
        sample_config = os.path.join(rargs["work_dir"], "web-sample_config.yaml")
        with open(sample_config, "w") as out_handle:
            yaml.dump(rargs["sample_config"], out_handle,
                      default_flow_style=False, allow_unicode=False)
    else:
        sample_config = rargs.get("run_config")
    kwargs = {"work_dir": rargs["work_dir"],
              "config_file": system_config,
              "run_info_yaml": sample_config,
              "fc_dir": rargs.get("fc_dir"),
              "parallel": clargs.to_parallel(_rargs_to_parallel_args(rargs)),
              "app": self.application}
    run_id = yield tornado.gen.Task(run_bcbio_nextgen, **kwargs)
    self.write(run_id)
    self.finish()
def cmd_ipython(args):
    args = defaults.update_check_args(args, "Could not run IPython parallel analysis.")
    args = install.docker_image_arg(args)
    parallel = clargs.to_parallel(args, "bcbiovm.docker")
    parallel["wrapper"] = "runfn"
    with open(args.sample_config) as in_handle:
        # safe_load avoids executing arbitrary YAML tags from the input file
        ready_config, _ = mounts.normalize_config(yaml.safe_load(in_handle), args.fcdir)
    work_dir = os.getcwd()
    ready_config_file = os.path.join(
        work_dir, "%s-ready%s" % os.path.splitext(os.path.basename(args.sample_config)))
    with open(ready_config_file, "w") as out_handle:
        yaml.safe_dump(ready_config, out_handle, default_flow_style=False,
                       allow_unicode=False)
    systemconfig = run.local_system_config(args.systemconfig, args.datadir, work_dir)
    cur_pack = pack.shared_filesystem(work_dir, args.datadir, args.tmpdir)
    parallel["wrapper_args"] = [devel.DOCKER,
                                {"sample_config": ready_config_file,
                                 "fcdir": args.fcdir,
                                 "pack": cur_pack,
                                 "systemconfig": systemconfig,
                                 "image": args.image}]
    # For testing, run on a local ipython cluster
    parallel["run_local"] = parallel.get("queue") == "localrun"
    from bcbio.pipeline import main
    main.run_main(work_dir, run_info_yaml=ready_config_file,
                  config_file=systemconfig, fc_dir=args.fcdir, parallel=parallel)
def cmd_ipython(args):
    args = defaults.update_check_args(args, "Could not run IPython parallel analysis.")
    args = install.docker_image_arg(args)
    parallel = clargs.to_parallel(args, "bcbiovm.docker")
    parallel["wrapper"] = "runfn"
    with open(args.sample_config) as in_handle:
        # safe_load avoids executing arbitrary YAML tags from the input file
        ready_config, _ = mounts.normalize_config(yaml.safe_load(in_handle), args.fcdir)
    work_dir = os.getcwd()
    ready_config_file = os.path.join(
        work_dir, "%s-ready%s" % os.path.splitext(os.path.basename(args.sample_config)))
    with open(ready_config_file, "w") as out_handle:
        yaml.safe_dump(ready_config, out_handle, default_flow_style=False,
                       allow_unicode=False)
    parallel["wrapper_args"] = [DOCKER,
                                {"sample_config": ready_config_file,
                                 "fcdir": args.fcdir,
                                 "pack": pack.shared_filesystem(work_dir, args.datadir,
                                                                args.tmpdir),
                                 "systemconfig": args.systemconfig,
                                 "image": args.image}]
    # For testing, run on a local ipython cluster
    parallel["run_local"] = parallel.get("queue") == "localrun"
    workdir_mount = "%s:%s" % (work_dir, DOCKER["work_dir"])
    manage.run_bcbio_cmd(args.image, [workdir_mount],
                         ["version", "--workdir=%s" % DOCKER["work_dir"]])
    main.run_main(work_dir, run_info_yaml=ready_config_file,
                  config_file=args.systemconfig, fc_dir=args.fcdir, parallel=parallel)
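For orientation, a driver sketch like the following would exercise cmd_ipython. The attribute names mirror the fields the function actually reads; the values, and the scheduler-related fields presumably consumed by clargs.to_parallel, are assumptions:

from argparse import Namespace

# Hypothetical invocation; every value below is invented for illustration.
args = Namespace(sample_config="sample_config.yaml", fcdir="/data/fastq",
                 systemconfig=None, datadir="/usr/local/share/bcbio",
                 tmpdir="/tmp", image="bcbio/bcbio",
                 numcores=16, scheduler="slurm", queue="general")
cmd_ipython(args)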
def _calculate_resources(data, args, resources):
    parallel = clargs.to_parallel(args)
    config = data[0][0]['config']
    config['resources'].update({resources['name']: {'memory': "%sg" % resources['mem'],
                                                    'cores': resources['cores']}})
    parallel.update({'progs': [resources['name']]})
    log.setup_log(config, parallel)
    dirs = {'work': os.path.abspath(os.getcwd())}
    system.write_info(dirs, parallel, config)
    sysinfo = system.machine_info()[0]
    log.logger.info("Number of items %s" % len(data))
    parallel = res.calculate(parallel, data, sysinfo, config)
    log.logger.info(parallel)
    return parallel
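A minimal usage sketch for _calculate_resources follows. The nested shape of `data` (a list of one-item sample lists, matching the `data[0][0]['config']` access above) comes from the function itself; the concrete values are invented:

# Hypothetical call; `samples` would come from the surrounding pipeline setup.
resources = {"name": "samtools", "mem": 2, "cores": 16}  # per-core memory (GB) and core count
parallel = _calculate_resources(samples, args, resources)
# parallel now carries program-aware settings sized by res.calculate()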
@tornado.gen.coroutine  # required so the yield-based gen.Task call runs to completion
def get(self):
    # yaml.safe_load accepts a plain string; no StringIO wrapper is needed
    rargs = yaml.safe_load(str(self.get_argument("args", "{}")))
    system_config = args.config or "bcbio_system.yaml"
    if "system_config" in rargs:
        system_config = os.path.join(rargs["work_dir"], "web-system_config.yaml")
        with open(system_config, "w") as out_handle:
            yaml.safe_dump(rargs["system_config"], out_handle,
                           default_flow_style=False, allow_unicode=False)
    if "sample_config" in rargs:
        sample_config = os.path.join(rargs["work_dir"], "web-sample_config.yaml")
        with open(sample_config, "w") as out_handle:
            yaml.safe_dump(rargs["sample_config"], out_handle,
                           default_flow_style=False, allow_unicode=False)
    else:
        sample_config = rargs.get("run_config")
    kwargs = {"workdir": rargs["work_dir"],
              "config_file": system_config,
              "run_info_yaml": sample_config,
              "fc_dir": rargs.get("fc_dir"),
              "parallel": clargs.to_parallel(_rargs_to_parallel_args(rargs, args)),
              "app": self.application}
    run_id = yield tornado.gen.Task(run_bcbio_nextgen, **kwargs)
    self.write(run_id)
    self.finish()
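Both handler variants above read a YAML document from the "args" query parameter. A client-side sketch might look like the following; the endpoint path and the use of requests are assumptions, and only "work_dir" is strictly required by the code paths shown:

import yaml
import requests  # assumption: any HTTP client would do

payload = {"work_dir": "/data/run1/work",
           "run_config": "/data/run1/sample_config.yaml",
           "fc_dir": "/data/run1/fastq"}
# Hypothetical endpoint; the actual route depends on the tornado application setup.
requests.get("http://localhost:8080/run", params={"args": yaml.safe_dump(payload)})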
def parse_cl_args(in_args):
    """Parse input commandline arguments, handling multiple cases.

    Returns the main config file and set of kwargs.
    """
    sub_cmds = {"upgrade": install.add_subparser,
                "server": server_main.add_subparser,
                "runfn": runfn.add_subparser,
                "graph": graph.add_subparser,
                "version": programs.add_subparser,
                "sequencer": machine.add_subparser}
    description = "Community developed high throughput sequencing analysis."
    parser = argparse.ArgumentParser(description=description)
    sub_cmd = None
    if len(in_args) > 0 and in_args[0] in sub_cmds:
        subparser_help = "bcbio-nextgen supplemental commands"
        subparsers = parser.add_subparsers(help=subparser_help)
        sub_cmds[in_args[0]](subparsers)
        sub_cmd = in_args[0]
    else:
        parser.add_argument("global_config", nargs="?",
                            help=("Global YAML configuration file specifying "
                                  "details about the system (optional, "
                                  "defaults to installed bcbio_system.yaml)"))
        parser.add_argument("fc_dir", nargs="?",
                            help=("A directory of Illumina output or fastq "
                                  "files to process (optional)"))
        parser.add_argument("run_config", nargs="*",
                            help=("YAML file with details about samples to "
                                  "process (required, unless using Galaxy "
                                  "LIMS as input)"))
        parser.add_argument("-n", "--numcores", type=int, default=1,
                            help="Total cores to use for processing")
        parser.add_argument("-t", "--paralleltype",
                            choices=["local", "ipython"], default="local",
                            help="Approach to parallelization")
        parser.add_argument("-s", "--scheduler",
                            choices=["lsf", "sge", "torque", "slurm", "pbspro"],
                            help="Scheduler to use for ipython parallel")
        parser.add_argument("--local_controller", default=False,
                            action="store_true", help="run controller locally")
        parser.add_argument("-q", "--queue",
                            help=("Scheduler queue to run jobs on, for "
                                  "ipython parallel"))
        parser.add_argument("-r", "--resources", default=[], action="append",
                            help=("Cluster specific resources specifications. "
                                  "Can be specified multiple times.\n"
                                  "Supports SGE, Torque, LSF and SLURM "
                                  "parameters."))
        parser.add_argument("--timeout", default=15, type=int,
                            help=("Number of minutes before cluster startup "
                                  "times out. Defaults to 15"))
        parser.add_argument("--retries", default=0, type=int,
                            help=("Number of retries of failed tasks during "
                                  "distributed processing. Default 0 "
                                  "(no retries)"))
        parser.add_argument("-p", "--tag", default="",
                            help="Tag name to label jobs on the cluster")
        parser.add_argument("-w", "--workflow",
                            help=("Run a workflow with the given commandline "
                                  "arguments"))
        parser.add_argument("--workdir", default=os.getcwd(),
                            help=("Directory to process in. Defaults to "
                                  "current working directory"))
        parser.add_argument("-v", "--version", help="Print current version",
                            action="store_true")
        # Hidden arguments passed downstream
        parser.add_argument("--only-metadata", help=argparse.SUPPRESS,
                            action="store_true", default=False)
    args = parser.parse_args(in_args)
    if hasattr(args, "workdir"):
        args.workdir = utils.safe_makedir(os.path.abspath(args.workdir))
    if hasattr(args, "global_config"):
        error_msg = _sanity_check_args(args)
        if error_msg:
            parser.error(error_msg)
        kwargs = {"parallel": clargs.to_parallel(args),
                  "workflow": args.workflow,
                  "workdir": args.workdir}
        kwargs = _add_inputs_to_kwargs(args, kwargs, parser)
        error_msg = _sanity_check_kwargs(kwargs)
        if error_msg:
            parser.error(error_msg)
    else:
        assert sub_cmd is not None
        kwargs = {"args": args, "config_file": None, sub_cmd: True}
    return kwargs
def parse_cl_args(in_args):
    """Parse input commandline arguments, handling multiple cases.

    Returns the main config file and set of kwargs.
    """
    sub_cmds = {"upgrade": install.add_subparser,
                "server": server_main.add_subparser,
                "runfn": runfn.add_subparser,
                "graph": graph.add_subparser,
                "version": programs.add_subparser,
                "sequencer": machine.add_subparser}
    description = "Community developed high throughput sequencing analysis."
    parser = argparse.ArgumentParser(description=description)
    sub_cmd = None
    if len(in_args) > 0 and in_args[0] in sub_cmds:
        subparser_help = "bcbio-nextgen supplemental commands"
        subparsers = parser.add_subparsers(help=subparser_help)
        sub_cmds[in_args[0]](subparsers)
        sub_cmd = in_args[0]
    else:
        parser.add_argument("global_config", nargs="?",
                            help=("Global YAML configuration file specifying "
                                  "details about the system (optional, "
                                  "defaults to installed bcbio_system.yaml)"))
        parser.add_argument("fc_dir", nargs="?",
                            help=("A directory of Illumina output or fastq "
                                  "files to process (optional)"))
        parser.add_argument("run_config", nargs="*",
                            help=("YAML file with details about samples to "
                                  "process (required, unless using Galaxy "
                                  "LIMS as input)"))
        parser.add_argument("-n", "--numcores", type=int, default=1,
                            help="Total cores to use for processing")
        parser.add_argument("-t", "--paralleltype",
                            choices=["local", "ipython"], default="local",
                            help="Approach to parallelization")
        parser.add_argument("-s", "--scheduler",
                            choices=["lsf", "sge", "torque", "slurm", "pbspro"],
                            help="Scheduler to use for ipython parallel")
        parser.add_argument("--local_controller", default=False,
                            action="store_true", help="run controller locally")
        parser.add_argument("-q", "--queue",
                            help=("Scheduler queue to run jobs on, for "
                                  "ipython parallel"))
        parser.add_argument("-r", "--resources", default=[], action="append",
                            help=("Cluster specific resources specifications. "
                                  "Can be specified multiple times.\n"
                                  "Supports SGE, Torque, LSF and SLURM "
                                  "parameters."))
        parser.add_argument("--timeout", default=15, type=int,
                            help=("Number of minutes before cluster startup "
                                  "times out. Defaults to 15"))
        parser.add_argument("--retries", default=0, type=int,
                            help=("Number of retries of failed tasks during "
                                  "distributed processing. Default 0 "
                                  "(no retries)"))
        parser.add_argument("-p", "--tag", default="",
                            help="Tag name to label jobs on the cluster")
        parser.add_argument("-w", "--workflow",
                            help=("Run a workflow with the given commandline "
                                  "arguments"))
        parser.add_argument("--workdir", default=os.getcwd(),
                            help=("Directory to process in. Defaults to "
                                  "current working directory"))
        parser.add_argument("-v", "--version", help="Print current version",
                            action="store_true")
        # Hidden arguments passed downstream
        parser.add_argument("--only-metadata", help=argparse.SUPPRESS,
                            action="store_true", default=False)
        parser.add_argument("--force-single", default=False, action="store_true",
                            help="Treat all files as single reads")
    args = parser.parse_args(in_args)
    if hasattr(args, "workdir") and args.workdir:
        args.workdir = utils.safe_makedir(os.path.abspath(args.workdir))
    if hasattr(args, "global_config"):
        error_msg = _sanity_check_args(args)
        if error_msg:
            parser.error(error_msg)
        kwargs = {"parallel": clargs.to_parallel(args),
                  "workflow": args.workflow,
                  "workdir": args.workdir}
        kwargs = _add_inputs_to_kwargs(args, kwargs, parser)
        error_msg = _sanity_check_kwargs(kwargs)
        if error_msg:
            parser.error(error_msg)
    else:
        assert sub_cmd is not None
        kwargs = {"args": args, "config_file": None, sub_cmd: True}
    return kwargs
try:
    system_config = os.path.join(_get_data_dir(), "galaxy", "bcbio_system.yaml")
except ValueError as err:
    print(err)
    print("WARNING: Attempting to read bcbio_system.yaml in the current directory.")
    system_config = "bcbio_system.yaml"
with open(system_config) as in_handle:
    config = yaml.safe_load(in_handle)
res = {'cores': args.cores_per_job}
config["algorithm"] = {"num_cores": args.cores_per_job}
config["resources"].update({'sambamba': res, 'samtools': res})
config["log_dir"] = os.path.join(os.path.abspath(os.getcwd()), "log")
parallel = clargs.to_parallel(args)
parallel.update({'progs': ['samtools', 'sambamba']})
parallel = log.create_base_logger(config, parallel)
log.setup_local_logging(config, parallel)
dirs = {'work': os.path.abspath(os.getcwd())}
system.write_info(dirs, parallel, config)
sysinfo = system.machine_info()[0]
samples = _get_samples_to_process(args.csv, out_dir, config,
                                  args.force_single, args.separators)
parallel = resources.calculate(parallel, [samples], sysinfo, config)
with prun.start(parallel, samples, config, dirs) as run_parallel:
    with profile.report("prepare bcbio samples", dirs):
        samples = run_parallel("prepare_bcbio_samples", samples)
create_new_csv(samples, args)
choices=["lsf", "slurm", "torque", "sge", "pbspro"]) parser.add_argument("-r", "--resources", help="Extra scheduler resource flags.", default=[], action="append") parser.add_argument("-q", "--queue", help="Queue to submit jobs to.") parser.add_argument("-p", "--tag", help="Tag name to label jobs on the cluster", default="bcb-prep") parser.add_argument("-t", "--paralleltype", choices=["local", "ipython"], default="local", help="Run with iptyhon") args = parser.parse_args() system_config = os.path.join(_get_data_dir(), "galaxy", "bcbio_system.yaml") if args.galaxy: system_config = args.galaxy with open(system_config) as in_handle: config = yaml.load(in_handle) parallel = clargs.to_parallel(args) parallel.update({'progs': args.progs}) dirs = {'work': os.path.abspath(os.getcwd())} if args.sys_info.find(";") > -1: info = args.sys_info.split(";") sysinfo = {'cores': int(info[0]), 'memory': float(info[1])} else: if utils.file_exists(args.sys_info): sysinfo = yaml.load(open(args.sys_info))[0] print "system info %s" % sysinfo samples = [] pipelines, config = _pair_samples_with_pipelines(args.yaml_file, config) for s in pipelines: samples = [item for item in pipelines[s]] print "number of samples %s" % len(samples) print "after calculate fn"
def parse_cl_args(in_args):
    """Parse input commandline arguments, handling multiple cases.

    Returns the main config file and set of kwargs.
    """
    sub_cmds = {"upgrade": install.add_subparser,
                "server": server_main.add_subparser,
                "runfn": runfn.add_subparser,
                "version": programs.add_subparser}
    parser = argparse.ArgumentParser(
        description="Best-practice pipelines for fully automated high throughput sequencing analysis.")
    sub_cmd = None
    if len(in_args) > 0 and in_args[0] in sub_cmds:
        subparsers = parser.add_subparsers(help="bcbio-nextgen supplemental commands")
        sub_cmds[in_args[0]](subparsers)
        sub_cmd = in_args[0]
    else:
        parser.add_argument("global_config", nargs="?",
                            help="Global YAML configuration file specifying details "
                                 "about the system (optional, defaults to installed "
                                 "bcbio_system.yaml)")
        parser.add_argument("fc_dir", nargs="?",
                            help="A directory of Illumina output or fastq files to "
                                 "process (optional)")
        parser.add_argument("run_config", nargs="*",
                            help="YAML file with details about samples to process "
                                 "(required, unless using Galaxy LIMS as input)")
        parser.add_argument("-n", "--numcores", type=int, default=1,
                            help="Total cores to use for processing")
        parser.add_argument("-t", "--paralleltype", choices=["local", "ipython"],
                            default="local", help="Approach to parallelization")
        parser.add_argument("-s", "--scheduler",
                            choices=["lsf", "sge", "torque", "slurm"],
                            help="Scheduler to use for ipython parallel")
        parser.add_argument("-q", "--queue",
                            help="Scheduler queue to run jobs on, for ipython parallel")
        parser.add_argument("-r", "--resources", default=[], action="append",
                            help=("Cluster specific resources specifications. "
                                  "Can be specified multiple times.\n"
                                  "Supports SGE, Torque, LSF and SLURM parameters."))
        parser.add_argument("--timeout", default=15, type=int,
                            help="Number of minutes before cluster startup times out. "
                                 "Defaults to 15")
        parser.add_argument("--retries", default=0, type=int,
                            help=("Number of retries of failed tasks during "
                                  "distributed processing. Default 0 (no retries)"))
        parser.add_argument("-p", "--tag", default="",
                            help="Tag name to label jobs on the cluster")
        parser.add_argument("-w", "--workflow",
                            help="Run a workflow with the given commandline arguments")
        parser.add_argument("--workdir", default=os.getcwd(),
                            help="Directory to process in. Defaults to current "
                                 "working directory")
        parser.add_argument("-v", "--version", help="Print current version",
                            action="store_true")
    args = parser.parse_args(in_args)
    if hasattr(args, "global_config"):
        error_msg = _sanity_check_args(args)
        if error_msg:
            parser.error(error_msg)
        kwargs = {"parallel": clargs.to_parallel(args),
                  "workflow": args.workflow,
                  "workdir": args.workdir}
        kwargs = _add_inputs_to_kwargs(args, kwargs, parser)
    else:
        assert sub_cmd is not None
        kwargs = {"args": args, "config_file": None, sub_cmd: True}
    return kwargs
def parse_cl_args(in_args):
    """Parse input commandline arguments, handling multiple cases.

    Returns the main config file and set of kwargs.
    """
    sub_cmds = {"upgrade": install.add_subparser,
                "server": server_main.add_subparser,
                "runfn": runfn.add_subparser,
                "version": programs.add_subparser,
                "sequencer": machine.add_subparser}
    parser = argparse.ArgumentParser(
        description="Best-practice pipelines for fully automated high throughput sequencing analysis.")
    sub_cmd = None
    if len(in_args) > 0 and in_args[0] in sub_cmds:
        subparsers = parser.add_subparsers(help="bcbio-nextgen supplemental commands")
        sub_cmds[in_args[0]](subparsers)
        sub_cmd = in_args[0]
    else:
        parser.add_argument("global_config", nargs="?",
                            help="Global YAML configuration file specifying details "
                                 "about the system (optional, defaults to installed "
                                 "bcbio_system.yaml)")
        parser.add_argument("fc_dir", nargs="?",
                            help="A directory of Illumina output or fastq files to "
                                 "process (optional)")
        parser.add_argument("run_config", nargs="*",
                            help="YAML file with details about samples to process "
                                 "(required, unless using Galaxy LIMS as input)")
        parser.add_argument("-n", "--numcores", type=int, default=1,
                            help="Total cores to use for processing")
        parser.add_argument("-t", "--paralleltype", choices=["local", "ipython"],
                            default="local", help="Approach to parallelization")
        parser.add_argument("-s", "--scheduler",
                            choices=["lsf", "sge", "torque", "slurm"],
                            help="Scheduler to use for ipython parallel")
        parser.add_argument("-q", "--queue",
                            help="Scheduler queue to run jobs on, for ipython parallel")
        parser.add_argument("-r", "--resources", default=[], action="append",
                            help=("Cluster specific resources specifications. "
                                  "Can be specified multiple times.\n"
                                  "Supports SGE, Torque, LSF and SLURM parameters."))
        parser.add_argument("--timeout", default=15, type=int,
                            help="Number of minutes before cluster startup times out. "
                                 "Defaults to 15")
        parser.add_argument("--retries", default=0, type=int,
                            help=("Number of retries of failed tasks during "
                                  "distributed processing. Default 0 (no retries)"))
        parser.add_argument("-p", "--tag", default="",
                            help="Tag name to label jobs on the cluster")
        parser.add_argument("-w", "--workflow",
                            help="Run a workflow with the given commandline arguments")
        parser.add_argument("--workdir", default=os.getcwd(),
                            help="Directory to process in. Defaults to current "
                                 "working directory")
        parser.add_argument("-v", "--version", help="Print current version",
                            action="store_true")
    args = parser.parse_args(in_args)
    if hasattr(args, "global_config"):
        error_msg = _sanity_check_args(args)
        if error_msg:
            parser.error(error_msg)
        kwargs = {"parallel": clargs.to_parallel(args),
                  "workflow": args.workflow,
                  "workdir": args.workdir}
        kwargs = _add_inputs_to_kwargs(args, kwargs, parser)
        error_msg = _sanity_check_kwargs(kwargs)
        if error_msg:
            parser.error(error_msg)
    else:
        assert sub_cmd is not None
        kwargs = {"args": args, "config_file": None, sub_cmd: True}
    return kwargs
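Finally, an illustrative call of either parse_cl_args variant; the argument values are invented, and the exact keys inside kwargs["parallel"] are whatever clargs.to_parallel emits:

# Hypothetical invocation covering the non-subcommand branch.
kwargs = parse_cl_args(["bcbio_system.yaml", "sample_config.yaml",
                        "-n", "16", "-t", "ipython", "-s", "slurm", "-q", "general"])
# kwargs["parallel"] holds the runner configuration derived from the flags;
# kwargs["workdir"] defaults to the current working directory.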