def _run_biodata_upload(args):
    """Prepare biodata for each requested genome on this machine and push it to S3.

    When --prepped is given, only download pre-built genome data and stop, so
    the user can inspect/edit before re-running without --prepped to upload.
    """
    args = defaults.update_check_args(args, "biodata not uploaded")
    args = install.docker_image_arg(args)
    for gbuild in args.genomes:
        print("Preparing %s" % gbuild)
        if args.prepped:
            # Download-only mode: fetch pre-built targets, then bail out early.
            for target in ["samtools"] + args.aligners:
                genome.download_prepped_genome(gbuild, {}, target, False, args.prepped)
            print("Downloaded prepped %s to %s. Edit and re-run without --prepped to upload"
                  % (gbuild, args.prepped))
            return
        upgrade_cl = ["upgrade", "--genomes", gbuild]
        for aligner_name in args.aligners:
            upgrade_cl.extend(["--aligners", aligner_name])
        sys_mounts = mounts.prepare_system(args.datadir, DOCKER["biodata_dir"])
        manage.run_bcbio_cmd(args.image, sys_mounts, upgrade_cl)
        print("Uploading %s" % gbuild)
        gdir = _get_basedir(args.datadir, gbuild)
        basedir, genomedir = os.path.split(gdir)
        assert genomedir == gbuild
        with utils.chdir(basedir):
            dir_listing = sorted(os.listdir(gbuild))
            _upload_biodata(gbuild, "seq", dir_listing)
            for aligner in args.aligners:
                _upload_biodata(gbuild, genome.REMAP_NAMES.get(aligner, aligner), dir_listing)
def _run_biodata_upload(args):
    """Prepare biodata for each genome locally, then upload to S3 in pieces.

    --prepped switches to a download-only mode that fetches pre-built data
    and returns without uploading. Otherwise runs a containerized
    ``upgrade`` to build the data, then uploads seq plus each aligner index
    (always including rtg).
    """
    args = defaults.update_check_args(args, "biodata not uploaded")
    args = install.docker_image_arg(args)
    for gbuild in args.genomes:
        print("Preparing %s" % gbuild)
        if args.prepped:
            # Download pre-built targets only; skip the build/upload cycle.
            for target in ["samtools"] + args.aligners:
                genome.download_prepped_genome(gbuild, {}, target, False, args.prepped)
            print(
                "Downloaded prepped %s to %s. Edit and re-run without --prepped to upload"
                % (gbuild, args.prepped))
            return
        upgrade_cl = ["upgrade", "--genomes", gbuild]
        for aligner_name in args.aligners:
            upgrade_cl.extend(["--aligners", aligner_name])
        for datatarget in args.datatarget:
            upgrade_cl.extend(["--datatarget", datatarget])
        sys_mounts = mounts.prepare_system(args.datadir, DOCKER["biodata_dir"])
        manage.run_bcbio_cmd(args.image, sys_mounts, upgrade_cl)
        print("Uploading %s" % gbuild)
        gdir = _get_basedir(args.datadir, gbuild)
        basedir, genomedir = os.path.split(gdir)
        assert genomedir == gbuild
        with utils.chdir(basedir):
            dir_listing = sorted(os.listdir(gbuild))
            _upload_biodata(gbuild, "seq", dir_listing)
            # rtg is always uploaded in addition to the requested aligners.
            for aligner in args.aligners + ["rtg"]:
                _upload_biodata(gbuild, genome.REMAP_NAMES.get(aligner, aligner), dir_listing)
def full(args, dockerconf):
    """Full installation of docker image and data.

    Installs wrapper scripts, the bcbio-nextgen tool image, and biological
    data depending on the flags set on ``args``, reporting what was updated.
    """
    updates = []
    args = add_install_defaults(args)
    if args.wrapper:
        updates.append("wrapper scripts")
        upgrade_bcbio_vm()
    dmounts = mounts.prepare_system(args.datadir, dockerconf["biodata_dir"])
    if args.install_tools:
        updates.append("bcbio-nextgen code and third party tools")
        pull(args, dockerconf)
        _check_docker_image(args)
        # Ensure external galaxy configuration in sync when doing tool upgrade
        manage.run_bcbio_cmd(args.image, dmounts, ["upgrade"])
    if args.install_data:
        # Validate required inputs before kicking off a long data install.
        missing_msg = None
        if len(args.genomes) == 0:
            missing_msg = "Data not installed, no genomes provided with `--genomes` flag"
        elif len(args.aligners) == 0:
            missing_msg = "Data not installed, no aligners provided with `--aligners` flag"
        if missing_msg:
            print(missing_msg)
            sys.exit(1)
        updates.append("biological data")
        _check_docker_image(args)
        manage.run_bcbio_cmd(args.image, dmounts, _get_cl(args))
    _save_install_defaults(args)
    if updates:
        print("\nbcbio-nextgen-vm updated with latest %s" % " and ".join(updates))
    else:
        print("\nNo update targets specified, need '--wrapper', '--tools' or '--data'\n"
              "See 'bcbio_vm.py upgrade -h' for more details.")
def cmd_ipython(args):
    """Run a distributed analysis on an IPython parallel cluster.

    Normalizes the sample configuration, writes a ``*-ready*`` copy into the
    working directory, then hands off to the main pipeline with ``runfn``
    docker wrappers for each distributed function call.
    """
    args = defaults.update_check_args(args, "Could not run IPython parallel analysis.")
    args = install.docker_image_arg(args)
    parallel = clargs.to_parallel(args, "bcbiovm.docker")
    parallel["wrapper"] = "runfn"
    with open(args.sample_config) as in_handle:
        # safe_load: sample configs are plain data; yaml.load without an
        # explicit Loader is unsafe and raises TypeError on PyYAML >= 6.
        ready_config, _ = mounts.normalize_config(yaml.safe_load(in_handle), args.fcdir)
    work_dir = os.getcwd()
    ready_config_file = os.path.join(
        work_dir, "%s-ready%s" % (os.path.splitext(os.path.basename(args.sample_config))))
    with open(ready_config_file, "w") as out_handle:
        yaml.safe_dump(ready_config, out_handle, default_flow_style=False, allow_unicode=False)
    parallel["wrapper_args"] = [DOCKER, {"sample_config": ready_config_file,
                                         "fcdir": args.fcdir,
                                         "pack": pack.shared_filesystem(work_dir, args.datadir,
                                                                        args.tmpdir),
                                         "systemconfig": args.systemconfig,
                                         "image": args.image}]
    # For testing, run on a local ipython cluster
    parallel["run_local"] = parallel.get("queue") == "localrun"
    workdir_mount = "%s:%s" % (work_dir, DOCKER["work_dir"])
    # Sanity check: confirm the container starts and the work dir mounts.
    manage.run_bcbio_cmd(args.image, [workdir_mount],
                         ["version", "--workdir=%s" % DOCKER["work_dir"]])
    main.run_main(work_dir, run_info_yaml=ready_config_file,
                  config_file=args.systemconfig, fc_dir=args.fcdir, parallel=parallel)
def cmd_server(args):
    """Start the bcbio server in a docker container, exposing the given port."""
    args = defaults.update_check_args(args, "Could not run server.")
    args = install.docker_image_arg(args)
    port_map = ["%s:%s" % (args.port, devel.DOCKER["port"])]
    print("Running server on port %s. Press ctrl-c to exit." % args.port)
    manage.run_bcbio_cmd(args.image, [],
                         ["server", "--port", str(devel.DOCKER["port"])],
                         port_map)
def do_analysis(args, dockerconf):
    """Run a full analysis on a local machine, utilizing multiple cores.

    Writes docker-visible copies of the system and sample configurations into
    the working directory, mounts the required data/work directories, and
    launches bcbio inside the container.
    """
    work_dir = os.getcwd()
    with open(args.sample_config) as in_handle:
        # safe_load: configs are plain data; yaml.load without an explicit
        # Loader is unsafe and raises TypeError on PyYAML >= 6.
        sample_config, dmounts = mounts.update_config(yaml.safe_load(in_handle), args.fcdir)
    dmounts += mounts.prepare_system(args.datadir, dockerconf["biodata_dir"])
    dmounts.append("%s:%s" % (work_dir, dockerconf["work_dir"]))
    system_config, system_mounts = _read_system_config(dockerconf, args.systemconfig,
                                                       args.datadir)
    system_cfile = os.path.join(work_dir, "bcbio_system-forvm.yaml")
    sample_cfile = os.path.join(work_dir, "bcbio_sample-forvm.yaml")
    with open(system_cfile, "w") as out_handle:
        yaml.dump(system_config, out_handle, default_flow_style=False, allow_unicode=False)
    with open(sample_cfile, "w") as out_handle:
        yaml.dump(sample_config, out_handle, default_flow_style=False, allow_unicode=False)
    # Rewrite config paths to their locations inside the container.
    in_files = [os.path.join(dockerconf["work_dir"], os.path.basename(x))
                for x in [system_cfile, sample_cfile]]
    log.setup_local_logging({"include_time": False})
    manage.run_bcbio_cmd(
        args.image, dmounts + system_mounts,
        in_files + ["--numcores", str(args.numcores),
                    "--workdir=%s" % dockerconf["work_dir"]])
def do_runfn(fn_name, fn_args, cmd_args, parallel, dockerconf, ports=None):
    """Run a single defined function inside a docker container, returning results.

    Serializes ``fn_args`` (remapped to container paths) to a YAML argfile,
    runs ``runfn`` inside the container, then reads back and remaps the
    ``*-out*`` result file. Exits the process if the container produced no
    output. Temporary arg/out files are removed before returning.
    """
    dmounts = []
    if cmd_args.get("sample_config"):
        with open(cmd_args["sample_config"]) as in_handle:
            # safe_load: yaml.load without a Loader is unsafe and raises
            # TypeError on PyYAML >= 6.
            _, dmounts = mounts.update_config(yaml.safe_load(in_handle), cmd_args["fcdir"])
    datadir, fn_args = reconstitute.prep_datadir(cmd_args["pack"], fn_args)
    if "orig_systemconfig" in cmd_args:
        # Mount the original galaxy configuration directory at the same path
        # inside the container so references resolve unchanged.
        orig_sconfig = _get_system_configfile(cmd_args["orig_systemconfig"], datadir)
        orig_galaxydir = os.path.dirname(orig_sconfig)
        dmounts.append("%s:%s" % (orig_galaxydir, orig_galaxydir))
    work_dir, fn_args, finalizer = reconstitute.prep_workdir(
        cmd_args["pack"], parallel, fn_args)
    dmounts += mounts.prepare_system(datadir, dockerconf["biodata_dir"])
    reconstitute.prep_systemconfig(datadir, fn_args)
    _, system_mounts = _read_system_config(dockerconf, cmd_args["systemconfig"], datadir)
    dmounts.append("%s:%s" % (work_dir, dockerconf["work_dir"]))
    homedir = pwd.getpwuid(os.getuid()).pw_dir
    dmounts.append("%s:%s" % (homedir, homedir))
    all_mounts = dmounts + system_mounts
    argfile = os.path.join(work_dir, "runfn-%s-%s.yaml" % (fn_name, uuid.uuid4()))
    with open(argfile, "w") as out_handle:
        yaml.safe_dump(remap.external_to_docker(fn_args, all_mounts), out_handle,
                       default_flow_style=False, allow_unicode=False)
    docker_argfile = os.path.join(dockerconf["work_dir"], os.path.basename(argfile))
    outfile = "%s-out%s" % os.path.splitext(argfile)
    out = None
    manage.run_bcbio_cmd(cmd_args["image"], all_mounts,
                         ["runfn", fn_name, docker_argfile], ports=ports)
    if os.path.exists(outfile):
        with open(outfile) as in_handle:
            out = remap.docker_to_external(yaml.safe_load(in_handle), all_mounts)
    else:
        print("Subprocess in docker container failed")
        sys.exit(1)
    out = finalizer(out)
    for f in [argfile, outfile]:
        if os.path.exists(f):
            os.remove(f)
    return out
def full(args, dockerconf):
    """Full installation of docker image and data.

    Installs wrapper scripts, tools and biological data as requested by the
    flags on ``args``. When the docker image is unavailable, falls back to a
    direct (non-docker) bcbio data installation.
    """
    updates = []
    args = add_install_defaults(args)
    if args.wrapper:
        updates.append("wrapper scripts")
        upgrade_bcbio_vm()
    dmounts = mounts.prepare_system(args.datadir, dockerconf["biodata_dir"])
    if args.install_tools:
        updates.append("bcbio-nextgen code and third party tools")
        pull(args, dockerconf)
        _check_docker_image(args)
        # Ensure external galaxy configuration in sync when doing tool upgrade
        manage.run_bcbio_cmd(args.image, dmounts, ["upgrade"])
    if args.install_data:
        if len(args.genomes) == 0:
            print(
                "Data not installed, no genomes provided with `--genomes` flag"
            )
            sys.exit(1)
        elif len(args.aligners) == 0:
            print(
                "Data not installed, no aligners provided with `--aligners` flag"
            )
            sys.exit(1)
        else:
            updates.append("biological data")
        if _check_docker_image(args, raise_error=False):
            manage.run_bcbio_cmd(args.image, dmounts, _get_cl(args))
        else:
            # No docker image available: fall back to installing data
            # directly with bcbio, forcing data-only flags.
            args.upgrade = False
            args.tools = False
            args.tooldir = False
            args.toolplus = False
            args.isolate = True
            args.distribution = None
            args.cwl = True
            print(args)
            from bcbio import install
            install.upgrade_bcbio(args)
    _save_install_defaults(args)
    if updates:
        print("\nbcbio-nextgen-vm updated with latest %s" % " and ".join(updates))
    else:
        print(
            "\nNo update targets specified, need '--wrapper', '--tools' or '--data'\n"
            "See 'bcbio_vm.py upgrade -h' for more details.")
def do_analysis(args, dockerconf):
    """Run a full analysis on a local machine, utilizing multiple cores.

    Variant that passes the container input directory to
    ``mounts.update_config`` and runs with the image named in ``dockerconf``.
    """
    work_dir = os.getcwd()
    with open(args.sample_config) as in_handle:
        # safe_load: configs are plain data; yaml.load without an explicit
        # Loader is unsafe and raises TypeError on PyYAML >= 6.
        sample_config, dmounts = mounts.update_config(yaml.safe_load(in_handle),
                                                      dockerconf["input_dir"], args.fcdir)
    dmounts += mounts.prepare_system(args.datadir, dockerconf["biodata_dir"])
    dmounts.append("%s:%s" % (work_dir, dockerconf["work_dir"]))
    system_config, system_mounts = _read_system_config(dockerconf, args.systemconfig,
                                                       args.datadir)
    system_cfile = os.path.join(work_dir, "bcbio_system-forvm.yaml")
    sample_cfile = os.path.join(work_dir, "bcbio_sample-forvm.yaml")
    with open(system_cfile, "w") as out_handle:
        yaml.dump(system_config, out_handle, default_flow_style=False, allow_unicode=False)
    with open(sample_cfile, "w") as out_handle:
        yaml.dump(sample_config, out_handle, default_flow_style=False, allow_unicode=False)
    # Rewrite config paths to their locations inside the container.
    in_files = [os.path.join(dockerconf["work_dir"], os.path.basename(x))
                for x in [system_cfile, sample_cfile]]
    log.setup_local_logging({"include_time": False})
    manage.run_bcbio_cmd(dockerconf["image"], dmounts + system_mounts,
                         in_files + ["--workdir=%s" % dockerconf["work_dir"]])
def do_runfn(fn_name, fn_args, cmd_args, parallel, dockerconf, ports=None):
    """Run a single defined function inside a docker container, returning results.

    Serializes ``fn_args`` (remapped to container paths) to a YAML argfile,
    runs ``runfn`` inside the container, then reads back and remaps the
    ``*-out*`` result file. Exits the process if the container produced no
    output. Temporary arg/out files are removed before returning.
    """
    dmounts = []
    if cmd_args.get("sample_config"):
        with open(cmd_args["sample_config"]) as in_handle:
            # safe_load: yaml.load without a Loader is unsafe and raises
            # TypeError on PyYAML >= 6.
            _, dmounts = mounts.update_config(yaml.safe_load(in_handle), cmd_args["fcdir"])
    datadir, fn_args = reconstitute.prep_datadir(cmd_args["pack"], fn_args)
    if "orig_systemconfig" in cmd_args:
        # Mount the original galaxy configuration directory at the same path
        # inside the container so references resolve unchanged.
        orig_sconfig = _get_system_configfile(cmd_args["orig_systemconfig"], datadir)
        orig_galaxydir = os.path.dirname(orig_sconfig)
        dmounts.append("%s:%s" % (orig_galaxydir, orig_galaxydir))
    work_dir, fn_args, finalizer = reconstitute.prep_workdir(cmd_args["pack"], parallel,
                                                             fn_args)
    dmounts += mounts.prepare_system(datadir, dockerconf["biodata_dir"])
    reconstitute.prep_systemconfig(datadir, fn_args)
    _, system_mounts = _read_system_config(dockerconf, cmd_args["systemconfig"], datadir)
    dmounts.append("%s:%s" % (work_dir, dockerconf["work_dir"]))
    homedir = pwd.getpwuid(os.getuid()).pw_dir
    dmounts.append("%s:%s" % (homedir, homedir))
    all_mounts = dmounts + system_mounts
    argfile = os.path.join(work_dir, "runfn-%s-%s.yaml" % (fn_name, uuid.uuid4()))
    with open(argfile, "w") as out_handle:
        yaml.safe_dump(remap.external_to_docker(fn_args, all_mounts), out_handle,
                       default_flow_style=False, allow_unicode=False)
    docker_argfile = os.path.join(dockerconf["work_dir"], os.path.basename(argfile))
    outfile = "%s-out%s" % os.path.splitext(argfile)
    out = None
    manage.run_bcbio_cmd(cmd_args["image"], all_mounts,
                         ["runfn", fn_name, docker_argfile], ports=ports)
    if os.path.exists(outfile):
        with open(outfile) as in_handle:
            out = remap.docker_to_external(yaml.safe_load(in_handle), all_mounts)
    else:
        print("Subprocess in docker container failed")
        sys.exit(1)
    out = finalizer(out)
    for f in [argfile, outfile]:
        if os.path.exists(f):
            os.remove(f)
    return out
def _run_biodata_upload(args):
    """Build biodata for each requested genome in the container, then upload to S3."""
    args = defaults.update_check_args(args, "biodata not uploaded")
    args = install.docker_image_arg(args)
    for gbuild in args.genomes:
        print("Preparing %s" % gbuild)
        upgrade_cl = ["upgrade", "--genomes", gbuild]
        for aligner_name in args.aligners:
            upgrade_cl.extend(["--aligners", aligner_name])
        sys_mounts = mounts.prepare_system(args.datadir, DOCKER["biodata_dir"])
        manage.run_bcbio_cmd(args.image, sys_mounts, upgrade_cl)
        print("Uploading %s" % gbuild)
        gdir = _get_basedir(args.datadir, gbuild)
        basedir, genomedir = os.path.split(gdir)
        assert genomedir == gbuild
        with utils.chdir(basedir):
            dir_listing = sorted(os.listdir(gbuild))
            _upload_biodata(gbuild, "seq", dir_listing)
            for aligner in args.aligners:
                _upload_biodata(gbuild, genome.REMAP_NAMES.get(aligner, aligner), dir_listing)
def full(args, dockerconf):
    """Full installation of docker image and data.

    Handles wrapper scripts, tool installation (in-place upgrade or image
    pull) and biological data, then saves defaults and reports updates.
    """
    updates = []
    args = add_install_defaults(args)
    if args.wrapper:
        updates.append("wrapper scripts")
        upgrade_bcbio_vm()
    if args.install_tools:
        updates.append("bcbio-nextgen code and third party tools")
        # In-place upgrade modifies the existing container; otherwise pull
        # a fresh image.
        if args.inplace:
            upgrade(dockerconf, args)
        else:
            pull(dockerconf)
    sys_mounts = mounts.prepare_system(args.datadir, dockerconf["biodata_dir"])
    if args.install_data:
        updates.append("biological data")
        manage.run_bcbio_cmd(dockerconf["image"], sys_mounts, _get_cl(args))
    save_install_defaults(args)
    if updates:
        print("\nbcbio-nextgen-vm updated with latest %s" % " and ".join(updates))
def run(args, docker_config):
    """Run an analysis distributed via Clusterk, staging inputs through S3."""
    work_dir = os.getcwd()
    parallel = {"type": "clusterk",
                "queue": args.queue,
                "cores": args.numcores,
                "module": "bcbiovm.clusterk",
                "wrapper": "runfn"}
    with open(args.sample_config) as in_handle:
        ready_config, _ = mounts.normalize_config(yaml.safe_load(in_handle), args.fcdir)
    ready_config_file = os.path.join(
        work_dir,
        "%s-ready%s" % (os.path.splitext(os.path.basename(args.sample_config))))
    with open(ready_config_file, "w") as out_handle:
        yaml.safe_dump(ready_config, out_handle, default_flow_style=False,
                       allow_unicode=False)
    parallel["pack"] = pack.prep_s3(args.biodata_bucket, args.run_bucket, "runfn_output")
    parallel["wrapper_args"] = [{"sample_config": ready_config_file,
                                 "docker_config": docker_config,
                                 "fcdir": args.fcdir,
                                 "datadir": args.datadir,
                                 "systemconfig": args.systemconfig}]
    # Sanity check: confirm the container starts with the work dir mounted.
    workdir_mount = "%s:%s" % (work_dir, docker_config["work_dir"])
    manage.run_bcbio_cmd(args.image, [workdir_mount],
                         ["version", "--workdir=%s" % docker_config["work_dir"]])
    from bcbio.pipeline import main
    main.run_main(work_dir, run_info_yaml=ready_config_file,
                  config_file=args.systemconfig, fc_dir=args.fcdir, parallel=parallel)
def full(args, dockerconf):
    """Full installation of docker image and data.

    Installs wrapper scripts, pulls the tool image and installs biological
    data depending on the flags on ``args``, then persists the defaults.
    """
    updates = []
    args = add_install_defaults(args)
    if args.wrapper:
        updates.append("wrapper scripts")
        upgrade_bcbio_vm()
    if args.install_tools:
        updates.append("bcbio-nextgen code and third party tools")
        pull(args, dockerconf)
        _check_docker_image(args)
    sys_mounts = mounts.prepare_system(args.datadir, dockerconf["biodata_dir"])
    if args.install_data:
        updates.append("biological data")
        manage.run_bcbio_cmd(args.image, sys_mounts, _get_cl(args))
    _save_install_defaults(args)
    if updates:
        print("\nbcbio-nextgen-vm updated with latest %s" % " and ".join(updates))
    else:
        print("\nNo update targets specified, need '--wrapper', '--tools' or '--data'\n"
              "See 'bcbio_vm.py upgrade -h' for more details.")
def run(args, docker_config):
    """Run an analysis distributed via Clusterk, staging inputs through S3.

    Normalizes the sample config, writes a ``*-ready*`` copy into the work
    directory, prepares the S3 pack and dispatches to the main pipeline with
    ``runfn`` wrappers.
    """
    work_dir = os.getcwd()
    parallel = {"type": "clusterk", "queue": args.queue, "cores": args.numcores,
                "module": "bcbiovm.clusterk", "wrapper": "runfn"}
    with open(args.sample_config) as in_handle:
        # safe_load: sample configs are plain data; yaml.load without an
        # explicit Loader is unsafe and raises TypeError on PyYAML >= 6.
        ready_config, _ = mounts.normalize_config(yaml.safe_load(in_handle), args.fcdir)
    ready_config_file = os.path.join(
        work_dir, "%s-ready%s" % (os.path.splitext(os.path.basename(args.sample_config))))
    with open(ready_config_file, "w") as out_handle:
        yaml.safe_dump(ready_config, out_handle, default_flow_style=False,
                       allow_unicode=False)
    parallel["pack"] = pack.prep_s3(args.biodata_bucket, args.run_bucket, "runfn_output")
    parallel["wrapper_args"] = [{"sample_config": ready_config_file,
                                 "docker_config": docker_config,
                                 "fcdir": args.fcdir,
                                 "datadir": args.datadir,
                                 "systemconfig": args.systemconfig}]
    # Sanity check: confirm the container starts with the work dir mounted.
    workdir_mount = "%s:%s" % (work_dir, docker_config["work_dir"])
    manage.run_bcbio_cmd(args.image, [workdir_mount],
                         ["version", "--workdir=%s" % docker_config["work_dir"]])
    main.run_main(work_dir, run_info_yaml=ready_config_file,
                  config_file=args.systemconfig, fc_dir=args.fcdir, parallel=parallel)
def upgrade(dockerconf, args):
    """Perform an in-place upgrade of tools and code inside a container.

    Runs the development-branch tool upgrade in a container, then commits
    the resulting container back over the same image tag.
    """
    sys_mounts = mounts.prepare_system(args.datadir, dockerconf["biodata_dir"])
    container_id = manage.run_bcbio_cmd(dockerconf["image"], sys_mounts,
                                        ["upgrade", "-u", "development", "--tools"])
    subprocess.check_call(["docker", "commit", container_id, dockerconf["image"]])
def cmd_server(args):
    """Start the bcbio server in the default docker image, exposing the given port."""
    args = defaults.update_check_args(args, "Could not run server.")
    port_map = ["%s:%s" % (args.port, DOCKER["port"])]
    print("Running server on port %s. Press ctrl-c to exit." % args.port)
    manage.run_bcbio_cmd(DOCKER["image"], [],
                         ["server", "--port", str(DOCKER["port"])],
                         port_map)