Example #1
0
def _run_biodata_upload(args):
    """Manage preparation of biodata on a local machine, uploading to S3 in pieces.

    Each genome build in ``args.genomes`` is handled in turn. When
    ``args.prepped`` is set, the prepared genome is only downloaded (for the
    "samtools" target and every requested aligner) and nothing is uploaded.
    Otherwise the ``upgrade`` command is run through ``manage.run_bcbio_cmd``
    and the "seq" directory plus one directory per aligner are handed to
    ``_upload_biodata``.
    """
    args = defaults.update_check_args(args, "biodata not uploaded")
    args = install.docker_image_arg(args)
    for gbuild in args.genomes:
        print("Preparing %s" % gbuild)
        if args.prepped:
            for target in ["samtools"] + args.aligners:
                genome.download_prepped_genome(gbuild, {}, target, False, args.prepped)
            print("Downloaded prepped %s to %s. Edit and re-run without --prepped to upload"
                  % (gbuild, args.prepped))
            # Move on to the next build instead of returning: a return here
            # silently skipped every genome after the first one requested.
            continue
        cl = ["upgrade", "--genomes", gbuild]
        for a in args.aligners:
            cl += ["--aligners", a]
        dmounts = mounts.prepare_system(args.datadir, DOCKER["biodata_dir"])
        manage.run_bcbio_cmd(args.image, dmounts, cl)
        print("Uploading %s" % gbuild)
        gdir = _get_basedir(args.datadir, gbuild)
        basedir, genomedir = os.path.split(gdir)
        # The upload below refers to the genome by name, relative to basedir.
        assert genomedir == gbuild
        with utils.chdir(basedir):
            all_dirs = sorted(os.listdir(gbuild))
            _upload_biodata(gbuild, "seq", all_dirs)
            for aligner in args.aligners:
                _upload_biodata(gbuild, genome.REMAP_NAMES.get(aligner, aligner), all_dirs)
Example #2
0
def _run_biodata_upload(args):
    """Manage preparation of biodata on a local machine, uploading to S3 in pieces.

    Each genome build in ``args.genomes`` is handled in turn. When
    ``args.prepped`` is set, the prepared genome is only downloaded (for the
    "samtools" target and every requested aligner) and nothing is uploaded.
    Otherwise the ``upgrade`` command is run through ``manage.run_bcbio_cmd``
    with the requested aligners and data targets, and the "seq" directory,
    one directory per aligner and the "rtg" directory are handed to
    ``_upload_biodata``.
    """
    args = defaults.update_check_args(args, "biodata not uploaded")
    args = install.docker_image_arg(args)
    for gbuild in args.genomes:
        print("Preparing %s" % gbuild)
        if args.prepped:
            for target in ["samtools"] + args.aligners:
                genome.download_prepped_genome(gbuild, {}, target, False,
                                               args.prepped)
            print(
                "Downloaded prepped %s to %s. Edit and re-run without --prepped to upload"
                % (gbuild, args.prepped))
            # Move on to the next build instead of returning: a return here
            # silently skipped every genome after the first one requested.
            continue
        cl = ["upgrade", "--genomes", gbuild]
        for a in args.aligners:
            cl += ["--aligners", a]
        for t in args.datatarget:
            cl += ["--datatarget", t]
        dmounts = mounts.prepare_system(args.datadir, DOCKER["biodata_dir"])
        manage.run_bcbio_cmd(args.image, dmounts, cl)
        print("Uploading %s" % gbuild)
        gdir = _get_basedir(args.datadir, gbuild)
        basedir, genomedir = os.path.split(gdir)
        # The upload below refers to the genome by name, relative to basedir.
        assert genomedir == gbuild
        with utils.chdir(basedir):
            all_dirs = sorted(os.listdir(gbuild))
            _upload_biodata(gbuild, "seq", all_dirs)
            # "rtg" is always uploaded in addition to the requested aligners.
            for aligner in args.aligners + ["rtg"]:
                _upload_biodata(gbuild,
                                genome.REMAP_NAMES.get(aligner,
                                                       aligner), all_dirs)
Example #3
0
def full(args, dockerconf):
    """Full installation of docker image and data.

    Depending on the flags on ``args`` this upgrades the wrapper scripts,
    pulls the tools image and/or installs biological data, then saves the
    install defaults and prints a summary of what was updated.
    Exits with status 1 when data is requested without genomes or aligners.
    """
    args = add_install_defaults(args)
    refreshed = []
    if args.wrapper:
        refreshed.append("wrapper scripts")
        upgrade_bcbio_vm()
    system_mounts = mounts.prepare_system(args.datadir, dockerconf["biodata_dir"])
    if args.install_tools:
        refreshed.append("bcbio-nextgen code and third party tools")
        pull(args, dockerconf)
        _check_docker_image(args)
        # Ensure external galaxy configuration in sync when doing tool upgrade
        manage.run_bcbio_cmd(args.image, system_mounts, ["upgrade"])
    if args.install_data:
        # Bail out early when there is nothing concrete to install.
        if len(args.genomes) == 0:
            print("Data not installed, no genomes provided with `--genomes` flag")
            sys.exit(1)
        if len(args.aligners) == 0:
            print("Data not installed, no aligners provided with `--aligners` flag")
            sys.exit(1)
        refreshed.append("biological data")
        _check_docker_image(args)
        manage.run_bcbio_cmd(args.image, system_mounts, _get_cl(args))
    _save_install_defaults(args)
    if refreshed:
        summary = "\nbcbio-nextgen-vm updated with latest %s" % " and ".join(refreshed)
    else:
        summary = ("\nNo update targets specified, need '--wrapper', '--tools' or '--data'\n"
                   "See 'bcbio_vm.py upgrade -h' for more details.")
    print(summary)
Example #4
0
def cmd_ipython(args):
    """Run an analysis through ``main.run_main`` with a "runfn" docker wrapper.

    The sample configuration is normalized and written into the current
    working directory as ``<name>-ready<ext>``; that file, together with the
    docker settings, is passed along in ``parallel["wrapper_args"]``.
    """
    args = defaults.update_check_args(args, "Could not run IPython parallel analysis.")
    args = install.docker_image_arg(args)
    parallel = clargs.to_parallel(args, "bcbiovm.docker")
    parallel["wrapper"] = "runfn"
    with open(args.sample_config) as in_handle:
        # safe_load: the configuration is plain data. A bare yaml.load() needs
        # an explicit Loader on PyYAML >= 6 and may build arbitrary objects.
        ready_config, _ = mounts.normalize_config(yaml.safe_load(in_handle), args.fcdir)
    work_dir = os.getcwd()
    ready_config_file = os.path.join(work_dir, "%s-ready%s" %
                                     (os.path.splitext(os.path.basename(args.sample_config))))
    with open(ready_config_file, "w") as out_handle:
        yaml.safe_dump(ready_config, out_handle, default_flow_style=False, allow_unicode=False)
    parallel["wrapper_args"] = [DOCKER, {"sample_config": ready_config_file,
                                         "fcdir": args.fcdir,
                                         "pack": pack.shared_filesystem(work_dir, args.datadir, args.tmpdir),
                                         "systemconfig": args.systemconfig,
                                         "image": args.image}]
    # For testing, run on a local ipython cluster
    parallel["run_local"] = parallel.get("queue") == "localrun"
    workdir_mount = "%s:%s" % (work_dir, DOCKER["work_dir"])
    # Run the "version" command with the work directory mounted before the
    # main analysis is started.
    manage.run_bcbio_cmd(args.image, [workdir_mount],
                         ["version", "--workdir=%s" % DOCKER["work_dir"]])
    main.run_main(work_dir, run_info_yaml=ready_config_file,
                  config_file=args.systemconfig, fc_dir=args.fcdir,
                  parallel=parallel)
Example #5
0
def cmd_server(args):
    """Run the ``server`` command through ``manage.run_bcbio_cmd``.

    ``args.port`` is paired with the port configured in ``devel.DOCKER``
    and passed along as the port specification.
    """
    checked = defaults.update_check_args(args, "Could not run server.")
    args = install.docker_image_arg(checked)
    port_spec = ["%s:%s" % (args.port, devel.DOCKER["port"])]
    print("Running server on port %s. Press ctrl-c to exit." % args.port)
    server_cmd = ["server", "--port", str(devel.DOCKER["port"])]
    manage.run_bcbio_cmd(args.image, [], server_cmd, port_spec)
Example #6
0
def cmd_server(args):
    """Run the ``server`` command through ``manage.run_bcbio_cmd``.

    ``args.port`` is paired with the port configured in ``devel.DOCKER``
    and passed along as the port specification.
    """
    checked = defaults.update_check_args(args, "Could not run server.")
    args = install.docker_image_arg(checked)
    port_spec = ["%s:%s" % (args.port, devel.DOCKER["port"])]
    print("Running server on port %s. Press ctrl-c to exit." % args.port)
    server_cmd = ["server", "--port", str(devel.DOCKER["port"])]
    manage.run_bcbio_cmd(args.image, [], server_cmd, port_spec)
Example #7
0
def do_analysis(args, dockerconf):
    """Run a full analysis on a local machine, utilizing multiple cores.

    Writes docker-ready copies of the system and sample configuration into
    the current working directory and passes their in-container paths,
    ``--numcores`` and ``--workdir`` to ``manage.run_bcbio_cmd``.
    """
    work_dir = os.getcwd()
    with open(args.sample_config) as in_handle:
        # safe_load: the configuration is plain data. A bare yaml.load() needs
        # an explicit Loader on PyYAML >= 6 and may build arbitrary objects.
        sample_config, dmounts = mounts.update_config(yaml.safe_load(in_handle),
                                                      args.fcdir)
    dmounts += mounts.prepare_system(args.datadir, dockerconf["biodata_dir"])
    dmounts.append("%s:%s" % (work_dir, dockerconf["work_dir"]))
    system_config, system_mounts = _read_system_config(dockerconf,
                                                       args.systemconfig,
                                                       args.datadir)
    system_cfile = os.path.join(work_dir, "bcbio_system-forvm.yaml")
    sample_cfile = os.path.join(work_dir, "bcbio_sample-forvm.yaml")
    with open(system_cfile, "w") as out_handle:
        yaml.dump(system_config,
                  out_handle,
                  default_flow_style=False,
                  allow_unicode=False)
    with open(sample_cfile, "w") as out_handle:
        yaml.dump(sample_config,
                  out_handle,
                  default_flow_style=False,
                  allow_unicode=False)
    # Refer to the configuration files by their location under the docker
    # work directory, where work_dir is mounted.
    in_files = [
        os.path.join(dockerconf["work_dir"], os.path.basename(x))
        for x in [system_cfile, sample_cfile]
    ]
    log.setup_local_logging({"include_time": False})
    manage.run_bcbio_cmd(
        args.image, dmounts + system_mounts, in_files + [
            "--numcores",
            str(args.numcores),
            "--workdir=%s" % dockerconf["work_dir"]
        ])
Example #8
0
def do_runfn(fn_name, fn_args, cmd_args, parallel, dockerconf, ports=None):
    """Run a single defined function inside a docker container, returning results.

    The function arguments are written to a YAML file in the work directory
    and the ``runfn`` command is run against it. The result is read back from
    the matching ``-out`` file, passed through the finalizer and returned.
    Exits with status 1 when that output file was not produced.
    """
    dmounts = []
    if cmd_args.get("sample_config"):
        with open(cmd_args["sample_config"]) as in_handle:
            # safe_load: the configuration is plain data. A bare yaml.load()
            # needs an explicit Loader on PyYAML >= 6 and may build arbitrary
            # objects.
            _, dmounts = mounts.update_config(yaml.safe_load(in_handle),
                                              cmd_args["fcdir"])
    datadir, fn_args = reconstitute.prep_datadir(cmd_args["pack"], fn_args)
    if "orig_systemconfig" in cmd_args:
        orig_sconfig = _get_system_configfile(cmd_args["orig_systemconfig"],
                                              datadir)
        orig_galaxydir = os.path.dirname(orig_sconfig)
        dmounts.append("%s:%s" % (orig_galaxydir, orig_galaxydir))
    work_dir, fn_args, finalizer = reconstitute.prep_workdir(
        cmd_args["pack"], parallel, fn_args)
    dmounts += mounts.prepare_system(datadir, dockerconf["biodata_dir"])
    reconstitute.prep_systemconfig(datadir, fn_args)
    _, system_mounts = _read_system_config(dockerconf,
                                           cmd_args["systemconfig"], datadir)

    dmounts.append("%s:%s" % (work_dir, dockerconf["work_dir"]))
    # The user's home directory is mounted at the same path.
    homedir = pwd.getpwuid(os.getuid()).pw_dir
    dmounts.append("%s:%s" % (homedir, homedir))
    all_mounts = dmounts + system_mounts

    # A unique name keeps argument files of different calls apart.
    argfile = os.path.join(work_dir,
                           "runfn-%s-%s.yaml" % (fn_name, uuid.uuid4()))
    with open(argfile, "w") as out_handle:
        yaml.safe_dump(remap.external_to_docker(fn_args, all_mounts),
                       out_handle,
                       default_flow_style=False,
                       allow_unicode=False)
    docker_argfile = os.path.join(dockerconf["work_dir"],
                                  os.path.basename(argfile))
    # Results are read back from "<argfile stem>-out<ext>".
    outfile = "%s-out%s" % os.path.splitext(argfile)
    out = None
    manage.run_bcbio_cmd(cmd_args["image"],
                         all_mounts, ["runfn", fn_name, docker_argfile],
                         ports=ports)
    if os.path.exists(outfile):
        with open(outfile) as in_handle:
            out = remap.docker_to_external(yaml.safe_load(in_handle),
                                           all_mounts)
    else:
        print("Subprocess in docker container failed")
        sys.exit(1)
    out = finalizer(out)
    for f in [argfile, outfile]:
        if os.path.exists(f):
            os.remove(f)
    return out
Example #9
0
def full(args, dockerconf):
    """Full installation of docker image and data.

    Depending on the flags on ``args`` this upgrades the wrapper scripts,
    pulls the tools image and/or installs biological data. Data is installed
    through docker when ``_check_docker_image`` reports success, otherwise
    through ``bcbio.install.upgrade_bcbio``. Exits with status 1 when data is
    requested without genomes or aligners.
    """
    args = add_install_defaults(args)
    refreshed = []
    if args.wrapper:
        refreshed.append("wrapper scripts")
        upgrade_bcbio_vm()
    system_mounts = mounts.prepare_system(args.datadir, dockerconf["biodata_dir"])
    if args.install_tools:
        refreshed.append("bcbio-nextgen code and third party tools")
        pull(args, dockerconf)
        _check_docker_image(args)
        # Ensure external galaxy configuration in sync when doing tool upgrade
        manage.run_bcbio_cmd(args.image, system_mounts, ["upgrade"])
    if args.install_data:
        # Bail out early when there is nothing concrete to install.
        if len(args.genomes) == 0:
            print("Data not installed, no genomes provided with `--genomes` flag")
            sys.exit(1)
        if len(args.aligners) == 0:
            print("Data not installed, no aligners provided with `--aligners` flag")
            sys.exit(1)
        refreshed.append("biological data")
        if not _check_docker_image(args, raise_error=False):
            # Adjust the arguments before handing them to the bcbio installer.
            fallback_settings = [("upgrade", False), ("tools", False),
                                 ("tooldir", False), ("toolplus", False),
                                 ("isolate", True), ("distribution", None),
                                 ("cwl", True)]
            for attr_name, attr_value in fallback_settings:
                setattr(args, attr_name, attr_value)
            # NOTE(review): looks like leftover debug output -- confirm.
            print(args)
            from bcbio import install
            install.upgrade_bcbio(args)
        else:
            manage.run_bcbio_cmd(args.image, system_mounts, _get_cl(args))
    _save_install_defaults(args)
    if refreshed:
        summary = "\nbcbio-nextgen-vm updated with latest %s" % " and ".join(refreshed)
    else:
        summary = ("\nNo update targets specified, need '--wrapper', '--tools' or '--data'\n"
                   "See 'bcbio_vm.py upgrade -h' for more details.")
    print(summary)
Example #10
0
def do_analysis(args, dockerconf):
    """Run a full analysis on a local machine, utilizing multiple cores.

    Writes docker-ready copies of the system and sample configuration into
    the current working directory and passes their in-container paths plus
    ``--workdir`` to ``manage.run_bcbio_cmd``.
    """
    work_dir = os.getcwd()
    with open(args.sample_config) as in_handle:
        # safe_load: the configuration is plain data. A bare yaml.load() needs
        # an explicit Loader on PyYAML >= 6 and may build arbitrary objects.
        sample_config, dmounts = mounts.update_config(yaml.safe_load(in_handle),
                                                      dockerconf["input_dir"], args.fcdir)
    dmounts += mounts.prepare_system(args.datadir, dockerconf["biodata_dir"])
    dmounts.append("%s:%s" % (work_dir, dockerconf["work_dir"]))
    system_config, system_mounts = _read_system_config(dockerconf, args.systemconfig, args.datadir)
    system_cfile = os.path.join(work_dir, "bcbio_system-forvm.yaml")
    sample_cfile = os.path.join(work_dir, "bcbio_sample-forvm.yaml")
    with open(system_cfile, "w") as out_handle:
        yaml.dump(system_config, out_handle, default_flow_style=False, allow_unicode=False)
    with open(sample_cfile, "w") as out_handle:
        yaml.dump(sample_config, out_handle, default_flow_style=False, allow_unicode=False)
    # Refer to the configuration files by their location under the docker
    # work directory, where work_dir is mounted.
    in_files = [os.path.join(dockerconf["work_dir"], os.path.basename(x))
                for x in [system_cfile, sample_cfile]]
    log.setup_local_logging({"include_time": False})
    manage.run_bcbio_cmd(dockerconf["image"], dmounts + system_mounts,
                         in_files + ["--workdir=%s" % dockerconf["work_dir"]])
Example #11
0
def do_runfn(fn_name, fn_args, cmd_args, parallel, dockerconf, ports=None):
    """Run a single defined function inside a docker container, returning results.

    The function arguments are written to a YAML file in the work directory
    and the ``runfn`` command is run against it. The result is read back from
    the matching ``-out`` file, passed through the finalizer and returned.
    Exits with status 1 when that output file was not produced.
    """
    dmounts = []
    if cmd_args.get("sample_config"):
        with open(cmd_args["sample_config"]) as in_handle:
            # safe_load: the configuration is plain data. A bare yaml.load()
            # needs an explicit Loader on PyYAML >= 6 and may build arbitrary
            # objects.
            _, dmounts = mounts.update_config(yaml.safe_load(in_handle), cmd_args["fcdir"])
    datadir, fn_args = reconstitute.prep_datadir(cmd_args["pack"], fn_args)
    if "orig_systemconfig" in cmd_args:
        orig_sconfig = _get_system_configfile(cmd_args["orig_systemconfig"], datadir)
        orig_galaxydir = os.path.dirname(orig_sconfig)
        dmounts.append("%s:%s" % (orig_galaxydir, orig_galaxydir))
    work_dir, fn_args, finalizer = reconstitute.prep_workdir(cmd_args["pack"], parallel, fn_args)
    dmounts += mounts.prepare_system(datadir, dockerconf["biodata_dir"])
    reconstitute.prep_systemconfig(datadir, fn_args)
    _, system_mounts = _read_system_config(dockerconf, cmd_args["systemconfig"], datadir)

    dmounts.append("%s:%s" % (work_dir, dockerconf["work_dir"]))
    # The user's home directory is mounted at the same path.
    homedir = pwd.getpwuid(os.getuid()).pw_dir
    dmounts.append("%s:%s" % (homedir, homedir))
    all_mounts = dmounts + system_mounts

    # A unique name keeps argument files of different calls apart.
    argfile = os.path.join(work_dir, "runfn-%s-%s.yaml" % (fn_name, uuid.uuid4()))
    with open(argfile, "w") as out_handle:
        yaml.safe_dump(remap.external_to_docker(fn_args, all_mounts),
                       out_handle, default_flow_style=False, allow_unicode=False)
    docker_argfile = os.path.join(dockerconf["work_dir"], os.path.basename(argfile))
    # Results are read back from "<argfile stem>-out<ext>".
    outfile = "%s-out%s" % os.path.splitext(argfile)
    out = None
    manage.run_bcbio_cmd(cmd_args["image"], all_mounts,
                         ["runfn", fn_name, docker_argfile],
                         ports=ports)
    if os.path.exists(outfile):
        with open(outfile) as in_handle:
            out = remap.docker_to_external(yaml.safe_load(in_handle), all_mounts)
    else:
        print("Subprocess in docker container failed")
        sys.exit(1)
    out = finalizer(out)
    for f in [argfile, outfile]:
        if os.path.exists(f):
            os.remove(f)
    return out
Example #12
0
def _run_biodata_upload(args):
    """Prepare biodata on a local machine and upload it to S3 in pieces.

    For each genome build in ``args.genomes`` the ``upgrade`` command is run
    through ``manage.run_bcbio_cmd``; afterwards the "seq" directory and one
    directory per aligner are handed to ``_upload_biodata``.
    """
    args = install.docker_image_arg(
        defaults.update_check_args(args, "biodata not uploaded"))
    for build in args.genomes:
        print("Preparing %s" % build)
        upgrade_cmd = ["upgrade", "--genomes", build]
        for aligner_name in args.aligners:
            upgrade_cmd.extend(["--aligners", aligner_name])
        system_mounts = mounts.prepare_system(args.datadir, DOCKER["biodata_dir"])
        manage.run_bcbio_cmd(args.image, system_mounts, upgrade_cmd)
        print("Uploading %s" % build)
        genome_path = _get_basedir(args.datadir, build)
        parent_dir, leaf_name = os.path.split(genome_path)
        # The upload below refers to the genome by name, relative to parent_dir.
        assert leaf_name == build
        with utils.chdir(parent_dir):
            available = sorted(os.listdir(build))
            _upload_biodata(build, "seq", available)
            for aligner_name in args.aligners:
                upload_name = genome.REMAP_NAMES.get(aligner_name, aligner_name)
                _upload_biodata(build, upload_name, available)
def full(args, dockerconf):
    """Full installation of docker image and data.

    Upgrades the wrapper scripts and/or tools as requested on ``args``, then
    always runs the command built by ``_get_cl`` in the configured image and
    saves the install defaults. A summary is printed only when something was
    updated.
    """
    args = add_install_defaults(args)
    refreshed = []
    if args.wrapper:
        refreshed.append("wrapper scripts")
        upgrade_bcbio_vm()
    if args.install_tools:
        refreshed.append("bcbio-nextgen code and third party tools")
        # Either upgrade the existing image in place or pull it.
        if not args.inplace:
            pull(dockerconf)
        else:
            upgrade(dockerconf, args)
    system_mounts = mounts.prepare_system(args.datadir, dockerconf["biodata_dir"])
    if args.install_data:
        refreshed.append("biological data")
    install_cmd = _get_cl(args)
    manage.run_bcbio_cmd(dockerconf["image"], system_mounts, install_cmd)
    save_install_defaults(args)
    if refreshed:
        print("\nbcbio-nextgen-vm updated with latest %s" % " and ".join(refreshed))
Example #14
0
def run(args, docker_config):
    """Run an analysis through ``main.run_main`` using the "clusterk" parallel type.

    The sample configuration is normalized and written into the current
    working directory as ``<name>-ready<ext>``; that file and the docker
    settings are passed along in ``parallel["wrapper_args"]``.
    """
    work_dir = os.getcwd()
    parallel = {"type": "clusterk", "queue": args.queue, "cores": args.numcores,
                "module": "bcbiovm.clusterk", "wrapper": "runfn"}
    with open(args.sample_config) as in_handle:
        raw_config = yaml.safe_load(in_handle)
        ready_config, _ = mounts.normalize_config(raw_config, args.fcdir)
    config_stem, config_ext = os.path.splitext(os.path.basename(args.sample_config))
    ready_config_file = os.path.join(work_dir, "%s-ready%s" % (config_stem, config_ext))
    with open(ready_config_file, "w") as out_handle:
        yaml.safe_dump(ready_config, out_handle,
                       default_flow_style=False, allow_unicode=False)
    parallel["pack"] = pack.prep_s3(args.biodata_bucket, args.run_bucket, "runfn_output")
    wrapper_settings = {"sample_config": ready_config_file,
                        "docker_config": docker_config,
                        "fcdir": args.fcdir,
                        "datadir": args.datadir,
                        "systemconfig": args.systemconfig}
    parallel["wrapper_args"] = [wrapper_settings]
    container_workdir = docker_config["work_dir"]
    workdir_mount = "%s:%s" % (work_dir, container_workdir)
    # Run the "version" command with the work directory mounted before the
    # main analysis is started.
    manage.run_bcbio_cmd(args.image, [workdir_mount],
                         ["version", "--workdir=%s" % container_workdir])

    from bcbio.pipeline import main
    main.run_main(work_dir, run_info_yaml=ready_config_file,
                  config_file=args.systemconfig, fc_dir=args.fcdir,
                  parallel=parallel)
Example #15
0
def full(args, dockerconf):
    """Full installation of docker image and data.

    Upgrades the wrapper scripts and/or pulls the tools image as requested on
    ``args``, checks the docker image, then always runs the command built by
    ``_get_cl`` and saves the install defaults before printing a summary.
    """
    args = add_install_defaults(args)
    refreshed = []
    if args.wrapper:
        refreshed.append("wrapper scripts")
        upgrade_bcbio_vm()
    if args.install_tools:
        refreshed.append("bcbio-nextgen code and third party tools")
        pull(args, dockerconf)
    _check_docker_image(args)
    system_mounts = mounts.prepare_system(args.datadir, dockerconf["biodata_dir"])
    if args.install_data:
        refreshed.append("biological data")
    install_cmd = _get_cl(args)
    manage.run_bcbio_cmd(args.image, system_mounts, install_cmd)
    _save_install_defaults(args)
    if refreshed:
        summary = "\nbcbio-nextgen-vm updated with latest %s" % " and ".join(refreshed)
    else:
        summary = ("\nNo update targets specified, need '--wrapper', '--tools' or '--data'\n"
                   "See 'bcbio_vm.py upgrade -h' for more details.")
    print(summary)
Example #16
0
def run(args, docker_config):
    """Run an analysis through ``main.run_main`` using the "clusterk" parallel type.

    The sample configuration is normalized and written into the current
    working directory as ``<name>-ready<ext>``; that file and the docker
    settings are passed along in ``parallel["wrapper_args"]``.
    """
    work_dir = os.getcwd()
    parallel = {"type": "clusterk", "queue": args.queue, "cores": args.numcores,
                "module": "bcbiovm.clusterk", "wrapper": "runfn"}
    with open(args.sample_config) as in_handle:
        # safe_load: the configuration is plain data. A bare yaml.load() needs
        # an explicit Loader on PyYAML >= 6 and may build arbitrary objects.
        ready_config, _ = mounts.normalize_config(yaml.safe_load(in_handle), args.fcdir)
    ready_config_file = os.path.join(work_dir, "%s-ready%s" %
                                     (os.path.splitext(os.path.basename(args.sample_config))))
    with open(ready_config_file, "w") as out_handle:
        yaml.safe_dump(ready_config, out_handle, default_flow_style=False, allow_unicode=False)
    parallel["pack"] = pack.prep_s3(args.biodata_bucket, args.run_bucket, "runfn_output")
    parallel["wrapper_args"] = [{"sample_config": ready_config_file,
                                 "docker_config": docker_config,
                                 "fcdir": args.fcdir,
                                 "datadir": args.datadir,
                                 "systemconfig": args.systemconfig}]
    workdir_mount = "%s:%s" % (work_dir, docker_config["work_dir"])
    # Run the "version" command with the work directory mounted before the
    # main analysis is started.
    manage.run_bcbio_cmd(args.image, [workdir_mount],
                         ["version", "--workdir=%s" % docker_config["work_dir"]])
    main.run_main(work_dir, run_info_yaml=ready_config_file,
                  config_file=args.systemconfig, fc_dir=args.fcdir,
                  parallel=parallel)
def upgrade(dockerconf, args):
    """Upgrade tools and code in place inside a container.

    Runs the development tools upgrade through ``manage.run_bcbio_cmd`` and
    commits the returned container id back onto the configured image name
    with ``docker commit``.
    """
    system_mounts = mounts.prepare_system(args.datadir, dockerconf["biodata_dir"])
    image_name = dockerconf["image"]
    upgrade_cmd = ["upgrade", "-u", "development", "--tools"]
    container_id = manage.run_bcbio_cmd(image_name, system_mounts, upgrade_cmd)
    subprocess.check_call(["docker", "commit", container_id, image_name])
def cmd_server(args):
    """Run the ``server`` command through ``manage.run_bcbio_cmd``.

    ``args.port`` is paired with the port configured in ``DOCKER`` and
    passed along as the port specification; the image comes from ``DOCKER``.
    """
    args = defaults.update_check_args(args, "Could not run server.")
    port_spec = ["%s:%s" % (args.port, DOCKER["port"])]
    print("Running server on port %s. Press ctrl-c to exit." % args.port)
    image_name = DOCKER["image"]
    server_cmd = ["server", "--port", str(DOCKER["port"])]
    manage.run_bcbio_cmd(image_name, [], server_cmd, port_spec)