Example #1
 def get(self):
     # Coroutine-decorated tornado handler method; `args` comes from the
     # enclosing server scope. io.StringIO replaces the Python 2 StringIO module.
     rargs = yaml.safe_load(
         io.StringIO(str(self.get_argument("args", "{}"))))
     system_config = args.config or "bcbio_system.yaml"
     if "system_config" in rargs:
         system_config = os.path.join(rargs["work_dir"],
                                      "web-system_config.yaml")
         with open(system_config, "w") as out_handle:
             yaml.dump(rargs["system_config"],
                       out_handle,
                       default_flow_style=False,
                       allow_unicode=False)
     if "sample_config" in rargs:
         sample_config = os.path.join(rargs["work_dir"],
                                      "web-sample_config.yaml")
         with open(sample_config, "w") as out_handle:
             yaml.dump(rargs["sample_config"],
                       out_handle,
                       default_flow_style=False,
                       allow_unicode=False)
     else:
         sample_config = rargs.get("run_config")
     kwargs = {
         "work_dir": rargs["work_dir"],
         "config_file": system_config,
         "run_info_yaml": sample_config,
         "fc_dir": rargs.get("fc_dir"),
         "parallel": clargs.to_parallel(_rargs_to_parallel_args(rargs)),
         "app": self.application
     }
     run_id = yield tornado.gen.Task(run_bcbio_nextgen, **kwargs)
     self.write(run_id)
     self.finish()
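
Every example in this listing funnels parsed command-line options through clargs.to_parallel. As a reading aid, here is a minimal sketch of the kind of flat dictionary it appears to build; the key names are inferred from how these examples consume the result (parallel["queue"], parallel["wrapper"], ...), not taken from the bcbio.distributed.clargs source.

def to_parallel_sketch(args, module="bcbio.distributed"):
    # Illustrative reimplementation only: flatten scheduler-related argparse
    # options into the "parallel" dict the pipeline entry points expect.
    return {"type": getattr(args, "paralleltype", "local"),  # "local" or "ipython"
            "cores": getattr(args, "numcores", 1),
            "module": module,
            "scheduler": getattr(args, "scheduler", None),
            "queue": getattr(args, "queue", None),
            "resources": getattr(args, "resources", []),
            "timeout": getattr(args, "timeout", 15),
            "retries": getattr(args, "retries", 0),
            "tag": getattr(args, "tag", "")}
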
Example #2
def cmd_ipython(args):
    args = defaults.update_check_args(args, "Could not run IPython parallel analysis.")
    args = install.docker_image_arg(args)
    parallel = clargs.to_parallel(args, "bcbiovm.docker")
    parallel["wrapper"] = "runfn"
    with open(args.sample_config) as in_handle:
        ready_config, _ = mounts.normalize_config(yaml.safe_load(in_handle), args.fcdir)
    work_dir = os.getcwd()
    # os.path.splitext returns a (root, ext) tuple, which fills both %s slots.
    ready_config_file = os.path.join(work_dir, "%s-ready%s" %
                                     (os.path.splitext(os.path.basename(args.sample_config))))
    with open(ready_config_file, "w") as out_handle:
        yaml.safe_dump(ready_config, out_handle, default_flow_style=False, allow_unicode=False)
    systemconfig = run.local_system_config(args.systemconfig, args.datadir, work_dir)
    cur_pack = pack.shared_filesystem(work_dir, args.datadir, args.tmpdir)
    parallel["wrapper_args"] = [devel.DOCKER, {"sample_config": ready_config_file,
                                               "fcdir": args.fcdir,
                                               "pack": cur_pack,
                                               "systemconfig": systemconfig,
                                               "image": args.image}]
    # For testing, run on a local ipython cluster
    parallel["run_local"] = parallel.get("queue") == "localrun"

    from bcbio.pipeline import main
    main.run_main(work_dir, run_info_yaml=ready_config_file,
                  config_file=systemconfig, fc_dir=args.fcdir,
                  parallel=parallel)
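
The parallel["run_local"] assignment encodes a small convention: per the source comment, passing -q localrun makes the pipeline run on a local IPython cluster for testing instead of submitting to a scheduler. The flag flip itself is just:

parallel = {"queue": "localrun"}                             # as set by `-q localrun`
parallel["run_local"] = parallel.get("queue") == "localrun"  # True only for the test queue
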
Example #3
def cmd_ipython(args):
    args = defaults.update_check_args(args, "Could not run IPython parallel analysis.")
    args = install.docker_image_arg(args)
    parallel = clargs.to_parallel(args, "bcbiovm.docker")
    parallel["wrapper"] = "runfn"
    with open(args.sample_config) as in_handle:
        ready_config, _ = mounts.normalize_config(yaml.safe_load(in_handle), args.fcdir)
    work_dir = os.getcwd()
    ready_config_file = os.path.join(work_dir, "%s-ready%s" %
                                     (os.path.splitext(os.path.basename(args.sample_config))))
    with open(ready_config_file, "w") as out_handle:
        yaml.safe_dump(ready_config, out_handle, default_flow_style=False, allow_unicode=False)
    systemconfig = run.local_system_config(args.systemconfig, args.datadir, work_dir)
    cur_pack = pack.shared_filesystem(work_dir, args.datadir, args.tmpdir)
    parallel["wrapper_args"] = [devel.DOCKER, {"sample_config": ready_config_file,
                                               "fcdir": args.fcdir,
                                               "pack": cur_pack,
                                               "systemconfig": systemconfig,
                                               "image": args.image}]
    # For testing, run on a local ipython cluster
    parallel["run_local"] = parallel.get("queue") == "localrun"

    from bcbio.pipeline import main
    main.run_main(work_dir, run_info_yaml=ready_config_file,
                  config_file=systemconfig, fc_dir=args.fcdir,
                  parallel=parallel)
Example #4
def cmd_ipython(args):
    args = defaults.update_check_args(args, "Could not run IPython parallel analysis.")
    args = install.docker_image_arg(args)
    parallel = clargs.to_parallel(args, "bcbiovm.docker")
    parallel["wrapper"] = "runfn"
    with open(args.sample_config) as in_handle:
        ready_config, _ = mounts.normalize_config(yaml.safe_load(in_handle), args.fcdir)
    work_dir = os.getcwd()
    ready_config_file = os.path.join(work_dir, "%s-ready%s" %
                                     (os.path.splitext(os.path.basename(args.sample_config))))
    with open(ready_config_file, "w") as out_handle:
        yaml.safe_dump(ready_config, out_handle, default_flow_style=False, allow_unicode=False)
    parallel["wrapper_args"] = [DOCKER, {"sample_config": ready_config_file,
                                         "fcdir": args.fcdir,
                                         "pack": pack.shared_filesystem(work_dir, args.datadir, args.tmpdir),
                                         "systemconfig": args.systemconfig,
                                         "image": args.image}]
    # For testing, run on a local ipython cluster
    parallel["run_local"] = parallel.get("queue") == "localrun"
    workdir_mount = "%s:%s" % (work_dir, DOCKER["work_dir"])
    manage.run_bcbio_cmd(args.image, [workdir_mount],
                         ["version", "--workdir=%s" % DOCKER["work_dir"]])
    main.run_main(work_dir, run_info_yaml=ready_config_file,
                  config_file=args.systemconfig, fc_dir=args.fcdir,
                  parallel=parallel)
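
Example #4 differs from #2 and #3 in that it bind-mounts the working directory into the container before invoking bcbio. The mount string follows Docker's standard host:container form; a sketch with hypothetical paths:

DOCKER = {"work_dir": "/mnt/work"}        # hypothetical container-side path
work_dir = "/home/user/project"           # hypothetical host-side path
workdir_mount = "%s:%s" % (work_dir, DOCKER["work_dir"])
print(workdir_mount)                      # /home/user/project:/mnt/work, as passed to `docker run -v`
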
Example #5
def _calculate_resources(data, args, resources):
    parallel = clargs.to_parallel(args)
    config = data[0][0]['config']
    config['resources'].update({resources['name']: {'memory': "%sg" % resources['mem'],
                                                    'cores': resources['cores']}})
    parallel.update({'progs': [resources['name']]})
    log.setup_log(config, parallel)
    dirs = {'work': os.path.abspath(os.getcwd())}
    system.write_info(dirs, parallel, config)
    sysinfo = system.machine_info()[0]
    log.logger.info("Number of items %s" % len(data))
    parallel = res.calculate(parallel, data, sysinfo, config)
    log.logger.info(parallel)
    return parallel
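
_calculate_resources expects bcbio's nested sample layout (a list whose items are one-element lists of sample dicts, each carrying a config) plus a resource spec naming the program to size. A hypothetical invocation, with made-up names and sizes inferred from the function body:

resources = {"name": "samtools", "mem": 2, "cores": 16}     # 2g memory per core, 16 cores
data = [[{"config": {"resources": {}}}] for _ in range(4)]  # four dummy samples
# parallel = _calculate_resources(data, args, resources)    # `args` from an argparse parser as above
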
Example #6
 def get(self):
     # As in example #1: a coroutine-decorated tornado handler, with `args`
     # supplied by the enclosing server scope and StringIO from the io module.
     rargs = yaml.safe_load(StringIO(str(self.get_argument("args", "{}"))))
     system_config = args.config or "bcbio_system.yaml"
     if "system_config" in rargs:
         system_config = os.path.join(rargs["work_dir"], "web-system_config.yaml")
         with open(system_config, "w") as out_handle:
             yaml.safe_dump(rargs["system_config"], out_handle, default_flow_style=False, allow_unicode=False)
     if "sample_config" in rargs:
         sample_config = os.path.join(rargs["work_dir"], "web-sample_config.yaml")
         with open(sample_config, "w") as out_handle:
             yaml.safe_dump(rargs["sample_config"], out_handle, default_flow_style=False, allow_unicode=False)
     else:
         sample_config = rargs.get("run_config")
     kwargs = {"workdir": rargs["work_dir"],
               "config_file": system_config,
               "run_info_yaml": sample_config,
               "fc_dir": rargs.get("fc_dir"),
               "parallel": clargs.to_parallel(_rargs_to_parallel_args(rargs, args)),
               "app": self.application}
     run_id = yield tornado.gen.Task(run_bcbio_nextgen, **kwargs)
     self.write(run_id)
     self.finish()
Example #7
def parse_cl_args(in_args):
    """Parse input commandline arguments, handling multiple cases.

    Returns the main config file and set of kwargs.
    """
    sub_cmds = {"upgrade": install.add_subparser,
                "server": server_main.add_subparser,
                "runfn": runfn.add_subparser,
                "graph": graph.add_subparser,
                "version": programs.add_subparser,
                "sequencer": machine.add_subparser}
    description = "Community developed high throughput sequencing analysis."
    parser = argparse.ArgumentParser(description=description)
    sub_cmd = None
    if len(in_args) > 0 and in_args[0] in sub_cmds:
        subparser_help = "bcbio-nextgen supplemental commands"
        subparsers = parser.add_subparsers(help=subparser_help)
        sub_cmds[in_args[0]](subparsers)
        sub_cmd = in_args[0]
    else:
        parser.add_argument("global_config", nargs="?",
                            help=("Global YAML configuration file specifying "
                                  "details about the system (optional, "
                                  "defaults to installed bcbio_system.yaml)"))
        parser.add_argument("fc_dir", nargs="?",
                            help=("A directory of Illumina output or fastq "
                                  "files to process (optional)"))
        parser.add_argument("run_config", nargs="*",
                            help=("YAML file with details about samples to "
                                  "process (required, unless using Galaxy "
                                  "LIMS as input)")),
        parser.add_argument("-n", "--numcores", type=int, default=1,
                            help="Total cores to use for processing")
        parser.add_argument("-t", "--paralleltype",
                            choices=["local", "ipython"],
                            default="local", help="Approach to parallelization")
        parser.add_argument("-s", "--scheduler",
                            choices=["lsf", "sge", "torque", "slurm", "pbspro"],
                            help="Scheduler to use for ipython parallel")
        parser.add_argument("--local_controller",
                            default=False,
                            action="store_true",
                            help="run controller locally")
        parser.add_argument("-q", "--queue",
                            help=("Scheduler queue to run jobs on, for "
                                  "ipython parallel"))
        parser.add_argument("-r", "--resources",
                            help=("Cluster specific resources specifications. "
                                  "Can be specified multiple times.\n"
                                  "Supports SGE, Torque, LSF and SLURM "
                                  "parameters."), default=[], action="append")
        parser.add_argument("--timeout", default=15, type=int,
                            help=("Number of minutes before cluster startup "
                                  "times out. Defaults to 15"))
        parser.add_argument("--retries", default=0, type=int,
                            help=("Number of retries of failed tasks during "
                                  "distributed processing. Default 0 "
                                  "(no retries)"))
        parser.add_argument("-p", "--tag",
                            help="Tag name to label jobs on the cluster",
                            default="")
        parser.add_argument("-w", "--workflow",
                            help=("Run a workflow with the given commandline "
                                  "arguments"))
        parser.add_argument("--workdir", default=os.getcwd(),
                            help=("Directory to process in. Defaults to "
                                  "current working directory"))
        parser.add_argument("-v", "--version", help="Print current version",
                            action="store_true")
        # Hidden arguments passed downstream
        parser.add_argument("--only-metadata", help=argparse.SUPPRESS, action="store_true", default=False)
    args = parser.parse_args(in_args)
    if hasattr(args, "workdir"):
        args.workdir = utils.safe_makedir(os.path.abspath(args.workdir))
    if hasattr(args, "global_config"):
        error_msg = _sanity_check_args(args)
        if error_msg:
            parser.error(error_msg)
        kwargs = {"parallel": clargs.to_parallel(args),
                  "workflow": args.workflow,
                  "workdir": args.workdir}
        kwargs = _add_inputs_to_kwargs(args, kwargs, parser)
        error_msg = _sanity_check_kwargs(kwargs)
        if error_msg:
            parser.error(error_msg)
    else:
        assert sub_cmd is not None
        kwargs = {"args": args,
                  "config_file": None,
                  sub_cmd: True}
    return kwargs
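
A hypothetical non-subcommand invocation of the parser above; the optional positionals appear to be disambiguated downstream by _add_inputs_to_kwargs, which inspects what each input actually is:

kwargs = parse_cl_args(["bcbio_system.yaml", "sample_config.yaml",
                        "-n", "16", "-t", "ipython", "-s", "slurm", "-q", "general"])
# kwargs carries "parallel" (from clargs.to_parallel) plus "workflow" and "workdir".
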
Example #8
def parse_cl_args(in_args):
    """Parse input commandline arguments, handling multiple cases.

    Returns the main config file and set of kwargs.
    """
    sub_cmds = {
        "upgrade": install.add_subparser,
        "server": server_main.add_subparser,
        "runfn": runfn.add_subparser,
        "graph": graph.add_subparser,
        "version": programs.add_subparser,
        "sequencer": machine.add_subparser
    }
    description = "Community developed high throughput sequencing analysis."
    parser = argparse.ArgumentParser(description=description)
    sub_cmd = None
    if len(in_args) > 0 and in_args[0] in sub_cmds:
        subparser_help = "bcbio-nextgen supplemental commands"
        subparsers = parser.add_subparsers(help=subparser_help)
        sub_cmds[in_args[0]](subparsers)
        sub_cmd = in_args[0]
    else:
        parser.add_argument("global_config",
                            nargs="?",
                            help=("Global YAML configuration file specifying "
                                  "details about the system (optional, "
                                  "defaults to installed bcbio_system.yaml)"))
        parser.add_argument("fc_dir",
                            nargs="?",
                            help=("A directory of Illumina output or fastq "
                                  "files to process (optional)"))
        parser.add_argument("run_config",
                            nargs="*",
                            help=("YAML file with details about samples to "
                                  "process (required, unless using Galaxy "
                                  "LIMS as input)")),
        parser.add_argument("-n",
                            "--numcores",
                            type=int,
                            default=1,
                            help="Total cores to use for processing")
        parser.add_argument("-t",
                            "--paralleltype",
                            choices=["local", "ipython"],
                            default="local",
                            help="Approach to parallelization")
        parser.add_argument(
            "-s",
            "--scheduler",
            choices=["lsf", "sge", "torque", "slurm", "pbspro"],
            help="Scheduler to use for ipython parallel")
        parser.add_argument("--local_controller",
                            default=False,
                            action="store_true",
                            help="run controller locally")
        parser.add_argument("-q",
                            "--queue",
                            help=("Scheduler queue to run jobs on, for "
                                  "ipython parallel"))
        parser.add_argument("-r",
                            "--resources",
                            help=("Cluster specific resources specifications. "
                                  "Can be specified multiple times.\n"
                                  "Supports SGE, Torque, LSF and SLURM "
                                  "parameters."),
                            default=[],
                            action="append")
        parser.add_argument("--timeout",
                            default=15,
                            type=int,
                            help=("Number of minutes before cluster startup "
                                  "times out. Defaults to 15"))
        parser.add_argument("--retries",
                            default=0,
                            type=int,
                            help=("Number of retries of failed tasks during "
                                  "distributed processing. Default 0 "
                                  "(no retries)"))
        parser.add_argument("-p",
                            "--tag",
                            help="Tag name to label jobs on the cluster",
                            default="")
        parser.add_argument("-w",
                            "--workflow",
                            help=("Run a workflow with the given commandline "
                                  "arguments"))
        parser.add_argument("--workdir",
                            default=os.getcwd(),
                            help=("Directory to process in. Defaults to "
                                  "current working directory"))
        parser.add_argument("-v",
                            "--version",
                            help="Print current version",
                            action="store_true")
        # Hidden arguments passed downstream
        parser.add_argument("--only-metadata",
                            help=argparse.SUPPRESS,
                            action="store_true",
                            default=False)
        parser.add_argument("--force-single",
                            help="Treat all files as single reads",
                            action="store_true",
                            default=False)
    args = parser.parse_args(in_args)
    if hasattr(args, "workdir") and args.workdir:
        args.workdir = utils.safe_makedir(os.path.abspath(args.workdir))
    if hasattr(args, "global_config"):
        error_msg = _sanity_check_args(args)
        if error_msg:
            parser.error(error_msg)
        kwargs = {
            "parallel": clargs.to_parallel(args),
            "workflow": args.workflow,
            "workdir": args.workdir
        }
        kwargs = _add_inputs_to_kwargs(args, kwargs, parser)
        error_msg = _sanity_check_kwargs(kwargs)
        if error_msg:
            parser.error(error_msg)
    else:
        assert sub_cmd is not None
        kwargs = {"args": args, "config_file": None, sub_cmd: True}
    return kwargs
Example #9
    # Snippet begins mid-function; the enclosing def and earlier setup are not shown.
    try:
        system_config = os.path.join(_get_data_dir(), "galaxy",
                                     "bcbio_system.yaml")
    except ValueError as err:
        print(err)
        print("WARNING: Attempting to read bcbio_system.yaml in the current directory.")
        system_config = "bcbio_system.yaml"

    with open(system_config) as in_handle:
        config = yaml.safe_load(in_handle)
        res = {'cores': args.cores_per_job}
        config["algorithm"] = {"num_cores": args.cores_per_job}
        config["resources"].update({'sambamba': res, 'samtools': res})
        config["log_dir"] = os.path.join(os.path.abspath(os.getcwd()), "log")
    parallel = clargs.to_parallel(args)
    parallel.update({'progs': ['samtools', 'sambamba']})
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)
    dirs = {'work': os.path.abspath(os.getcwd())}
    system.write_info(dirs, parallel, config)
    sysinfo = system.machine_info()[0]
    samples = _get_samples_to_process(args.csv, out_dir, config,
                                      args.force_single, args.separators)
    parallel = resources.calculate(parallel, [samples], sysinfo, config)

    with prun.start(parallel, samples, config, dirs) as run_parallel:
        with profile.report("prepare bcbio samples", dirs):
            samples = run_parallel("prepare_bcbio_samples", samples)

    create_new_csv(samples, args)
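
The prun.start block above is the dispatch idiom shared by these scripts: the parallel dict selects a backend, and the yielded run_parallel callable fans a registered function name out over the samples. A backend-free sketch of the same shape, with a hypothetical registry standing in for bcbio's:

from contextlib import contextmanager

@contextmanager
def start_sketch(parallel, samples, config, dirs):
    # Stand-in for prun.start: a serial "backend" mapping a named function over items.
    registry = {"prepare_bcbio_samples": lambda item: item}  # hypothetical function table
    def run_parallel(fn_name, items):
        return [registry[fn_name](item) for item in items]
    yield run_parallel

with start_sketch({"type": "local"}, [], {}, {}) as run_parallel:
    samples = run_parallel("prepare_bcbio_samples", [])
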
Example #10
                        choices=["lsf", "slurm", "torque", "sge", "pbspro"])
    parser.add_argument("-r", "--resources", help="Extra scheduler resource flags.", default=[], action="append")
    parser.add_argument("-q", "--queue", help="Queue to submit jobs to.")
    parser.add_argument("-p", "--tag", help="Tag name to label jobs on the cluster", default="bcb-prep")
    parser.add_argument("-t", "--paralleltype",
                        choices=["local", "ipython"],
                        default="local", help="Run with iptyhon")

    args = parser.parse_args()
    system_config = os.path.join(_get_data_dir(), "galaxy", "bcbio_system.yaml")
    if args.galaxy:
        system_config = args.galaxy
    with open(system_config) as in_handle:
        config = yaml.safe_load(in_handle)

    parallel = clargs.to_parallel(args)
    parallel.update({'progs': args.progs})
    dirs = {'work': os.path.abspath(os.getcwd())}
    if args.sys_info.find(";") > -1:
        info = args.sys_info.split(";")
        sysinfo = {'cores': int(info[0]), 'memory': float(info[1])}
    else:
        if utils.file_exists(args.sys_info):
            with open(args.sys_info) as in_handle:
                sysinfo = yaml.safe_load(in_handle)[0]
    print("system info %s" % sysinfo)
    samples = []
    pipelines, config = _pair_samples_with_pipelines(args.yaml_file, config)
    for s in pipelines:
        samples.extend(pipelines[s])  # accumulate across pipelines instead of overwriting each iteration
    print "number of samples %s" % len(samples)
    print "after calculate fn"
Example #11
def parse_cl_args(in_args):
    """Parse input commandline arguments, handling multiple cases.

    Returns the main config file and set of kwargs.
    """
    sub_cmds = {"upgrade": install.add_subparser,
                "server": server_main.add_subparser,
                "runfn": runfn.add_subparser,
                "version": programs.add_subparser}
    parser = argparse.ArgumentParser(
        description="Best-practice pipelines for fully automated high throughput sequencing analysis.")
    sub_cmd = None
    if len(in_args) > 0 and in_args[0] in sub_cmds:
        subparsers = parser.add_subparsers(help="bcbio-nextgen supplemental commands")
        sub_cmds[in_args[0]](subparsers)
        sub_cmd = in_args[0]
    else:
        parser.add_argument("global_config", help="Global YAML configuration file specifying details "
                            "about the system (optional, defaults to installed bcbio_system.yaml)",
                            nargs="?")
        parser.add_argument("fc_dir", help="A directory of Illumina output or fastq files to process (optional)",
                            nargs="?")
        parser.add_argument("run_config", help="YAML file with details about samples to process "
                            "(required, unless using Galaxy LIMS as input)",
                            nargs="*")
        parser.add_argument("-n", "--numcores", help="Total cores to use for processing",
                            type=int, default=1)
        parser.add_argument("-t", "--paralleltype", help="Approach to parallelization",
                            choices=["local", "ipython"], default="local")
        parser.add_argument("-s", "--scheduler", help="Scheduler to use for ipython parallel",
                            choices=["lsf", "sge", "torque", "slurm"])
        parser.add_argument("-q", "--queue", help="Scheduler queue to run jobs on, for ipython parallel")
        parser.add_argument("-r", "--resources",
                            help=("Cluster specific resources specifications. Can be specified multiple times.\n"
                                  "Supports SGE, Torque, LSF and SLURM parameters."),
                            default=[], action="append")
        parser.add_argument("--timeout", help="Number of minutes before cluster startup times out. Defaults to 15",
                            default=15, type=int)
        parser.add_argument("--retries",
                            help=("Number of retries of failed tasks during distributed processing. "
                                  "Default 0 (no retries)"),
                            default=0, type=int)
        parser.add_argument("-p", "--tag", help="Tag name to label jobs on the cluster",
                            default="")
        parser.add_argument("-w", "--workflow", help="Run a workflow with the given commandline arguments")
        parser.add_argument("--workdir", help="Directory to process in. Defaults to current working directory",
                            default=os.getcwd())
        parser.add_argument("-v", "--version", help="Print current version",
                            action="store_true")
    args = parser.parse_args(in_args)
    if hasattr(args, "global_config"):
        error_msg = _sanity_check_args(args)
        if error_msg:
            parser.error(error_msg)
        kwargs = {"parallel": clargs.to_parallel(args),
                  "workflow": args.workflow,
                  "workdir": args.workdir}
        kwargs = _add_inputs_to_kwargs(args, kwargs, parser)
    else:
        assert sub_cmd is not None
        kwargs = {"args": args,
                  "config_file": None,
                  sub_cmd: True}
    return kwargs
Example #12
def parse_cl_args(in_args):
    """Parse input commandline arguments, handling multiple cases.

    Returns the main config file and set of kwargs.
    """
    sub_cmds = {"upgrade": install.add_subparser,
                "server": server_main.add_subparser,
                "runfn": runfn.add_subparser,
                "version": programs.add_subparser,
                "sequencer": machine.add_subparser}
    parser = argparse.ArgumentParser(
        description="Best-practice pipelines for fully automated high throughput sequencing analysis.")
    sub_cmd = None
    if len(in_args) > 0 and in_args[0] in sub_cmds:
        subparsers = parser.add_subparsers(help="bcbio-nextgen supplemental commands")
        sub_cmds[in_args[0]](subparsers)
        sub_cmd = in_args[0]
    else:
        parser.add_argument("global_config", help="Global YAML configuration file specifying details "
                            "about the system (optional, defaults to installed bcbio_system.yaml)",
                            nargs="?")
        parser.add_argument("fc_dir", help="A directory of Illumina output or fastq files to process (optional)",
                            nargs="?")
        parser.add_argument("run_config", help="YAML file with details about samples to process "
                            "(required, unless using Galaxy LIMS as input)",
                            nargs="*")
        parser.add_argument("-n", "--numcores", help="Total cores to use for processing",
                            type=int, default=1)
        parser.add_argument("-t", "--paralleltype", help="Approach to parallelization",
                            choices=["local", "ipython"], default="local")
        parser.add_argument("-s", "--scheduler", help="Scheduler to use for ipython parallel",
                            choices=["lsf", "sge", "torque", "slurm"])
        parser.add_argument("-q", "--queue", help="Scheduler queue to run jobs on, for ipython parallel")
        parser.add_argument("-r", "--resources",
                            help=("Cluster specific resources specifications. Can be specified multiple times.\n"
                                  "Supports SGE, Torque, LSF and SLURM parameters."),
                            default=[], action="append")
        parser.add_argument("--timeout", help="Number of minutes before cluster startup times out. Defaults to 15",
                            default=15, type=int)
        parser.add_argument("--retries",
                            help=("Number of retries of failed tasks during distributed processing. "
                                  "Default 0 (no retries)"),
                            default=0, type=int)
        parser.add_argument("-p", "--tag", help="Tag name to label jobs on the cluster",
                            default="")
        parser.add_argument("-w", "--workflow", help="Run a workflow with the given commandline arguments")
        parser.add_argument("--workdir", help="Directory to process in. Defaults to current working directory",
                            default=os.getcwd())
        parser.add_argument("-v", "--version", help="Print current version",
                            action="store_true")
    args = parser.parse_args(in_args)
    if hasattr(args, "global_config"):
        error_msg = _sanity_check_args(args)
        if error_msg:
            parser.error(error_msg)
        kwargs = {"parallel": clargs.to_parallel(args),
                  "workflow": args.workflow,
                  "workdir": args.workdir}
        kwargs = _add_inputs_to_kwargs(args, kwargs, parser)
        error_msg = _sanity_check_kwargs(kwargs)
        if error_msg:
            parser.error(error_msg)
    else:
        assert sub_cmd is not None
        kwargs = {"args": args,
                  "config_file": None,
                  sub_cmd: True}
    return kwargs