Example #1
def _run_toplevel(config,
                  config_file,
                  work_dir,
                  parallel,
                  fc_dir=None,
                  run_info_yaml=None):
    """
    Run toplevel analysis, processing a set of input files.
    config_file -- Main YAML configuration file with system parameters
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)
    dirs = run_info.setup_directories(work_dir, fc_dir, config, config_file)
    config_file = os.path.join(dirs["config"], os.path.basename(config_file))
    pipelines = _pair_samples_with_pipelines(run_info_yaml)
    system.write_info(dirs, parallel, config)
    final = []
    with tx_tmpdir(config) as tmpdir:
        tempfile.tempdir = tmpdir
        for pipeline, samples in pipelines.items():
            for xs in pipeline.run(config, run_info_yaml, parallel, dirs,
                                   samples):
                if len(xs) == 1:
                    upload.from_sample(xs[0])
                    final.append(xs[0])
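For orientation, here is a minimal sketch of how a caller might drive _run_toplevel; the run_analysis wrapper, its argument values, and the exact shape of the parallel dict are assumptions for illustration, not part of the examples on this page.

import yaml

def run_analysis(config_file, work_dir, run_info_yaml, cores=1):
    # Parse the system-level YAML configuration; the examples above
    # receive `config` already loaded into a dict.
    with open(config_file) as in_handle:
        config = yaml.safe_load(in_handle)
    # Minimal parallel spec; in bcbio this normally comes from
    # clargs.to_parallel(args), so the exact keys here are an assumption.
    parallel = {"type": "local", "cores": cores}
    _run_toplevel(config, config_file, work_dir, parallel,
                  run_info_yaml=run_info_yaml)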
Example #2
def _run_toplevel(config,
                  config_file,
                  work_dir,
                  parallel,
                  fc_dir=None,
                  run_info_yaml=None):
    """
    Run toplevel analysis, processing a set of input files.
    config_file -- Main YAML configuration file with system parameters
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)
    logger.info("System YAML configuration: %s" % os.path.abspath(config_file))
    dirs = run_info.setup_directories(work_dir, fc_dir, config, config_file)
    config_file = os.path.join(dirs["config"], os.path.basename(config_file))
    pipelines, config = _pair_samples_with_pipelines(run_info_yaml, config)
    system.write_info(dirs, parallel, config)
    with tx_tmpdir(config if parallel.get("type") == "local"
                   else None) as tmpdir:
        tempfile.tempdir = tmpdir
        for pipeline, samples in pipelines.items():
            for xs in pipeline(config, run_info_yaml, parallel, dirs, samples):
                pass
Example #3
def _add_provenance(items, dirs, parallel, config):
    p = programs.write_versions(dirs, config,
                                is_wrapper=parallel.get("wrapper") is not None)
    p_db = diagnostics.initialize(dirs)
    system.write_info(dirs, parallel, config)
    out = []
    for item in items:
        entity_id = diagnostics.store_entity(item)
        item["config"]["resources"]["program_versions"] = p
        item["provenance"] = {"programs": p, "entity": entity_id, "db": p_db}
        out.append([item])
    return out
Example #4
def _add_provenance(items, dirs, run_parallel, parallel, config):
    p = programs.write_versions(dirs, config)
    system.write_info(dirs, run_parallel, parallel, config)
    out = []
    for item in items:
        if item.get("upload") and item["upload"].get("fc_name"):
            entity_id = "%s.%s.%s" % (item["upload"]["fc_date"], item["upload"]["fc_name"], item["description"])
        else:
            entity_id = item["description"]
        item["config"]["resources"]["program_versions"] = p
        item["provenance"] = {"programs": p, "entity": entity_id}
        out.append([item])
    return out
Example #5
def _add_provenance(items, dirs, parallel, config):
    p = programs.write_versions(dirs, config,
                                is_wrapper=parallel.get("wrapper") is not None)
    system.write_info(dirs, parallel, config)
    out = []
    for item in items:
        if item.get("upload") and item["upload"].get("fc_name"):
            entity_id = "%s.%s.%s" % (item["upload"]["fc_date"],
                                      item["upload"]["fc_name"],
                                      item["description"])
        else:
            entity_id = item["description"]
        item["config"]["resources"]["program_versions"] = p
        item["provenance"] = {"programs": p, "entity": entity_id}
        out.append([item])
    return out
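Both _add_provenance variants hang the same kind of record off each sample; below is a sketch of the resulting structure with placeholder values, since the real contents of p, p_db, and entity_id come from programs.write_versions and the diagnostics module.

# Shape of the provenance record attached to every item (values are
# illustrative placeholders, not real outputs):
item["provenance"] = {
    "programs": "provenance/programs.txt",   # path written by programs.write_versions
    "entity": "140101.FC123.sample1",        # fc_date.fc_name.description fallback id
    "db": "provenance/diagnostics.sqlite3",  # only in the diagnostics-backed variant
}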
Example #6
def _calculate_resources(data, args, resources):
    parallel = clargs.to_parallel(args)
    config = data[0][0]['config']
    config['resources'].update({resources['name']:
                                {'memory': "%sg" % resources['mem'],
                                 'cores': resources['cores']}})
    parallel.update({'progs': [resources['name']]})
    log.setup_log(config, parallel)
    dirs = {'work': os.path.abspath(os.getcwd())}
    system.write_info(dirs, parallel, config)
    sysinfo = system.machine_info()[0]
    log.logger.info("Number of items %s" % len(data))
    parallel = res.calculate(parallel, data, sysinfo, config)
    log.logger.info(parallel)
    return parallel
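A hedged sketch of calling _calculate_resources follows; the resources dict keys (name, mem, cores) are read straight from the function body, while args and data here are hypothetical stand-ins.

# `args` would come from an argparse parser compatible with
# clargs.to_parallel; `data` is the nested [[{"config": ...}], ...]
# structure the function indexes with data[0][0]['config'].
resources = {"name": "samtools", "mem": 4, "cores": 8}
parallel = _calculate_resources(data, args, resources)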
Example #7
def _run_toplevel(config, config_file, work_dir, parallel, fc_dir=None, run_info_yaml=None):
    """
    Run toplevel analysis, processing a set of input files.
    config_file -- Main YAML configuration file with system parameters
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)
    dirs = run_info.setup_directories(work_dir, fc_dir, config, config_file)
    config_file = os.path.join(dirs["config"], os.path.basename(config_file))
    pipelines, config = _pair_samples_with_pipelines(run_info_yaml, config)
    system.write_info(dirs, parallel, config)
    with tx_tmpdir(config) as tmpdir:
        tempfile.tempdir = tmpdir
        for pipeline, samples in pipelines.items():
            for xs in pipeline.run(config, run_info_yaml, parallel, dirs, samples):
                pass
                                     "bcbio_system.yaml")
    except ValueError as err:
        print(err)
        print(
            "WARNING: Attempting to read bcbio_system.yaml in the current directory."
        )
        system_config = "bcbio_system.yaml"

    with open(system_config) as in_handle:
        config = yaml.safe_load(in_handle)
        res = {'cores': args.cores_per_job}
        config["algorithm"] = {"num_cores": args.cores_per_job}
        config["resources"].update({'sambamba': res, 'samtools': res})
        config["log_dir"] = os.path.join(os.path.abspath(os.getcwd()), "log")
    parallel = clargs.to_parallel(args)
    parallel.update({'progs': ['samtools', 'sambamba']})
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)
    dirs = {'work': os.path.abspath(os.getcwd())}
    system.write_info(dirs, parallel, config)
    sysinfo = system.machine_info()[0]
    samples = _get_samples_to_process(args.csv, out_dir, config,
                                      args.force_single, args.separators)
    parallel = resources.calculate(parallel, [samples], sysinfo, config)

    with prun.start(parallel, samples, config, dirs) as run_parallel:
        with profile.report("prepare bcbio samples", dirs):
            samples = run_parallel("prepare_bcbio_samples", samples)

    create_new_csv(samples, args)
    parser.add_argument("-p", "--tag", help="Tag name to label jobs on the cluster", default="bcb-prep")
    parser.add_argument("-t", "--paralleltype",
                        choices=["local", "ipython"],
                        default="local", help="Run with iptyhon")

    args = parser.parse_args()
    out_dir = os.path.abspath(args.out)
    utils.safe_makedir(out_dir)
    system_config = os.path.join(_get_data_dir(), "galaxy", "bcbio_system.yaml")
    with open(system_config) as in_handle:
        config = yaml.safe_load(in_handle)
        res = {'cores': args.cores_per_job}
        config["algorithm"] = {"num_cores": args.cores_per_job}
        config["resources"].update({'sambamba': res,
                                    'samtools': res})
    parallel = clargs.to_parallel(args)
    parallel.update({'progs': ['samtools', 'sambamba']})
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)
    dirs = {'work': os.path.abspath(os.getcwd())}
    system.write_info(dirs, parallel, config)
    sysinfo = system.machine_info()[0]
    samples = _get_samples_to_process(args.csv, out_dir, config)
    parallel = resources.calculate(parallel, [samples], sysinfo, config)

    with prun.start(parallel, samples, config, dirs) as run_parallel:
        with profile.report("prepare bcbio samples", dirs):
            samples = run_parallel("prepare_bcbio_samples", samples)

    create_new_csv(samples, args)