Example #1
0
def run_main(config, config_file, work_dir, parallel,
             fc_dir=None, run_info_yaml=None):
    """Run the toplevel analysis over a set of input files.

    config -- Configuration handed to logging setup, run-info loading,
              the parallel runner and every pipeline
    config_file -- Main YAML configuration file with system parameters
    work_dir -- Directory stored as the "work" entry of the shared dirs
    parallel -- Parallelization settings passed to parallel_runner
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    setup_logging(config)
    fc_name, fc_date, run_info = get_run_info(fc_dir, config, run_info_yaml)

    # Without a flowcell directory there is no fastq directory to resolve.
    fastq_source = get_fastq_dir(fc_dir) if fc_dir else None
    fastq_dir, galaxy_dir, config_dir = _get_full_paths(fastq_source, config,
                                                        config_file)
    # Re-anchor the configuration file name inside the resolved config directory.
    config_file = os.path.join(config_dir, os.path.basename(config_file))
    dirs = {
        "fastq": fastq_dir,
        "galaxy": galaxy_dir,
        "work": work_dir,
        "flowcell": fc_dir,
        "config": config_dir,
    }
    run_parallel = parallel_runner(parallel, dirs, config, config_file)

    # process each flowcell lane
    run_items = add_multiplex_across_lanes(run_info["details"],
                                           dirs["fastq"], fc_name)
    # Kept as a lazy generator: argument tuples are built as they are consumed.
    lane_args = ((item, fc_name, fc_date, dirs, config) for item in run_items)
    lane_items = run_parallel("process_lane", lane_args)

    by_pipeline = _pair_lanes_with_pipelines(lane_items)
    for pipeline, items in by_pipeline.items():
        outputs = pipeline.run(config, config_file, run_parallel, dirs, items)
        for result in outputs:
            # Each pipeline result is expected to wrap exactly one sample.
            assert len(result) == 1
            upload.from_sample(result[0])
    write_metrics(run_info, fc_name, fc_date, dirs)
Example #2
0
def _run_toplevel(config, config_file, work_dir, parallel,
                  fc_dir=None, run_info_yaml=None):
    """
    Run toplevel analysis, processing a set of input files.

    config -- Configuration passed to logging setup, sample organization,
              provenance tracking and each pipeline
    config_file -- Main YAML configuration file with system parameters
    work_dir -- Directory stored as the "work" entry of the shared dirs
    parallel -- Parallel settings; replaced by the value returned from
                log.create_base_logger before any further use
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process

    While the pipelines run, the module-global ``tempfile.tempdir`` points at
    the scoped temporary directory; its previous value is restored afterwards,
    including when a pipeline raises.
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)
    fastq_dir, galaxy_dir, config_dir = _get_full_paths(get_fastq_dir(fc_dir)
                                                        if fc_dir else None,
                                                        config, config_file)
    # Re-anchor the configuration file name inside the resolved config directory.
    config_file = os.path.join(config_dir, os.path.basename(config_file))
    dirs = {"fastq": fastq_dir, "galaxy": galaxy_dir,
            "work": work_dir, "flowcell": fc_dir, "config": config_dir}
    samples = run_info.organize(dirs, config, run_info_yaml)
    pipelines = _pair_lanes_with_pipelines(samples)
    # NOTE(review): `final` is filled but not used in the visible body --
    # confirm whether it is meant to be returned.
    final = []
    with utils.curdir_tmpdir() as tmpdir:
        # tempfile.tempdir is process-global: remember the old value so it does
        # not keep pointing at the scoped directory (presumably cleaned up on
        # exit -- TODO confirm) once this block is left.
        previous_tempdir = tempfile.tempdir
        tempfile.tempdir = tmpdir
        try:
            for pipeline, pipeline_items in pipelines.items():
                pipeline_items = _add_provenance(pipeline_items, dirs, parallel, config)
                versioncheck.testall(pipeline_items)
                for xs in pipeline.run(config, config_file, parallel, dirs, pipeline_items):
                    # Only single-sample results are uploaded and collected.
                    if len(xs) == 1:
                        upload.from_sample(xs[0])
                        final.append(xs[0])
        finally:
            tempfile.tempdir = previous_tempdir
Example #3
0
def _run_toplevel(config,
                  config_file,
                  work_dir,
                  parallel,
                  fc_dir=None,
                  run_info_yaml=None):
    """
    Run toplevel analysis, processing a set of input files.

    config -- Configuration passed to logging setup, directory setup,
              sample organization, provenance tracking and each pipeline
    config_file -- Main YAML configuration file with system parameters
    work_dir -- Working directory handed to setup_directories
    parallel -- Parallel settings; replaced by the value returned from
                log.create_base_logger before any further use
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process

    While the pipelines run, the module-global ``tempfile.tempdir`` points at
    the scoped temporary directory; its previous value is restored afterwards,
    including when a pipeline raises.
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)
    dirs = setup_directories(work_dir, fc_dir, config, config_file)
    # Re-anchor the configuration file name inside the resolved config directory.
    config_file = os.path.join(dirs["config"], os.path.basename(config_file))
    samples = run_info.organize(dirs, config, run_info_yaml)
    pipelines = _pair_samples_with_pipelines(samples)
    # NOTE(review): `final` is filled but not used in the visible body --
    # confirm whether it is meant to be returned.
    final = []
    with tx_tmpdir(config) as tmpdir:
        # tempfile.tempdir is process-global: remember the old value so it does
        # not keep pointing at the scoped directory (presumably cleaned up on
        # exit -- TODO confirm) once this block is left.
        previous_tempdir = tempfile.tempdir
        tempfile.tempdir = tmpdir
        try:
            for pipeline, pipeline_items in pipelines.items():
                pipeline_items = _add_provenance(pipeline_items, dirs, parallel,
                                                 config)
                versioncheck.testall(pipeline_items)
                for xs in pipeline.run(config, config_file, parallel, dirs,
                                       pipeline_items):
                    # Only single-sample results are uploaded and collected.
                    if len(xs) == 1:
                        upload.from_sample(xs[0])
                        final.append(xs[0])
        finally:
            tempfile.tempdir = previous_tempdir
Example #4
0
def _run_toplevel(config, config_file, work_dir, parallel,
                  fc_dir=None, run_info_yaml=None):
    """
    Run toplevel analysis, processing a set of input files.

    config -- Configuration passed to logging setup, sample organization,
              the parallel runner, provenance tracking and each pipeline
    config_file -- Main YAML configuration file with system parameters
    work_dir -- Directory stored as the "work" entry of the shared dirs
    parallel -- Parallel settings; replaced by the value returned from
                log.create_base_logger before any further use
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process

    While the pipelines run, the module-global ``tempfile.tempdir`` points at
    the scoped temporary directory; its previous value is restored afterwards,
    including when a pipeline raises.
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)
    fastq_dir, galaxy_dir, config_dir = _get_full_paths(get_fastq_dir(fc_dir)
                                                        if fc_dir else None,
                                                        config, config_file)
    # Re-anchor the configuration file name inside the resolved config directory.
    config_file = os.path.join(config_dir, os.path.basename(config_file))
    dirs = {"fastq": fastq_dir, "galaxy": galaxy_dir,
            "work": work_dir, "flowcell": fc_dir, "config": config_dir}
    run_items = run_info.organize(dirs, config, run_info_yaml)
    run_parallel = parallel_runner(parallel, dirs, config, config_file)

    # process each flowcell lane
    lane_items = lane.process_all_lanes(run_items, run_parallel)
    pipelines = _pair_lanes_with_pipelines(lane_items)
    # NOTE(review): `final` is filled but not used in the visible body --
    # confirm whether it is meant to be returned.
    final = []
    with utils.curdir_tmpdir() as tmpdir:
        # tempfile.tempdir is process-global: remember the old value so it does
        # not keep pointing at the scoped directory (presumably cleaned up on
        # exit -- TODO confirm) once this block is left.
        previous_tempdir = tempfile.tempdir
        tempfile.tempdir = tmpdir
        try:
            for pipeline, pipeline_items in pipelines.items():
                pipeline_items = _add_provenance(pipeline_items, dirs, run_parallel, parallel, config)
                versioncheck.testall(pipeline_items)
                for xs in pipeline.run(config, config_file, run_parallel, parallel, dirs, pipeline_items):
                    # Only single-sample results are uploaded and collected.
                    if len(xs) == 1:
                        upload.from_sample(xs[0])
                        final.append(xs[0])
        finally:
            tempfile.tempdir = previous_tempdir
Example #5
0
def _run_toplevel(config, config_file, work_dir, parallel,
                  fc_dir=None, run_info_yaml=None, samples=None):
    """
    Run toplevel analysis, processing a set of input files.

    config -- Configuration passed to logging setup, directory setup,
              sample organization, provenance tracking and each pipeline
    config_file -- Main YAML configuration file with system parameters
    work_dir -- Working directory handed to run_info.setup_directories
    parallel -- Parallel settings; replaced by the value returned from
                log.create_base_logger before any further use
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    samples -- Pre-processed samples, useful if run inside of docker containers.
               When given (truthy), sample organization and the version
               check are skipped.

    While the pipelines run, the module-global ``tempfile.tempdir`` points at
    the scoped temporary directory; its previous value is restored afterwards,
    including when a pipeline raises.
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)
    dirs = run_info.setup_directories(work_dir, fc_dir, config, config_file)
    # Re-anchor the configuration file name inside the resolved config directory.
    config_file = os.path.join(dirs["config"], os.path.basename(config_file))
    # Supplied samples mark a dockerized run; otherwise organize them here.
    dockerized = bool(samples)
    if not dockerized:
        samples = run_info.organize(dirs, config, run_info_yaml)
    pipelines = _pair_samples_with_pipelines(samples)
    # NOTE(review): `final` is filled but not used in the visible body --
    # confirm whether it is meant to be returned.
    final = []
    with tx_tmpdir(config) as tmpdir:
        # tempfile.tempdir is process-global: remember the old value so it does
        # not keep pointing at the scoped directory (presumably cleaned up on
        # exit -- TODO confirm) once this block is left.
        previous_tempdir = tempfile.tempdir
        tempfile.tempdir = tmpdir
        try:
            for pipeline, pipeline_items in pipelines.items():
                pipeline_items = _add_provenance(pipeline_items, dirs, parallel, config)
                if not dockerized:
                    versioncheck.testall(pipeline_items)
                for xs in pipeline.run(config, config_file, parallel, dirs, pipeline_items):
                    # Only single-sample results are uploaded and collected.
                    if len(xs) == 1:
                        upload.from_sample(xs[0])
                        final.append(xs[0])
        finally:
            tempfile.tempdir = previous_tempdir
Example #6
0
def run_main(config,
             config_file,
             work_dir,
             parallel,
             fc_dir=None,
             run_info_yaml=None):
    """Run the toplevel analysis over a set of input files.

    config -- Configuration handed to logging setup, run-info loading,
              the parallel runner, provenance tracking and every pipeline
    config_file -- Main YAML configuration file with system parameters
    work_dir -- Directory stored as the "work" entry of the shared dirs
    parallel -- Parallelization settings passed to parallel_runner
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    setup_logging(config)
    fc_name, fc_date, run_info = get_run_info(fc_dir, config, run_info_yaml)

    # Without a flowcell directory there is no fastq directory to resolve.
    fastq_source = get_fastq_dir(fc_dir) if fc_dir else None
    fastq_dir, galaxy_dir, config_dir = _get_full_paths(fastq_source, config,
                                                        config_file)
    # Re-anchor the configuration file name inside the resolved config directory.
    config_file = os.path.join(config_dir, os.path.basename(config_file))
    dirs = {"fastq": fastq_dir, "galaxy": galaxy_dir,
            "work": work_dir, "flowcell": fc_dir, "config": config_dir}
    run_parallel = parallel_runner(parallel, dirs, config, config_file)

    # process each flowcell lane
    run_items = add_multiplex_across_lanes(run_info["details"], dirs["fastq"],
                                           fc_name)
    # Kept as a lazy generator: argument tuples are built as they are consumed.
    lane_args = ((item, fc_name, fc_date, dirs, config) for item in run_items)
    lane_items = lane.process_all_lanes(lane_args, run_parallel)

    by_pipeline = _pair_lanes_with_pipelines(lane_items)
    for pipeline, items in by_pipeline.items():
        items = _add_provenance(items, dirs, config)
        outputs = pipeline.run(config, config_file, run_parallel, dirs, items)
        for result in outputs:
            # Each pipeline result is expected to wrap exactly one sample.
            assert len(result) == 1
            upload.from_sample(result[0])
    qcsummary.write_metrics(run_info, fc_name, fc_date, dirs)
Example #7
0
def _run_toplevel(config, config_file, work_dir, parallel,
                  fc_dir=None, run_info_yaml=None):
    """
    Run toplevel analysis, processing a set of input files.

    config -- Configuration passed to logging setup, directory setup,
              system info writing and each pipeline
    config_file -- Main YAML configuration file with system parameters
    work_dir -- Working directory handed to run_info.setup_directories
    parallel -- Parallel settings; replaced by the value returned from
                log.create_base_logger before any further use
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process;
                     also passed directly to each pipeline's run()

    While the pipelines run, the module-global ``tempfile.tempdir`` points at
    the scoped temporary directory; its previous value is restored afterwards,
    including when a pipeline raises.
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)
    dirs = run_info.setup_directories(work_dir, fc_dir, config, config_file)
    # NOTE(review): the re-anchored config_file is not used again in the
    # visible body (pipelines receive run_info_yaml) -- confirm intent.
    config_file = os.path.join(dirs["config"], os.path.basename(config_file))
    pipelines = _pair_samples_with_pipelines(run_info_yaml)
    system.write_info(dirs, parallel, config)
    # NOTE(review): `final` is filled but not used in the visible body --
    # confirm whether it is meant to be returned.
    final = []
    with tx_tmpdir(config) as tmpdir:
        # tempfile.tempdir is process-global: remember the old value so it does
        # not keep pointing at the scoped directory (presumably cleaned up on
        # exit -- TODO confirm) once this block is left.
        previous_tempdir = tempfile.tempdir
        tempfile.tempdir = tmpdir
        try:
            for pipeline, samples in pipelines.items():
                for xs in pipeline.run(config, run_info_yaml, parallel, dirs, samples):
                    # Only single-sample results are uploaded and collected.
                    if len(xs) == 1:
                        upload.from_sample(xs[0])
                        final.append(xs[0])
        finally:
            tempfile.tempdir = previous_tempdir
Example #8
0
def upload_samples(*args):
    """Forward all positional arguments to upload.from_sample and return its result."""
    outcome = upload.from_sample(*args)
    return outcome
Example #9
0
def upload_samples(*args):
    """Call upload.from_sample with the given positional arguments.

    Returns whatever upload.from_sample returns.
    """
    uploaded = upload.from_sample(*args)
    return uploaded