def run_main(config, config_file, work_dir, parallel,
             fc_dir=None, run_info_yaml=None):
    """Drive a complete analysis run over one set of input files.

    config_file -- Main YAML configuration file with system parameters
    work_dir -- Stored as the "work" entry of the directory mapping
    parallel -- Handed to parallel_runner to build the task runner
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    setup_logging(config)
    fc_name, fc_date, run_info = get_run_info(fc_dir, config, run_info_yaml)

    # Only look up a fastq location when a flowcell directory was supplied.
    fastq_source = get_fastq_dir(fc_dir) if fc_dir else None
    fastq_dir, galaxy_dir, config_dir = _get_full_paths(fastq_source, config,
                                                        config_file)
    # From here on, refer to the configuration file by its resolved location.
    config_file = os.path.join(config_dir, os.path.basename(config_file))
    dirs = {
        "fastq": fastq_dir,
        "galaxy": galaxy_dir,
        "work": work_dir,
        "flowcell": fc_dir,
        "config": config_dir,
    }
    run_parallel = parallel_runner(parallel, dirs, config, config_file)

    # process each flowcell lane
    run_items = add_multiplex_across_lanes(run_info["details"], dirs["fastq"],
                                           fc_name)
    lane_args = ((item, fc_name, fc_date, dirs, config) for item in run_items)
    lane_items = run_parallel("process_lane", lane_args)

    # run every pipeline on its lanes, uploading each finished sample
    grouped = _pair_lanes_with_pipelines(lane_items)
    for pipeline, members in grouped.items():
        results = pipeline.run(config, config_file, run_parallel, dirs, members)
        for result in results:
            assert len(result) == 1
            upload.from_sample(result[0])

    write_metrics(run_info, fc_name, fc_date, dirs)
def _run_toplevel(config, config_file, work_dir, parallel,
                  fc_dir=None, run_info_yaml=None):
    """Organize the input samples and run each through its pipeline.

    config_file -- Main YAML configuration file with system parameters
    work_dir -- Stored as the "work" entry of the directory mapping
    parallel -- Passed through log.create_base_logger before use
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)

    # Only look up a fastq location when a flowcell directory was supplied.
    fastq_source = get_fastq_dir(fc_dir) if fc_dir else None
    fastq_dir, galaxy_dir, config_dir = _get_full_paths(fastq_source, config,
                                                        config_file)
    config_file = os.path.join(config_dir, os.path.basename(config_file))
    dirs = {
        "fastq": fastq_dir,
        "galaxy": galaxy_dir,
        "work": work_dir,
        "flowcell": fc_dir,
        "config": config_dir,
    }

    samples = run_info.organize(dirs, config, run_info_yaml)
    grouped = _pair_lanes_with_pipelines(samples)

    # NOTE(review): `final` is filled but no return statement is visible here.
    final = []
    with utils.curdir_tmpdir() as tmpdir:
        # Point the tempfile module's default directory at the scratch dir.
        tempfile.tempdir = tmpdir
        for pipeline, members in grouped.items():
            members = _add_provenance(members, dirs, parallel, config)
            versioncheck.testall(members)
            results = pipeline.run(config, config_file, parallel, dirs, members)
            for result in results:
                # Only single-item results are uploaded and recorded.
                if len(result) != 1:
                    continue
                upload.from_sample(result[0])
                final.append(result[0])
def _run_toplevel(config, config_file, work_dir, parallel,
                  fc_dir=None, run_info_yaml=None):
    """Organize the input samples and run each through its pipeline.

    config_file -- Main YAML configuration file with system parameters
    work_dir -- Forwarded to setup_directories
    parallel -- Passed through log.create_base_logger before use
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)

    dirs = setup_directories(work_dir, fc_dir, config, config_file)
    # From here on, refer to the configuration file inside dirs["config"].
    config_name = os.path.basename(config_file)
    config_file = os.path.join(dirs["config"], config_name)

    samples = run_info.organize(dirs, config, run_info_yaml)
    grouped = _pair_samples_with_pipelines(samples)

    # NOTE(review): `final` is filled but no return statement is visible here.
    final = []
    with tx_tmpdir(config) as tmpdir:
        # Point the tempfile module's default directory at the scratch dir.
        tempfile.tempdir = tmpdir
        for pipeline, members in grouped.items():
            members = _add_provenance(members, dirs, parallel, config)
            versioncheck.testall(members)
            results = pipeline.run(config, config_file, parallel, dirs, members)
            for result in results:
                # Only single-item results are uploaded and recorded.
                if len(result) != 1:
                    continue
                upload.from_sample(result[0])
                final.append(result[0])
def _run_toplevel(config, config_file, work_dir, parallel,
                  fc_dir=None, run_info_yaml=None):
    """Process all lanes, then run each pipeline over its lane items.

    config_file -- Main YAML configuration file with system parameters
    work_dir -- Stored as the "work" entry of the directory mapping
    parallel -- Passed through log.create_base_logger, then used both
                directly and to build the task runner
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)

    # Only look up a fastq location when a flowcell directory was supplied.
    fastq_source = get_fastq_dir(fc_dir) if fc_dir else None
    fastq_dir, galaxy_dir, config_dir = _get_full_paths(fastq_source, config,
                                                        config_file)
    config_file = os.path.join(config_dir, os.path.basename(config_file))
    dirs = {
        "fastq": fastq_dir,
        "galaxy": galaxy_dir,
        "work": work_dir,
        "flowcell": fc_dir,
        "config": config_dir,
    }

    run_items = run_info.organize(dirs, config, run_info_yaml)
    run_parallel = parallel_runner(parallel, dirs, config, config_file)

    # process each flowcell lane
    lane_items = lane.process_all_lanes(run_items, run_parallel)
    grouped = _pair_lanes_with_pipelines(lane_items)

    # NOTE(review): `final` is filled but no return statement is visible here.
    final = []
    with utils.curdir_tmpdir() as tmpdir:
        # Point the tempfile module's default directory at the scratch dir.
        tempfile.tempdir = tmpdir
        for pipeline, members in grouped.items():
            members = _add_provenance(members, dirs, run_parallel, parallel,
                                      config)
            versioncheck.testall(members)
            results = pipeline.run(config, config_file, run_parallel, parallel,
                                   dirs, members)
            for result in results:
                # Only single-item results are uploaded and recorded.
                if len(result) != 1:
                    continue
                upload.from_sample(result[0])
                final.append(result[0])
def _run_toplevel(config, config_file, work_dir, parallel,
                  fc_dir=None, run_info_yaml=None, samples=None):
    """Run each pipeline over its samples, organizing them first if needed.

    config_file -- Main YAML configuration file with system parameters
    work_dir -- Forwarded to run_info.setup_directories
    parallel -- Passed through log.create_base_logger before use
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    samples -- Pre-processed samples, useful if run inside of docker
               containers. When truthy, sample organization and the
               version checks are both skipped.
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)

    dirs = run_info.setup_directories(work_dir, fc_dir, config, config_file)
    # From here on, refer to the configuration file inside dirs["config"].
    config_name = os.path.basename(config_file)
    config_file = os.path.join(dirs["config"], config_name)

    # Caller-supplied samples mark this as a dockerized run.
    dockerized = bool(samples)
    if not dockerized:
        samples = run_info.organize(dirs, config, run_info_yaml)
    grouped = _pair_samples_with_pipelines(samples)

    # NOTE(review): `final` is filled but no return statement is visible here.
    final = []
    with tx_tmpdir(config) as tmpdir:
        # Point the tempfile module's default directory at the scratch dir.
        tempfile.tempdir = tmpdir
        for pipeline, members in grouped.items():
            members = _add_provenance(members, dirs, parallel, config)
            if not dockerized:
                versioncheck.testall(members)
            results = pipeline.run(config, config_file, parallel, dirs, members)
            for result in results:
                # Only single-item results are uploaded and recorded.
                if len(result) != 1:
                    continue
                upload.from_sample(result[0])
                final.append(result[0])
def run_main(config, config_file, work_dir, parallel,
             fc_dir=None, run_info_yaml=None):
    """Drive a complete analysis run over one set of input files.

    config_file -- Main YAML configuration file with system parameters
    work_dir -- Stored as the "work" entry of the directory mapping
    parallel -- Handed to parallel_runner to build the task runner
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    setup_logging(config)
    fc_name, fc_date, run_info = get_run_info(fc_dir, config, run_info_yaml)

    # Only look up a fastq location when a flowcell directory was supplied.
    fastq_source = get_fastq_dir(fc_dir) if fc_dir else None
    fastq_dir, galaxy_dir, config_dir = _get_full_paths(fastq_source, config,
                                                        config_file)
    # From here on, refer to the configuration file by its resolved location.
    config_file = os.path.join(config_dir, os.path.basename(config_file))
    dirs = {"fastq": fastq_dir, "galaxy": galaxy_dir, "work": work_dir,
            "flowcell": fc_dir, "config": config_dir}
    run_parallel = parallel_runner(parallel, dirs, config, config_file)

    # process each flowcell lane
    run_items = add_multiplex_across_lanes(run_info["details"], dirs["fastq"],
                                           fc_name)
    lane_args = ((item, fc_name, fc_date, dirs, config) for item in run_items)
    lane_items = lane.process_all_lanes(lane_args, run_parallel)

    # run every pipeline on its lanes, uploading each finished sample
    grouped = _pair_lanes_with_pipelines(lane_items)
    for pipeline, members in grouped.items():
        members = _add_provenance(members, dirs, config)
        results = pipeline.run(config, config_file, run_parallel, dirs, members)
        for result in results:
            assert len(result) == 1
            upload.from_sample(result[0])

    qcsummary.write_metrics(run_info, fc_name, fc_date, dirs)
def _run_toplevel(config, config_file, work_dir, parallel,
                  fc_dir=None, run_info_yaml=None):
    """Pair the run description with pipelines and run each one.

    config_file -- Main YAML configuration file with system parameters
    work_dir -- Forwarded to run_info.setup_directories
    parallel -- Passed through log.create_base_logger before use
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process;
                     passed to both the pipeline pairing and pipeline.run
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)

    dirs = run_info.setup_directories(work_dir, fc_dir, config, config_file)
    # NOTE(review): the rebound config_file is not used again in this
    # function; the pipelines receive run_info_yaml instead.
    config_name = os.path.basename(config_file)
    config_file = os.path.join(dirs["config"], config_name)

    grouped = _pair_samples_with_pipelines(run_info_yaml)
    system.write_info(dirs, parallel, config)

    # NOTE(review): `final` is filled but no return statement is visible here.
    final = []
    with tx_tmpdir(config) as tmpdir:
        # Point the tempfile module's default directory at the scratch dir.
        tempfile.tempdir = tmpdir
        for pipeline, samples in grouped.items():
            results = pipeline.run(config, run_info_yaml, parallel, dirs, samples)
            for result in results:
                # Only single-item results are uploaded and recorded.
                if len(result) != 1:
                    continue
                upload.from_sample(result[0])
                final.append(result[0])
def upload_samples(*args):
    """Forward all positional arguments to upload.from_sample.

    Returns whatever upload.from_sample returns.
    """
    outcome = upload.from_sample(*args)
    return outcome