def _run_toplevel(config, config_file, work_dir, parallel,
                  fc_dir=None, run_info_yaml=None):
    """Run toplevel analysis, processing a set of input files.

    config_file -- Main YAML configuration file with system parameters
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)
    dirs = run_info.setup_directories(work_dir, fc_dir, config, config_file)
    config_file = os.path.join(dirs["config"], os.path.basename(config_file))
    pipelines = _pair_samples_with_pipelines(run_info_yaml)
    system.write_info(dirs, parallel, config)
    final = []
    with tx_tmpdir(config) as tmpdir:
        tempfile.tempdir = tmpdir
        for pipeline, samples in pipelines.items():
            for xs in pipeline.run(config, run_info_yaml, parallel, dirs, samples):
                if len(xs) == 1:
                    upload.from_sample(xs[0])
                    final.append(xs[0])
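# A minimal sketch of the pipeline contract _run_toplevel consumes above:
# each value in the pipelines mapping exposes run(), which yields lists of
# sample dicts, and one-item lists are treated as finished samples and
# uploaded. The stub below is hypothetical, not bcbio's actual API.
class _StubPipeline:
    def run(self, config, run_info_yaml, parallel, dirs, samples):
        for sample in samples:
            sample["processed"] = True  # hypothetical marker
            yield [sample]  # one-item lists trigger upload.from_sample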
def _run_toplevel(config, config_file, work_dir, parallel,
                  fc_dir=None, run_info_yaml=None):
    """Run toplevel analysis, processing a set of input files.

    config_file -- Main YAML configuration file with system parameters
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)
    logger.info("System YAML configuration: %s" % os.path.abspath(config_file))
    dirs = run_info.setup_directories(work_dir, fc_dir, config, config_file)
    config_file = os.path.join(dirs["config"], os.path.basename(config_file))
    pipelines, config = _pair_samples_with_pipelines(run_info_yaml, config)
    system.write_info(dirs, parallel, config)
    with tx_tmpdir(config if parallel.get("type") == "local" else None) as tmpdir:
        tempfile.tempdir = tmpdir
        for pipeline, samples in pipelines.items():
            for xs in pipeline(config, run_info_yaml, parallel, dirs, samples):
                pass
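# Hedged sketch (a simplified stand-in, not bcbio's tx_tmpdir) of the
# transactional temporary directory used above. Assigning the yielded path
# to tempfile.tempdir redirects all subsequent tempfile allocations into
# it, so scratch files are removed together when the block exits.
import contextlib
import shutil
import tempfile

@contextlib.contextmanager
def _tx_tmpdir_sketch(base_dir=None):
    path = tempfile.mkdtemp(dir=base_dir)
    try:
        yield path
    finally:
        shutil.rmtree(path, ignore_errors=True)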
def _add_provenance(items, dirs, parallel, config):
    p = programs.write_versions(dirs, config,
                                is_wrapper=parallel.get("wrapper") is not None)
    p_db = diagnostics.initialize(dirs)
    system.write_info(dirs, parallel, config)
    out = []
    for item in items:
        entity_id = diagnostics.store_entity(item)
        item["config"]["resources"]["program_versions"] = p
        item["provenance"] = {"programs": p, "entity": entity_id, "db": p_db}
        out.append([item])
    return out
def _add_provenance(items, dirs, run_parallel, parallel, config):
    p = programs.write_versions(dirs, config)
    system.write_info(dirs, run_parallel, parallel, config)
    out = []
    for item in items:
        if item.get("upload") and item["upload"].get("fc_name"):
            entity_id = "%s.%s.%s" % (item["upload"]["fc_date"],
                                      item["upload"]["fc_name"],
                                      item["description"])
        else:
            entity_id = item["description"]
        item["config"]["resources"]["program_versions"] = p
        item["provenance"] = {"programs": p, "entity": entity_id}
        out.append([item])
    return out
def _add_provenance(items, dirs, parallel, config):
    p = programs.write_versions(dirs, config,
                                is_wrapper=parallel.get("wrapper") is not None)
    system.write_info(dirs, parallel, config)
    out = []
    for item in items:
        if item.get("upload") and item["upload"].get("fc_name"):
            entity_id = "%s.%s.%s" % (item["upload"]["fc_date"],
                                      item["upload"]["fc_name"],
                                      item["description"])
        else:
            entity_id = item["description"]
        item["config"]["resources"]["program_versions"] = p
        item["provenance"] = {"programs": p, "entity": entity_id}
        out.append([item])
    return out
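# Worked example of the two entity id forms produced above, using a
# hypothetical sample item with flowcell metadata attached:
item = {"description": "Sample1",
        "upload": {"fc_name": "FC123", "fc_date": "150101"}}
entity_id = "%s.%s.%s" % (item["upload"]["fc_date"],
                          item["upload"]["fc_name"],
                          item["description"])
assert entity_id == "150101.FC123.Sample1"
# Without fc_name/fc_date, the bare description ("Sample1") is used instead.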
def _calculate_resources(data, args, resources):
    parallel = clargs.to_parallel(args)
    config = data[0][0]['config']
    # Register the program's memory/core requirements before scheduling.
    config['resources'].update({resources['name']:
                                {'memory': "%sg" % resources['mem'],
                                 'cores': resources['cores']}})
    parallel.update({'progs': [resources['name']]})
    log.setup_log(config, parallel)
    dirs = {'work': os.path.abspath(os.getcwd())}
    system.write_info(dirs, parallel, config)
    sysinfo = system.machine_info()[0]
    log.logger.info("Number of items %s" % len(data))
    parallel = res.calculate(parallel, data, sysinfo, config)
    log.logger.info(parallel)
    return parallel
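# Hypothetical example of the `resources` argument consumed by
# _calculate_resources and the config entry it produces:
resources = {"name": "samtools", "mem": 4, "cores": 8}
config = {"resources": {}}
config["resources"].update({resources["name"]:
                            {"memory": "%sg" % resources["mem"],
                             "cores": resources["cores"]}})
assert config["resources"] == {"samtools": {"memory": "4g", "cores": 8}}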
def _run_toplevel(config, config_file, work_dir, parallel,
                  fc_dir=None, run_info_yaml=None):
    """Run toplevel analysis, processing a set of input files.

    config_file -- Main YAML configuration file with system parameters
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    parallel = log.create_base_logger(config, parallel)
    log.setup_local_logging(config, parallel)
    dirs = run_info.setup_directories(work_dir, fc_dir, config, config_file)
    config_file = os.path.join(dirs["config"], os.path.basename(config_file))
    pipelines, config = _pair_samples_with_pipelines(run_info_yaml, config)
    system.write_info(dirs, parallel, config)
    with tx_tmpdir(config) as tmpdir:
        tempfile.tempdir = tmpdir
        for pipeline, samples in pipelines.items():
            for xs in pipeline.run(config, run_info_yaml, parallel, dirs, samples):
                pass
"bcbio_system.yaml") except ValueError as err: print(err) print( "WARNING: Attempting to read bcbio_system.yaml in the current directory." ) system_config = "bcbio_system.yaml" with open(system_config) as in_handle: config = yaml.load(in_handle) res = {'cores': args.cores_per_job} config["algorithm"] = {"num_cores": args.cores_per_job} config["resources"].update({'sambamba': res, 'samtools': res}) config["log_dir"] = os.path.join(os.path.abspath(os.getcwd()), "log") parallel = clargs.to_parallel(args) parallel.update({'progs': ['samtools', 'sambamba']}) parallel = log.create_base_logger(config, parallel) log.setup_local_logging(config, parallel) dirs = {'work': os.path.abspath(os.getcwd())} system.write_info(dirs, parallel, config) sysinfo = system.machine_info()[0] samples = _get_samples_to_process(args.csv, out_dir, config, args.force_single, args.separators) parallel = resources.calculate(parallel, [samples], sysinfo, config) with prun.start(parallel, samples, config, dirs) as run_parallel: with profile.report("prepare bcbio samples", dirs): samples = run_parallel("prepare_bcbio_samples", samples) create_new_csv(samples, args)
parser.add_argument("-p", "--tag", help="Tag name to label jobs on the cluster", default="bcb-prep") parser.add_argument("-t", "--paralleltype", choices=["local", "ipython"], default="local", help="Run with iptyhon") args = parser.parse_args() out_dir = os.path.abspath(args.out) utils.safe_makedir(out_dir) system_config = os.path.join(_get_data_dir(), "galaxy", "bcbio_system.yaml") with open(system_config) as in_handle: config = yaml.load(in_handle) res = {'cores': args.cores_per_job} config["algorithm"] = {"num_cores": args.cores_per_job} config["resources"].update({'sambamba': res, 'samtools': res}) parallel = clargs.to_parallel(args) parallel.update({'progs': ['samtools', 'sambamba']}) parallel = log.create_base_logger(config, parallel) log.setup_local_logging(config, parallel) dirs = {'work': os.path.abspath(os.getcwd())} system.write_info(dirs, parallel, config) sysinfo = system.machine_info()[0] samples = _get_samples_to_process(args.csv, out_dir, config) parallel = resources.calculate(parallel, [samples], sysinfo, config) with prun.start(parallel, samples, config, dirs) as run_parallel: with profile.report("prepare bcbio samples", dirs): samples = run_parallel("prepare_bcbio_samples", samples) create_new_csv(samples, args)