def run_parallel(fn_name, items, metadata=None):
    """Run the named function over items using the configured parallel backend.

    Dispatches based on the module-level `parallel` configuration:
    messaging task queues, an IPython cluster, or local joblib
    multiprocessing. Returns the flattened list of results from all items.

    fn_name -- name of the task function to run.
    items -- iterable of inputs; None entries are dropped.
    metadata -- optional resource metadata passed to cores_including_resources.
    """
    items = [x for x in items if x is not None]
    # Fix: bail out early on an empty work list so we never call
    # find_cores_per_job (or spin up workers) with nothing to do; the
    # sibling variant of this function guards the same way.
    if not items:
        return []
    items = diagnostics.track_parallel(items, fn_name)
    imodule = parallel.get("module", "bcbio.distributed")
    if parallel["type"].startswith("messaging"):
        task_module = "{base}.tasks".format(base=imodule)
        runner_fn = runner(task_module, dirs, config, config_file)
        return runner_fn(fn_name, items)
    elif parallel["type"] == "ipython":
        return ipython.runner(parallel, fn_name, items, dirs["work"], config)
    else:
        logger.info("multiprocessing: %s" % fn_name)
        # Look up the task implementation in <imodule>.multitasks by name.
        fn = getattr(__import__("{base}.multitasks".format(base=imodule),
                                fromlist=["multitasks"]),
                     fn_name)
        num_jobs, cores_per_job = ipython.find_cores_per_job([fn], parallel, items, config)
        items = [ipython.add_cores_to_config(x, cores_per_job) for x in items]
        num_jobs = cores_including_resources(num_jobs, metadata, config)
        if joblib is None:
            raise ImportError("Need joblib for multiprocessing parallelization")
        out = []
        # Each task returns a list (or falsy); flatten non-empty results.
        for data in joblib.Parallel(num_jobs)(joblib.delayed(fn)(x) for x in items):
            if data:
                out.extend(data)
        return out
def run_parallel(fn_name, items, metadata=None):
    """Dispatch `fn_name` across items via messaging, IPython, or local joblib.

    Filters out None inputs, records the parallel run for diagnostics, then
    hands the work to the backend selected in the `parallel` configuration.
    Results from every item are flattened into a single list.
    """
    items = [x for x in items if x is not None]
    if not items:
        return []
    items = diagnostics.track_parallel(items, fn_name)
    imodule = parallel.get("module", "bcbio.distributed")
    sysinfo = system.get_info(dirs, parallel)
    ptype = parallel["type"]
    if ptype.startswith("messaging"):
        runner_fn = runner("{base}.tasks".format(base=imodule), dirs, config, config_file)
        return runner_fn(fn_name, items)
    if ptype == "ipython":
        return ipython.runner(parallel, fn_name, items, dirs["work"], sysinfo, config)
    logger.info("multiprocessing: %s" % fn_name)
    # Resolve the task function from <imodule>.multitasks by name.
    mtasks = __import__("{base}.multitasks".format(base=imodule),
                        fromlist=["multitasks"])
    fn = getattr(mtasks, fn_name)
    jobr = ipython.find_job_resources([fn], parallel, items, sysinfo, config)
    items = [ipython.add_cores_to_config(x, jobr.cores_per_job) for x in items]
    if joblib is None:
        raise ImportError("Need joblib for multiprocessing parallelization")
    combined = []
    # Each worker returns a list (or falsy); keep only non-empty results.
    for result in joblib.Parallel(jobr.num_jobs)(joblib.delayed(fn)(x) for x in items):
        if result:
            combined.extend(result)
    return combined
def run_multicore(fn, items, config, cores=None):
    """Process `items` with `fn` in parallel on the local machine.

    cores -- number of cores to use; defaults to the configuration's
    algorithm num_cores (1 when unset). Results from all items are
    flattened into one list.
    """
    if cores is None:
        cores = config["algorithm"].get("num_cores", 1)
    local_parallel = {"type": "local", "cores": cores}
    sysinfo = system.get_info({}, local_parallel)
    jobr = ipython.find_job_resources([fn], local_parallel, items, sysinfo, config,
                                      local_parallel.get("multiplier", 1),
                                      max_multicore=int(sysinfo["cores"]))
    items = [ipython.add_cores_to_config(x, jobr.cores_per_job) for x in items]
    if joblib is None:
        raise ImportError("Need joblib for multiprocessing parallelization")
    gathered = []
    # Flatten non-empty per-item result lists into a single output.
    for chunk in joblib.Parallel(jobr.num_jobs)(joblib.delayed(fn)(x) for x in items):
        if chunk:
            gathered.extend(chunk)
    return gathered
def run_parallel(fn_name, items, metadata=None):
    """Run `fn_name` over items on the configured backend and gather results.

    Supports messaging task queues, IPython clusters, and a local cpmap
    process pool. None entries in items are skipped.
    """
    ptype = parallel["type"]
    if ptype.startswith("messaging"):
        task_module = "{base}.tasks".format(base=parallel["module"])
        return runner(task_module, dirs, config, config_file)(fn_name, items)
    if ptype == "ipython":
        return ipython.runner(parallel, fn_name, items, dirs["work"], config)
    # Local multiprocessing: look up the task in <module>.multitasks by name.
    mtasks = __import__("{base}.multitasks".format(base=parallel["module"]),
                        fromlist=["multitasks"])
    fn = getattr(mtasks, fn_name)
    cores = cores_including_resources(int(parallel["cores"]), metadata, config)
    collected = []
    with utils.cpmap(cores) as cpmap:
        # Each item gets a single core in its config; None inputs dropped.
        inputs = (ipython.add_cores_to_config(x, 1) for x in items if x is not None)
        for data in cpmap(fn, inputs):
            if data:
                collected.extend(data)
    return collected
def run_parallel(fn_name, items, metadata=None):
    """Dispatch `fn_name` over items using the configured parallel backend.

    Cleans None inputs and records the run for diagnostics before handing
    off to messaging, IPython, or a local cpmap process pool. Per-item
    result lists are flattened into the returned list.
    """
    items = [x for x in items if x is not None]
    items = diagnostics.track_parallel(items, fn_name)
    ptype = parallel["type"]
    if ptype.startswith("messaging"):
        task_module = "{base}.tasks".format(base=parallel["module"])
        return runner(task_module, dirs, config, config_file)(fn_name, items)
    if ptype == "ipython":
        return ipython.runner(parallel, fn_name, items, dirs["work"], config)
    logger.info("multiprocessing: %s" % fn_name)
    # Resolve the task function from <module>.multitasks by name.
    mtasks = __import__("{base}.multitasks".format(base=parallel["module"]),
                        fromlist=["multitasks"])
    fn = getattr(mtasks, fn_name)
    num_jobs, cores_per_job = ipython.find_cores_per_job(fn, parallel, items, config)
    items = [ipython.add_cores_to_config(x, cores_per_job) for x in items]
    num_jobs = cores_including_resources(num_jobs, metadata, config)
    gathered = []
    with utils.cpmap(num_jobs) as cpmap:
        for data in cpmap(fn, items):
            if data:
                gathered.extend(data)
    return gathered