import pandas as pd
from distributed import Client
from arboreto.algo import grnboost2


def main(transcriptome_file, regulator_file, species, out_file, n_workers, threads_per_worker):
    print('reading data')
    tf_info = pd.read_csv(regulator_file, sep='\t', index_col=0)
    tf_names = list(tf_info.loc[tf_info['Species'] == species].index)
    df = pd.read_csv(transcriptome_file, sep='\t', index_col=0)
    num_not_expressed = (df.std(axis=1) == 0).sum()
    print(f'removing {num_not_expressed} genes that have zero expression in all samples')
    # Filter out genes with zero variance across samples (constant or never expressed)
    df = df.loc[df.std(axis=1) > 0]
    print('starting scheduler')
    client = Client(n_workers=n_workers, threads_per_worker=threads_per_worker, memory_limit='48GB')
    try:
        network = grnboost2(expression_data=df.T,
                            tf_names=tf_names,
                            client_or_address=client,
                            verbose=True)
        network.to_csv(out_file, sep='\t', header=False, index=False)
    except Exception as e:
        print('Module inference error')
        print(e)
    finally:
        client.close()
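# A minimal sketch of invoking main() above; all file paths and the species
# label are hypothetical placeholders, not values from the original source.
if __name__ == '__main__':
    main(transcriptome_file='expression.tsv',
         regulator_file='tf_info.tsv',
         species='Homo sapiens',
         out_file='network.tsv',
         n_workers=4,
         threads_per_worker=2)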
def shutdown():
    """Shut down the distributed cluster."""
    mode = UserConfig().data.general.cluster_mode
    gpus = str(UserConfig().data.general.worker.gpus_per_job)
    if mode == ClusterMode.Single and gpus == "-1":
        return
    try:
        logging.info("Try to shutdown cluster.")
        from vega.core.trainer.utils import get_write_ip_master_local
        from distributed import Client
        ip, port = get_write_ip_master_local()
        if ip is None or port is None:
            logging.info("Stand-alone mode, no need to shut down the cluster.")
            return
        shutdown_client = Client("{}:{}".format(ip, port))
        logging.info("Cluster will be shut down.")
        shutdown_client.shutdown()
        shutdown_client.close()
        del shutdown_client
        logging.info("Cluster is shut down.")
    except Exception as e:
        logging.error("Pipeline's cluster shutdown error: {}".format(str(e)))
        logging.error(traceback.format_exc())
def _cancel(self, scheduler_address: Optional[str] = None) -> None:
    if not scheduler_address:
        scheduler_address = self.scheduler_address
    client = Client(scheduler_address)
    f = Future(self.name + " graph", client=client)
    f.cancel(force=True)
    client.close()
def dask_client():
    cluster = LocalCluster(n_workers=NUM_WORKERS, threads_per_worker=2)
    client = Client(cluster)
    yield client
    # teardown
    client.close()
    cluster.close()
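# A minimal sketch of consuming the generator above, assuming dask_client is
# registered as a pytest fixture (the @pytest.fixture decorator is not shown
# in this excerpt). The test body is hypothetical.
def test_submit_roundtrip(dask_client):
    future = dask_client.submit(lambda x: x + 1, 41)
    assert future.result() == 42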
def test_dask_read_combine_instastack(self):
    from distributed import Client, LocalCluster
    from dask.distributed import wait
    cluster = LocalCluster(n_workers=1, threads_per_worker=1)
    c = Client(cluster)
    anxcor = Anxcor()
    anxcor.set_window_length(120.0)
    times = anxcor.get_starttimes(starttime_stamp, endtime_stamp, 0.5)
    bank = WavebankWrapper(source_dir)
    anxcor.add_dataset(bank, 'nodals')
    anxcor.save_at_task(target_dir, 'combine')
    result = anxcor.process(times, dask_client=c, stack=True)

    anxcor = Anxcor()
    anxcor.set_window_length(120.0)
    bank = WavebankWrapper(source_dir)
    anxcor.add_dataset(bank, 'nodals')
    anxcor.load_at_task(target_dir, 'combine')
    result = anxcor.process(times, dask_client=c, stack=True)

    how_many_nc = _how_many_fmt(target_dir, format='.nc')
    _clean_files_in_dir(target_dir)
    c.close()
    cluster.close()
    assert 48 == how_many_nc
class SchedulerComputeDepsInMemory(object):

    def setup(self):
        self.client = Client()
        # Generate 10 independent tasks
        x = [delayed(random.random)() for _ in range(10)]
        # Generate lots of interrelated dependent tasks
        n = 200
        for _ in range(10, n):
            random_subset = [random.choice(x) for _ in range(5)]
            random_max = delayed(max)(random_subset)
            x.append(random_max)
        # Persist tasks into distributed memory and wait to finish
        y = self.client.persist(x)
        wait(y)
        self.x = x

    def teardown(self):
        self.client.close()

    def time_compute_deps_already_in_memory(self):
        """
        Measure compute time when dependent tasks are already in memory.

        xref https://github.com/dask/distributed/pull/3293
        """
        compute(*self.x, scheduler=self.client)
class LocalDaskDistributor(DistributorBaseClass):
    """
    Distributor using a local dask cluster and inproc communication.
    """

    def __init__(self, n_workers):
        """
        Initiates a LocalDaskDistributor instance.

        Parameters
        ----------
        n_workers : int
            How many workers should the local dask cluster have?
        """
        super().__init__()
        import tempfile

        from distributed import Client, LocalCluster

        # attribute .local_dir_ is the path where the local dask workers store temporary files
        self.local_dir_ = tempfile.mkdtemp()
        cluster = LocalCluster(n_workers=n_workers, processes=False, local_dir=self.local_dir_)

        self.client = Client(cluster)
        self.n_workers = n_workers

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the map command to the dask
        workers on a local machine.

        Parameters
        ----------
        func : Callable
            Function to send to each worker.
        partitioned_chunks : List
            List of data chunks; each chunk is processed by one worker.
        kwargs : Dict
            Parameters for the map function.

        Returns
        -------
        List
            The result of the calculation as a list - each item should be the result of the
            application of func to a single element.
        """
        if isinstance(partitioned_chunks, Iterable):
            # since dask 2.0.0 client map no longer accepts iterables
            partitioned_chunks = list(partitioned_chunks)
        result = self.client.gather(self.client.map(partial(func, **kwargs), partitioned_chunks))
        return result

    def close(self):
        """
        Closes the connection to the local Dask Scheduler.
        """
        self.client.close()
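# A minimal sketch of driving LocalDaskDistributor above; the worker function
# and chunk data are hypothetical. distribute() partially applies kwargs to
# func and maps it over the chunks on the local cluster.
distributor = LocalDaskDistributor(n_workers=2)
try:
    chunks = [[1, 2], [3, 4], [5, 6]]
    sums = distributor.distribute(lambda chunk, offset: sum(chunk) + offset,
                                  chunks, {'offset': 10})
    print(sums)  # [13, 17, 21] under these assumptions
finally:
    distributor.close()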
def cancel(self, scheduler_address=None):
    if not scheduler_address:
        scheduler_address = self.scheduler_address
    client = Client(scheduler_address)
    f = Future(self.name + "_graph", client=client)
    f.cancel(force=True)
    client.close()
def shutdown_cluster():
    """Shut down the distributed cluster."""
    # detect whether the master is running
    if not General._parallel:
        return
    try:
        logging.info("Try to shutdown cluster.")
        # stop ReportServer
        from zeus.report import ReportServer
        ReportServer.stop()
        # stop Master
        from zeus.trainer.utils import load_master_ip
        from distributed import Client
        ip, port = load_master_ip()
        if ip is None or port is None:
            logging.info("Stand-alone mode, no need to shut down the cluster.")
            return
        shutdown_client = Client("{}:{}".format(ip, port))
        logging.info("Cluster will be shut down.")
        shutdown_client.shutdown()
        shutdown_client.close()
        del shutdown_client
        time.sleep(15)
        logging.info("Cluster is shut down.")
    except Exception as e:
        logging.error("Pipeline's cluster shutdown error: {}".format(str(e)))
        logging.error(traceback.format_exc())
def submit(
    self,
    client: Client = None,
    scheduler_address: str = None,
    priority: int = None,
    resources: Dict[str, Any] = None,
    show_progress: bool = False,
    **kwargs,
) -> None:
    if not priority:
        priority = self.priority
    if not resources:
        resources = self.resources
    if not client:
        client = Client(scheduler_address)
    self.scheduler_address = client.scheduler.address
    computation = client.compute(self.graph, retries=3, priority=priority, resources=resources)
    if show_progress:
        progress(computation)
    fire_and_forget(computation)
    if scheduler_address:
        client.close()
    return None
def wrapper(config: Dict, logconfig: Dict, cluster=None):
    if self.schema is not None:
        _config = self.schema(config)
    else:
        _config = config
    if cluster is not None:
        try:
            client = Client(cluster.scheduler_address)
        except Exception:
            traceback_str = traceback.format_exc()
            raise Exception('Error occurred. Original traceback '
                            'is\n%s\n' % traceback_str)
    else:
        client = None
    try:
        func.main.config = config  # type: ignore
        return func.main(**_config)  # type: ignore
    except Exception:
        traceback_str = traceback.format_exc()
        raise Exception('Error occurred. Original traceback '
                        'is\n%s\n' % traceback_str)
    finally:
        if client is not None:
            client.close()
def dask_client():
    from distributed import Client
    client = Client(processes=False, threads_per_worker=1, dashboard_address=None)
    yield client
    client.close()
    del client
class ClusterDaskDistributor(DistributorBaseClass):
    """
    Distributor using a dask cluster, meaning that the calculation is spread over a cluster.
    """

    def __init__(self, address):
        """
        Sets up a distributor that connects to a Dask Scheduler to distribute the calculation of the features.

        :param address: the ip address and port number of the Dask Scheduler
        :type address: str
        """
        from distributed import Client

        self.client = Client(address=address)

    def calculate_best_chunk_size(self, data_length):
        """
        Uses the number of dask workers in the cluster (during execution time, meaning when you start
        the extraction) to find the optimal chunk_size.

        :param data_length: A length which defines how many calculations there need to be.
        :type data_length: int
        """
        n_workers = len(self.client.scheduler_info()["workers"])
        chunk_size, extra = divmod(data_length, n_workers * 5)
        if extra:
            chunk_size += 1
        return chunk_size

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the map command to the dask
        workers on a cluster.

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: The list of data chunks - each element is again a list of chunks -
            and should be processed by one worker.
        :type partitioned_chunks: iterable
        :param kwargs: parameters for the map function
        :type kwargs: dict of string to parameter

        :return: The result of the calculation as a list - each item should be the result of the
            application of func to a single element.
        """
        if isinstance(partitioned_chunks, Iterable):
            # since dask 2.0.0 client map no longer accepts iterables
            partitioned_chunks = list(partitioned_chunks)
        result = self.client.gather(self.client.map(partial(func, **kwargs), partitioned_chunks))
        return [item for sublist in result for item in sublist]

    def close(self):
        """
        Closes the connection to the Dask Scheduler.
        """
        self.client.close()
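# A minimal sketch of the chunk-size heuristic above: divmod targets roughly
# five chunks per worker, rounding up on any remainder. With 4 workers,
# divmod(1000, 4 * 5) gives chunk_size 50; 1001 items would bump it to 51.
# The scheduler address is hypothetical.
dist = ClusterDaskDistributor(address='tcp://10.0.0.1:8786')
chunk_size = dist.calculate_best_chunk_size(1000)  # 50 on a 4-worker cluster
dist.close()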
def dask_client_close(daskclient: Client):
    """
    Close the Dask Client.

    Parameters
    ----------
    daskclient : Dask distributed.Client
    """
    daskclient.close()
    return
class ClusterDaskDistributor(DistributorBaseClass):
    """
    Distributor using a dask cluster, meaning that the calculation is spread over a cluster.
    """

    def __init__(self, address):
        """
        Sets up a distributor that connects to a Dask Scheduler to distribute the calculation of the features.

        :param address: the ip address and port number of the Dask Scheduler
        :type address: str
        """
        from distributed import Client

        self.client = Client(address=address)

    def calculate_best_chunk_size(self, data_length):
        """
        Uses the number of dask workers in the cluster (during execution time, meaning when you start
        the extraction) to find the optimal chunk_size.

        :param data_length: A length which defines how many calculations there need to be.
        :type data_length: int
        """
        n_workers = len(self.client.scheduler_info()["workers"])
        chunk_size, extra = divmod(data_length, n_workers * 5)
        if extra:
            chunk_size += 1
        return chunk_size

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the map command to the dask
        workers on a cluster.

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: The list of data chunks - each element is again a list of chunks -
            and should be processed by one worker.
        :type partitioned_chunks: iterable
        :param kwargs: parameters for the map function
        :type kwargs: dict of string to parameter

        :return: The result of the calculation as a list - each item should be the result of the
            application of func to a single element.
        """
        result = self.client.gather(self.client.map(partial(func, **kwargs), partitioned_chunks))
        return [item for sublist in result for item in sublist]

    def close(self):
        """
        Closes the connection to the Dask Scheduler.
        """
        self.client.close()
def submit(
    self,
    client: Client = None,
    scheduler_address: str = None,
    priority: int = 1,
    to_dask: bool = True,
) -> Any:
    if self._is_running():
        msg = f"Job {self.name} is already running. Please use Job._cancel() to cancel the job."
        raise JobAlreadyRunningError(msg)

    if to_dask:
        if client is None:
            self.scheduler_address = scheduler_address or os.getenv("GRIZLY_DASK_SCHEDULER_ADDRESS")
            # connect to the resolved address (falling back to the env var),
            # not the possibly-None argument
            client = Client(self.scheduler_address)
        else:
            self.scheduler_address = client.scheduler.address

    self.logger.info(f"Submitting job {self.name}...")
    job_run = JobRun(job_name=self.name, logger=self.logger, db=self.db)
    job_run.status = "running"
    log_stream = self.__get_log_stream()
    start = time()
    try:
        result = self.func(*self.args, **self.kwargs)
        job_run.status = "success"
    except Exception:
        result = None
        job_run.status = "fail"
        _, exc_value, _ = sys.exc_info()
        job_run.traceback = traceback.format_exc()
        job_run.error = str(exc_value)

    job_run.logs = log_stream.getvalue()
    job_run.result = result
    if to_dask:
        client.close()
    self.logger.info(f"Job {self.name} finished with status {job_run.status}")
    end = time()
    job_run.finished_at = datetime.now(timezone.utc)
    job_run.duration = int(end - start)

    conditions_flags = self.__check_conditions(job_run)
    for condition, flag in conditions_flags.items():
        if flag:
            self.__submit_downstream_jobs(condition=condition)

    return result
class LocalDaskDistributor(DistributorBaseClass):
    """
    Distributor using a local dask cluster and inproc communication.
    """

    def __init__(self, n_workers):
        """
        Initiates a LocalDaskDistributor instance.

        :param n_workers: How many workers should the local dask cluster have?
        :type n_workers: int
        """
        from distributed import LocalCluster, Client
        import tempfile

        # attribute .local_dir_ is the path where the local dask workers store temporary files
        self.local_dir_ = tempfile.mkdtemp()
        cluster = LocalCluster(n_workers=n_workers, processes=False, local_dir=self.local_dir_)

        self.client = Client(cluster)
        self.n_workers = n_workers

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the map command to the dask
        workers on a local machine.

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: The list of data chunks - each element is again a list of chunks -
            and should be processed by one worker.
        :type partitioned_chunks: iterable
        :param kwargs: parameters for the map function
        :type kwargs: dict of string to parameter

        :return: The result of the calculation as a list - each item should be the result of the
            application of func to a single element.
        """
        if isinstance(partitioned_chunks, Iterable):
            # since dask 2.0.0 client map no longer accepts iterables
            partitioned_chunks = list(partitioned_chunks)
        result = self.client.gather(self.client.map(partial(func, **kwargs), partitioned_chunks))
        return [item for sublist in result for item in sublist]

    def close(self):
        """
        Closes the connection to the local Dask Scheduler.
        """
        self.client.close()
def test_no_undesired_distributed_cache_hit(loop):
    # Dask has a pickle cache for callables that are called many times. Because
    # the dask backends used to wrap both the functions and the arguments
    # under instances of the Batch callable class, this caching mechanism could
    # lead to bugs as described in: https://github.com/joblib/joblib/pull/1055
    # The joblib-dask backend has been refactored to avoid bundling the
    # arguments as an attribute of the Batch instance to avoid this problem.
    # This test serves as a non-regression test.

    # Use a large number of input arguments to give the AutoBatchingMixin
    # enough tasks to kick in.
    lists = [[] for _ in range(100)]
    np = pytest.importorskip('numpy')
    X = np.arange(int(1e6))

    def isolated_operation(list_, data=None):
        if data is not None:
            np.testing.assert_array_equal(data, X)
        list_.append(uuid4().hex)
        return list_

    cluster = LocalCluster(n_workers=1, threads_per_worker=2)
    client = Client(cluster)
    try:
        with parallel_backend('dask') as (ba, _):
            # dispatches joblib.parallel.BatchedCalls
            res = Parallel()(delayed(isolated_operation)(list_) for list_ in lists)

        # The original arguments should not have been mutated as the mutation
        # happens in the dask worker process.
        assert lists == [[] for _ in range(100)]

        # Here we did not pass any large numpy array as argument to
        # isolated_operation so no scattering event should happen under the
        # hood.
        counts = count_events('receive-from-scatter', client)
        assert sum(counts.values()) == 0
        assert all([len(r) == 1 for r in res])

        with parallel_backend('dask') as (ba, _):
            # Append a large array which will be scattered by dask, and
            # dispatch joblib._dask.Batch
            res = Parallel()(delayed(isolated_operation)(list_, data=X)
                             for list_ in lists)

        # This time, auto-scattering should have kicked in.
        counts = count_events('receive-from-scatter', client)
        assert sum(counts.values()) > 0
        assert all([len(r) == 1 for r in res])
    finally:
        client.close()
        cluster.close()
def test_client(self):
    lc = LocalCluster(diagnostics_port=None)
    passed = Client(lc)
    client, shutdown_callback = _prepare_client(passed)
    self.assertEqual(client, passed)
    shutdown_callback()
    passed.close()
    lc.close()
    self.assertEqual(lc.status.value, 'closed')
def test_dask_execution(self):
    from distributed import Client, LocalCluster
    cluster = LocalCluster(n_workers=1, threads_per_worker=1)
    c = Client(cluster)
    anxcor = Anxcor()
    anxcor.set_window_length(120.0)
    times = anxcor.get_starttimes(starttime_stamp, endtime_stamp, 0.5)
    bank = WavebankWrapper(source_dir)
    anxcor.add_dataset(bank, 'nodals')
    result = anxcor.process(times, dask_client=c)
    pairs = list(result.coords['rec'].values) + list(result.coords['src'].values)
    c.close()
    cluster.close()
    assert 6 == len(pairs)
class LocalDaskDistributor(DistributorBaseClass):
    """
    Distributor using a local dask cluster and inproc communication.
    """

    def __init__(self, n_workers):
        """
        Initiates a LocalDaskDistributor instance.

        :param n_workers: How many workers should the local dask cluster have?
        :type n_workers: int
        """
        from distributed import LocalCluster, Client
        import tempfile

        # attribute .local_dir_ is the path where the local dask workers store temporary files
        self.local_dir_ = tempfile.mkdtemp()
        cluster = LocalCluster(n_workers=n_workers, processes=False, local_dir=self.local_dir_)

        self.client = Client(cluster)
        self.n_workers = n_workers

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the map command to the dask
        workers on a local machine.

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: The list of data chunks - each element is again a list of chunks -
            and should be processed by one worker.
        :type partitioned_chunks: iterable
        :param kwargs: parameters for the map function
        :type kwargs: dict of string to parameter

        :return: The result of the calculation as a list - each item should be the result of the
            application of func to a single element.
        """
        result = self.client.gather(self.client.map(partial(func, **kwargs), partitioned_chunks))
        return [item for sublist in result for item in sublist]

    def close(self):
        """
        Closes the connection to the local Dask Scheduler.
        """
        self.client.close()
def test_dask_execution_exclude_with_stack_number(self):
    from distributed import Client, LocalCluster
    cluster = LocalCluster(n_workers=1, threads_per_worker=1)
    c = Client(cluster)
    anxcor = Anxcor()
    anxcor.set_window_length(120)
    anxcor.set_task_kwargs('crosscorrelate', dict(max_tau_shift=20.0))
    times = anxcor.get_starttimes(starttime_stamp, endtime_stamp, 0.5)
    bank = WavebankWrapper(source_dir)
    anxcor.set_must_exclude_single_stations('AX.1')
    anxcor.add_dataset(bank, 'nodals')
    result = anxcor.process(times, dask_client=c, stack=10)
    pairs = list(result.coords['rec'].values) + list(result.coords['src'].values)
    c.close()
    cluster.close()
    assert 4 == len(pairs)
def test_twpice_case():
    """ Use a test case from TWP-ICE """
    Grid0 = pyart.io.read_grid(pydda.tests.EXAMPLE_RADAR0)
    Grid1 = pyart.io.read_grid(pydda.tests.EXAMPLE_RADAR1)
    sounding = pyart.io.read_arm_sonde(pydda.tests.SOUNDING_PATH)
    u_init, v_init, w_init = pydda.initialization.make_wind_field_from_profile(
        Grid0, sounding[1], vel_field='corrected_velocity')
    Grids = pydda.retrieval.get_dd_wind_field(
        [Grid0, Grid1], u_init, v_init, w_init, Co=100, Cm=1500.0,
        Cz=0, Cmod=0.0, vel_name='corrected_velocity',
        refl_field='reflectivity', frz=5000.0, filt_iterations=0,
        mask_outside_opt=True, upper_bc=1)

    # In this test grid, we expect the mean flow to be to the southeast.
    # Maximum updrafts should be at least 10 m/s
    u_mean = np.nanmean(Grids[0].fields['u']['data'])
    v_mean = np.nanmean(Grids[0].fields['v']['data'])
    # take the maximum from the vertical wind field 'w', not 'v'
    w_max = np.max(Grids[0].fields['w']['data'])
    assert u_mean > 0
    assert v_mean < 0
    assert w_max > 10

    # Now we will test the nesting. Do the same retrieval, and make sure
    # that we get the same result within a prescribed tolerance
    cluster = LocalCluster(n_workers=2, processes=True)
    client = Client(cluster)
    Grids2 = pydda.retrieval.get_dd_wind_field_nested(
        [Grid0, Grid1], u_init, v_init, w_init, client, Co=100, Cm=1500.0,
        Cz=0, Cmod=0.0, vel_name='corrected_velocity',
        refl_field='reflectivity', frz=5000.0, filt_iterations=0,
        mask_outside_opt=True, upper_bc=1)

    # Make sure features are correlated between both versions. No reason
    # to expect the same answer, but features should be correlated.
    # Nesting tends to make the updrafts a bit better resolved, so expect
    # less of an outright correlation (but still strong)
    assert np.corrcoef(Grids2[0].fields["u"]["data"].flatten(),
                       Grids[0].fields["u"]["data"].flatten())[0, 1] > 0.9
    assert np.corrcoef(Grids2[0].fields["v"]["data"].flatten(),
                       Grids[0].fields["v"]["data"].flatten())[0, 1] > 0.9
    assert np.corrcoef(Grids2[0].fields["w"]["data"].flatten(),
                       Grids[0].fields["w"]["data"].flatten())[0, 1] > 0.5

    cluster.close()
    client.close()
def reduce_caps(capsules, allcpgs, min_capsule_len):
    cluster = LocalCluster(n_workers=multiprocessing.cpu_count() * 2, threads_per_worker=20)
    client = Client(cluster)
    capsule_names = list(capsules.keys())
    capsules_bag = db.from_sequence(list(capsules.values()))
    capsules_intersect = capsules_bag.map(lambda x: np.intersect1d(x, allcpgs))
    capsules_len = capsules_intersect.map(lambda x: x if len(x) >= min_capsule_len else [])
    # with get_task_stream(plot='save', filename="task-stream.html") as ts:
    capsules = capsules_len.compute()
    capsules = dict([(capsule_names[i], capsules[i].tolist())
                     for i in range(len(capsule_names)) if len(capsules[i])])
    client.close()
    return capsules
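# A minimal sketch of calling reduce_caps above with toy data; capsule names
# and CpG identifiers are hypothetical. Only 'capA' survives: 'capB' has
# fewer than min_capsule_len CpGs left after intersecting with allcpgs.
capsules = {'capA': ['cg1', 'cg2', 'cg3'], 'capB': ['cg9']}
kept = reduce_caps(capsules, allcpgs=['cg1', 'cg2', 'cg3'], min_capsule_len=2)
print(kept)  # {'capA': ['cg1', 'cg2', 'cg3']}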
def main():
    """
    Use the Dask distributed client to run a function in parallel.
    """
    client = Client(n_workers=8)
    numbers = [3, 4, 5, 8, 12, 18, 25]
    futures = []
    for n in numbers:
        a = client.submit(adder, n)
        futures.append(a)
    results = client.gather(futures)
    print(results)
    client.close()
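# `adder` is referenced but not defined in this excerpt; a minimal stand-in,
# assuming it maps one number to another (the exact behavior is hypothetical):
def adder(n):
    return n + 1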
def test_wait_for_workers_timeout():
    # Start a cluster with zero workers:
    cluster = LocalCluster(n_workers=0, processes=False, threads_per_worker=2)
    client = Client(cluster)
    try:
        with parallel_backend('dask', wait_for_workers_timeout=0.1):
            # Short timeout: DaskDistributedBackend
            msg = "DaskDistributedBackend has no worker after 0.1 seconds."
            with pytest.raises(TimeoutError, match=msg):
                Parallel()(delayed(inc)(i) for i in range(10))

        with parallel_backend('dask', wait_for_workers_timeout=0):
            # No timeout: fall back to the generic joblib failure:
            msg = "DaskDistributedBackend has no active worker"
            with pytest.raises(RuntimeError, match=msg):
                Parallel()(delayed(inc)(i) for i in range(10))
    finally:
        client.close()
        cluster.close()
def test_wait_for_workers(cluster_strategy):
    cluster = LocalCluster(n_workers=0, processes=False, threads_per_worker=2)
    client = Client(cluster)
    if cluster_strategy == "adaptive":
        cluster.adapt(minimum=0, maximum=2)
    elif cluster_strategy == "late_scaling":
        # Tell the cluster to start workers but this is a non-blocking call
        # and new workers might take time to connect. In this case the Parallel
        # call should wait for at least one worker to come up before starting
        # to schedule work.
        cluster.scale(2)
    try:
        with parallel_backend('dask'):
            # The following should wait a bit for at least one worker to
            # become available.
            Parallel()(delayed(inc)(i) for i in range(10))
    finally:
        client.close()
        cluster.close()
def test_with_distributed_client(self):
    lc = LocalCluster(diagnostics_port=None)
    client = Client(lc)

    graph = create_graph(net1_ex_matrix, net1_gene_names, net1_tf_names,
                         "GBM", SGBM_KWARGS,
                         target_genes=list(self.test_range),
                         client=client)

    network_df = client.compute(graph, sync=True)

    self.assertEqual(len(self.test_range), len(network_df['target'].unique()))

    client.close()
    lc.close()
class ClientSuite(object):

    def setup(self):
        self.client = Client()

    def teardown(self):
        self.client.close()

    def time_trivial_tasks(self):
        """
        Measure scheduler and communication overhead by running a bunch of
        unrelated trivial tasks.
        """
        @delayed(pure=True)
        def inc(x):
            return x + 1

        L = [inc(i) for i in range(500)]
        total = delayed(sum, pure=True)(L)
        total.compute(scheduler=self.client)
def test_dont_select_closed_worker():
    # Make sure distributed does not try to reuse a client from a
    # closed cluster (https://github.com/dask/distributed/issues/2840).
    with clean(threads=False):
        cluster = LocalCluster(n_workers=0)
        c = Client(cluster)
        cluster.scale(2)
        assert c == get_client()
        c.close()
        cluster.close()

        cluster2 = LocalCluster(n_workers=0)
        c2 = Client(cluster2)
        cluster2.scale(2)
        current_client = get_client()
        assert c2 == current_client
        cluster2.close()
        c2.close()
def dask_compute_grid(ddclient=None, func=None, **kwargs):
    temp_cluster = False
    completed = []
    if ddclient is None:
        print('creating local dask distributed cluster...')
        ddclient = Client()
        temp_cluster = True
    try:
        print('cluster dashboard available at: ' + dask_get_ddclient_dashboard_address(ddclient))
        from IPython.display import display
        display(ddclient)

        tfunc = make_return_tuple(func)
        kwargs_list = ([(k, i) for i in v] for k, v in kwargs.items())
        # tuple of cartesian products of
        # {{(arg_name, arg_val) | arg_val in arg_vals} | arg_name in arg_names}
        cart_prod_tup = product(*kwargs_list)
        cart_prod_dicts = [dict(i) for i in cart_prod_tup]
        print('submitting {} jobs to cluster...'.format(len(cart_prod_dicts)))
        futures = [ddclient.submit(tfunc, **kwargs) for kwargs in cart_prod_dicts]
        print('computing jobs...')
        completed = ddclient.gather(futures)
        print('computation done')
    finally:
        if temp_cluster:
            print('shutting down cluster...')
            ddclient.close()
            print('done')
    return completed
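# A minimal sketch of calling dask_compute_grid above; the function and the
# parameter grid are hypothetical. Each keyword maps to a list of values, and
# func runs once per point of the cartesian product (here 2 x 3 = 6 jobs).
def scaled_power(base, exponent):
    return base ** exponent

results = dask_compute_grid(func=scaled_power, base=[2, 10], exponent=[1, 2, 3])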
def run(self, workflows: List[Workflow], overwrite_params: Dict = None) -> None:
    """Run the given workflows, optionally overwriting their parameters.

    Parameters
    ----------
    workflows : List[Workflow]
        Workflows to run.
    overwrite_params : Dict, optional
        Workflow parameters to overwrite (applies to all passed workflows), by default None
    """
    self.logger.info("Checking for pending workflows...")
    if overwrite_params:
        self.logger.debug(f"Overwriting workflow parameters: {overwrite_params}")
        for workflow in workflows:
            self.overwrite_params(workflow, params=overwrite_params)
    client = Client(self.client_address)
    for workflow in workflows:
        if self.should_run(workflow):
            self.logger.info(f"Workflow {workflow.name} has been enqueued...")
            workflow.env = self.env
            workflow.submit(client=client)
        else:
            self.logger.info("No pending workflows found")
    client.close()
    return None