def register_cloudbutton():
    """Register the Cloudbutton backend to be called with
    joblib.parallel_backend("cloudbutton")."""
    try:
        from cloudbutton.util.joblib.cloudbutton_backend import CloudbuttonBackend
        register_parallel_backend("cloudbutton", CloudbuttonBackend)
    except ImportError:
        msg = ("To use the cloudbutton backend you must first install the plugin. "
               "See https://github.com/Dahk/cloudbutton-backend.git "
               "for instructions.")
        raise ImportError(msg)
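# A minimal usage sketch (not part of the source above): after calling
# register_cloudbutton(), the backend can be selected through joblib's
# parallel_backend context manager. Any parameters the Cloudbutton backend
# itself accepts are not shown here and may differ.
from math import sqrt
from joblib import Parallel, delayed, parallel_backend

register_cloudbutton()
with parallel_backend("cloudbutton"):
    print(Parallel(n_jobs=4, verbose=10)(
        delayed(sqrt)(i ** 2) for i in range(10)))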
def test_template_submit(mock_file, mock_result, mock_pool):
    # Verify that creating child jobs from a template looks like we expect.
    file_id = 17
    mock_client = mock.Mock()
    mock_file.return_value = file_id

    factory = civis.parallel.make_backend_template_factory(
        from_template_id=1234, client=mock_client)
    n_calls = 3
    register_parallel_backend('civis', factory)
    with parallel_backend('civis'):
        parallel = Parallel(n_jobs=5, pre_dispatch='n_jobs')
        parallel(delayed(sqrt)(i**2) for i in range(n_calls))

    assert mock_file.call_count == 3, "Upload 3 functions to run"
    assert mock_pool().submit.call_count == n_calls, "Run 3 functions"
    for this_call in mock_pool().submit.call_args_list:
        assert this_call == mock.call(JOBLIB_FUNC_FILE_ID=file_id)
    assert mock_result.call_count == 3, "Create 3 results"
def setupJoblib(self, ipp_profile='default', cluster_id=None):
    """Set the ipyparallel joblib backend to a running ipcluster.

    Arguments
    ---------
    ipp_profile : string
        Name of the ipcluster profile of the started ipcluster
        that will be set up.
    cluster_id : string, optional
        Id of the ipcluster to connect to.
    """
    import ipyparallel as ipp
    from ipyparallel.joblib import IPythonParallelBackend

    global joblib_rc, joblib_view, joblib_be
    joblib_rc = ipp.Client(profile=ipp_profile, cluster_id=cluster_id)
    joblib_view = joblib_rc.load_balanced_view()
    joblib_be = IPythonParallelBackend(view=joblib_view)
    register_parallel_backend('ipyparallel',
                              lambda: joblib_be,
                              make_default=True)
    self.backend = 'ipyparallel'
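# A minimal usage sketch (not from the source above): assuming setupJoblib is
# a method of some helper class and an ipcluster for the given profile is
# already running, registering with make_default=True means plain
# joblib.Parallel calls are dispatched to the cluster engines. The 'helper'
# instance below is a hypothetical stand-in for that class.
from math import sqrt
from joblib import Parallel, delayed

helper.setupJoblib(ipp_profile='default')  # 'helper' is an assumed instance
results = Parallel(n_jobs=8)(delayed(sqrt)(i ** 2) for i in range(100))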
def _test_retries_helper(num_failures, max_submit_retries, should_fail,
                         from_template_id, mock_file_to_civis,
                         mock_result_cls, mock_custom_exec_cls,
                         mock_executor_cls):
    mock_file_to_civis.return_value = 0
    mock_result_cls.return_value.get.return_value = [123]

    # A function to raise fake API errors the first num_failures times it is
    # called.
    counter = {'n_failed': 0}

    def mock_submit(fn='', *args, **kwargs):
        if counter['n_failed'] < num_failures:
            counter['n_failed'] += 1
            raise CivisAPIError(mock.MagicMock())
        else:
            return mock.MagicMock(spec=ContainerFuture)

    mock_custom_exec_cls.return_value.submit.side_effect = mock_submit
    mock_executor_cls.return_value.submit.side_effect = mock_submit

    if from_template_id:
        factory = civis.parallel.make_backend_template_factory(
            from_template_id=from_template_id,
            max_submit_retries=max_submit_retries,
            client=create_client_mock())
    else:
        factory = civis.parallel.make_backend_factory(
            max_submit_retries=max_submit_retries,
            client=create_client_mock())
    register_parallel_backend('civis', factory)
    with parallel_backend('civis'):
        # NB: joblib >v0.11 relies on callbacks from the result object to
        # decide when it's done consuming inputs. We've mocked the result
        # object here, so Parallel must be called either with n_jobs=1 or
        # pre_dispatch='all' to consume the inputs all at once.
        parallel = Parallel(n_jobs=1, pre_dispatch='n_jobs')
        if should_fail:
            with pytest.raises(civis.parallel.JobSubmissionError):
                parallel(delayed(sqrt)(i**2) for i in range(3))
        else:
            parallel(delayed(sqrt)(i**2) for i in range(3))
def _test_retries_helper(num_failures, max_submit_retries, should_fail,
                         from_template_id, mock_file_to_civis,
                         mock_result_cls, mock_custom_exec_cls,
                         mock_executor_cls):
    mock_file_to_civis.return_value = 0
    mock_result_cls.get.return_value = 123

    # A function to raise fake API errors the first num_failures times it is
    # called.
    counter = {'n_failed': 0}

    def mock_submit(fn='', *args, **kwargs):
        if counter['n_failed'] < num_failures:
            counter['n_failed'] += 1
            raise CivisAPIError(mock.MagicMock())
        else:
            return mock.MagicMock(spec=ContainerFuture)

    mock_custom_exec_cls.return_value.submit.side_effect = mock_submit
    mock_executor_cls.return_value.submit.side_effect = mock_submit

    if from_template_id:
        factory = civis.parallel.make_backend_template_factory(
            from_template_id=from_template_id,
            max_submit_retries=max_submit_retries,
            client=mock.Mock())
    else:
        factory = civis.parallel.make_backend_factory(
            max_submit_retries=max_submit_retries,
            client=mock.Mock())
    register_parallel_backend('civis', factory)
    with parallel_backend('civis'):
        parallel = Parallel(n_jobs=5, pre_dispatch='n_jobs')
        if should_fail:
            with pytest.raises(civis.parallel.JobSubmissionError):
                parallel(delayed(sqrt)(i ** 2) for i in range(3))
        else:
            parallel(delayed(sqrt)(i ** 2) for i in range(3))
def test_template_submit(mock_file, mock_result, mock_pool):
    # Verify that creating child jobs from a template looks like we expect.
    file_id = 17
    mock_client = create_client_mock()
    mock_file.return_value = file_id

    factory = civis.parallel.make_backend_template_factory(
        from_template_id=1234, client=mock_client)
    n_calls = 3
    register_parallel_backend('civis', factory)
    with parallel_backend('civis'):
        # NB: joblib >v0.11 relies on callbacks from the result object to
        # decide when it's done consuming inputs. We've mocked the result
        # object here, so Parallel must be called either with n_jobs=1 or
        # pre_dispatch='all' to consume the inputs all at once.
        parallel = Parallel(n_jobs=1, pre_dispatch='n_jobs')
        parallel(delayed(sqrt)(i**2) for i in range(n_calls))

    assert mock_file.call_count == 3, "Upload 3 functions to run"
    assert mock_pool().submit.call_count == n_calls, "Run 3 functions"
    for this_call in mock_pool().submit.call_args_list:
        assert this_call == mock.call(JOBLIB_FUNC_FILE_ID=file_id)
    assert mock_result.call_count == 3, "Create 3 results"
    # will suffice for the actual working of this param
    assert tree.min_impurity_decrease == 0.1


# mypy error: Variable "DEFAULT_JOBLIB_BACKEND" is not valid type
class MyBackend(DEFAULT_JOBLIB_BACKEND):  # type: ignore
    def __init__(self, *args, **kwargs):
        self.count = 0
        super().__init__(*args, **kwargs)

    def start_call(self):
        self.count += 1
        return super().start_call()


joblib.register_parallel_backend('testing', MyBackend)


@pytest.mark.skipif(parse_version(joblib.__version__) < parse_version('0.12'),
                    reason='tests not yet supported in joblib <0.12')
@skip_if_no_parallel
def test_backend_respected():
    clf = RandomForestClassifier(n_estimators=10, n_jobs=2)

    with joblib.parallel_backend("testing") as (ba, n_jobs):
        clf.fit(X, y)

    assert ba.count > 0

    # predict_proba requires shared memory. Ensure that's honored.
    with joblib.parallel_backend("testing") as (ba, _):
        self.futures.add(future)

        @gen.coroutine
        def callback_wrapper():
            result = yield _wait([future])
            self.futures.remove(future)
            callback(result)  # gets called in separate thread

        self.client.loop.add_callback(callback_wrapper)

        future.get = future.result  # monkey patch to achieve AsyncResult API
        return future

    def abort_everything(self, ensure_ready=True):
        # Tell the client to cancel any task submitted via this instance
        # as joblib.Parallel will never access those results.
        self.client.cancel(self.futures)
        self.futures.clear()


DistributedBackend = DaskDistributedBackend


# Register the backend with any available versions of joblib
if joblib:
    joblib.register_parallel_backend('distributed', DaskDistributedBackend)
    joblib.register_parallel_backend('dask.distributed', DaskDistributedBackend)
if sk_joblib:
    sk_joblib.register_parallel_backend('distributed', DaskDistributedBackend)
    sk_joblib.register_parallel_backend('dask.distributed', DaskDistributedBackend)
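# A minimal usage sketch (not part of the source above): once the backend is
# registered under 'dask.distributed', joblib work can be routed to a running
# Dask scheduler. The scheduler address and the scheduler_host argument are
# assumptions for illustration; check the distributed.joblib documentation of
# the installed version for the exact parameters.
from math import sqrt
from joblib import Parallel, delayed, parallel_backend

with parallel_backend('dask.distributed', scheduler_host='127.0.0.1:8786'):
    print(Parallel(n_jobs=4, verbose=10)(
        delayed(sqrt)(i ** 2) for i in range(10)))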
parser = argparse.ArgumentParser()
parser.add_argument("-p", "--profile", default="ipy_profile",
                    help="Name of IPython profile to use")
args = parser.parse_args()
profile = args.profile

# Prepare the engines.
client = Client(profile=profile)
# The following command will make sure that each engine is running in
# the right working directory to access the custom function(s).
client[:].map(os.chdir, [FILE_DIR] * len(client))

bview = client.load_balanced_view()
register_parallel_backend('ipyparallel',
                          lambda: IPythonParallelBackend(view=bview))

s = 'len(client)={}, joblib.cpu_count()={}, mpr.cpu_count()={}'.format(
    len(client), joblib.cpu_count(), mpr.cpu_count())
print(s)

logging.basicConfig(filename=os.path.join(FILE_DIR, profile + '.log'),
                    filemode='w',
                    level=logging.DEBUG)
logging.info("number of CPUs found: {0}".format(cpu_count()))
logging.info("args.profile: {0}".format(profile))
logging.info("c.ids :{0}".format(str(client.ids)))
logging.info("{}".format(s))

from module_joblib_parallel_test import fun
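# A minimal usage sketch (not from the source above): with the 'ipyparallel'
# backend registered, selecting it via joblib's parallel_backend context
# manager dispatches tasks to the ipcluster engines. The sqrt work function
# below is a stand-in; the original script imports its own 'fun'.
from math import sqrt
from joblib import Parallel, delayed, parallel_backend

with parallel_backend('ipyparallel'):
    results = Parallel(n_jobs=4, verbose=10)(
        delayed(sqrt)(i ** 2) for i in range(100))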
param_grid = {
    'max_depth': [10],
    'min_samples_leaf': [1, 2],
    'min_samples_split': [2],
}  # TODO: fix values appropriately
n_jobs = -1
nb_folds = 3
verbose = 100
backend = "dask"
fit_params = {}
scorer = metrics.f1_score

register_parallel_backend(backend, DaskDistributedBackend)
gs_estimator = GridSearchCV(estimator=estimator, param_grid=param_grid)

logging.info("Entering Dask Context")
with parallel_backend("dask"):
    logging.info("Entered Dask Context")
    logging.info(
        "Running GridSearchCV.fit with %s as a parallel back-end" % backend)
    gs_estimator.fit(X, y)
    logging.info("Done running GridSearchCV.fit")
"""Example showing how to use joblib-hadoop with an YARN cluster""" from math import sqrt from joblib import (Parallel, delayed, register_parallel_backend, parallel_backend) from joblibhadoop.yarn import YarnBackend if __name__ == '__main__': register_parallel_backend('yarn', YarnBackend) # Run in parallel using Yarn backend with parallel_backend('yarn', n_jobs=5): print(Parallel(verbose=100)( delayed(sqrt)(i**2) for i in range(100))) # Should be executed in parallel locally print(Parallel(verbose=100, n_jobs=5)( delayed(sqrt)(i**2) for i in range(100)))
        super().__init__(*args, **kwargs)

    def start_call(self):
        self.tqdm = tqdm(total=self._job_count, unit='tasks')
        self._orig_print_progress = self.parallel.print_progress
        self.parallel.print_progress = self.update_progress

    def update_progress(self):
        try:
            self.tqdm.update(1)
        except Exception:
            # fall back to joblib's original progress reporting
            self._orig_print_progress()

    def stop_call(self):
        try:
            self.tqdm.close()
        except Exception:
            self._orig_print_progress()

    def terminate(self):
        try:
            self.tqdm.close()
        except Exception:
            pass
        finally:
            super().terminate()


#: register joblib parallel omegaml backend
joblib.register_parallel_backend('omegaml', OmegaRuntimeBackend)
    def apply_async(self, func, *args, **kwargs):
        callback = kwargs.pop('callback', None)
        kwargs['pure'] = False
        future = self.executor.submit(func, *args, **kwargs)
        self.futures.add(future)

        @gen.coroutine
        def callback_wrapper():
            result = yield _wait([future])
            self.futures.remove(future)
            callback(result)  # gets called in separate thread

        self.executor.loop.add_callback(callback_wrapper)

        future.get = future.result  # monkey patch to achieve AsyncResult API
        return future

    def abort_everything(self, ensure_ready=True):
        # Tell the executor to cancel any task submitted via this instance
        # as joblib.Parallel will never access those results.
        self.executor.cancel(self.futures)
        self.futures.clear()


# Register the backend with any available versions of joblib
if joblib:
    joblib.register_parallel_backend('distributed', DistributedBackend)
if sk_joblib:
    sk_joblib.register_parallel_backend('distributed', DistributedBackend)