Example #1
def register_cloudbutton():
    """ Register Cloudbutton Backend to be called with
        joblib.parallel_backend("cloudbutton") """
    try:
        from cloudbutton.util.joblib.cloudbutton_backend import CloudbuttonBackend
        register_parallel_backend("cloudbutton", CloudbuttonBackend)
    except ImportError:
        msg = (
            "To use the cloudbutton backend you must first install the plugin. "
            "See https://github.com/Dahk/cloudbutton-backend.git "
            "for instructions.")
        raise ImportError(msg)
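
A minimal usage sketch for the backend registered above, assuming the
cloudbutton plugin is installed (everything else is standard joblib):

from math import sqrt
from joblib import Parallel, delayed, parallel_backend

register_cloudbutton()  # makes the "cloudbutton" backend available by name
with parallel_backend("cloudbutton"):
    # work inside this block is dispatched through CloudbuttonBackend
    results = Parallel(n_jobs=8)(delayed(sqrt)(i ** 2) for i in range(10))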
Example #2
def test_template_submit(mock_file, mock_result, mock_pool):
    # Verify that creating child jobs from a template looks like we expect
    file_id = 17
    mock_client = mock.Mock()
    mock_file.return_value = file_id

    factory = civis.parallel.make_backend_template_factory(
        from_template_id=1234, client=mock_client)

    n_calls = 3
    register_parallel_backend('civis', factory)
    with parallel_backend('civis'):
        parallel = Parallel(n_jobs=5, pre_dispatch='n_jobs')
        parallel(delayed(sqrt)(i**2) for i in range(n_calls))

    assert mock_file.call_count == 3, "Upload 3 functions to run"
    assert mock_pool().submit.call_count == n_calls, "Run 3 functions"
    for this_call in mock_pool().submit.call_args_list:
        assert this_call == mock.call(JOBLIB_FUNC_FILE_ID=file_id)
    assert mock_result.call_count == 3, "Create 3 results"
Example #3
    def setupJoblib(self, ipp_profile='default', cluster_id=None):
        """
        Method to set ipyparallel backend to a running ipcluster
        Arguments
        ---------
        ipp_profile : string
            Name of ipcluster profile for the started ipcluster that will be set up
        """

        import ipyparallel as ipp
        from ipyparallel.joblib import IPythonParallelBackend
        global joblib_rc, joblib_view, joblib_be
        joblib_rc = ipp.Client(profile=ipp_profile, cluster_id=cluster_id)
        joblib_view = joblib_rc.load_balanced_view()
        joblib_be = IPythonParallelBackend(view=joblib_view)
        register_parallel_backend('ipyparallel',
                                  lambda: joblib_be,
                                  make_default=True)

        self.backend = 'ipyparallel'
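
A hedged usage sketch for the method above; the owning class (called Analysis
here) is hypothetical, and a matching ipcluster must already be running:

from math import sqrt
from joblib import Parallel, delayed, parallel_backend

analysis = Analysis()  # hypothetical class that exposes setupJoblib()
analysis.setupJoblib(ipp_profile='default')
# make_default=True means a bare Parallel() would also use the cluster
with parallel_backend(analysis.backend):
    print(Parallel(n_jobs=4)(delayed(sqrt)(i ** 2) for i in range(16)))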
Example #4
def _test_retries_helper(num_failures, max_submit_retries, should_fail,
                         from_template_id, mock_file_to_civis, mock_result_cls,
                         mock_custom_exec_cls, mock_executor_cls):

    mock_file_to_civis.return_value = 0
    mock_result_cls.return_value.get.return_value = [123]

    # A function to raise fake API errors the first num_failures times it is
    # called.
    counter = {'n_failed': 0}

    def mock_submit(fn='', *args, **kwargs):
        if counter['n_failed'] < num_failures:
            counter['n_failed'] += 1
            raise CivisAPIError(mock.MagicMock())
        else:
            return mock.MagicMock(spec=ContainerFuture)

    mock_custom_exec_cls.return_value.submit.side_effect = mock_submit
    mock_executor_cls.return_value.submit.side_effect = mock_submit

    if from_template_id:
        factory = civis.parallel.make_backend_template_factory(
            from_template_id=from_template_id,
            max_submit_retries=max_submit_retries,
            client=create_client_mock())
    else:
        factory = civis.parallel.make_backend_factory(
            max_submit_retries=max_submit_retries, client=create_client_mock())
    register_parallel_backend('civis', factory)
    with parallel_backend('civis'):
        # NB: joblib >v0.11 relies on callbacks from the result object to
        # decide when it's done consuming inputs. We've mocked the result
        # object here, so Parallel must be called either with n_jobs=1 or
        # pre_dispatch='all' to consume the inputs all at once.
        parallel = Parallel(n_jobs=1, pre_dispatch='n_jobs')
        if should_fail:
            with pytest.raises(civis.parallel.JobSubmissionError):
                parallel(delayed(sqrt)(i**2) for i in range(3))
        else:
            parallel(delayed(sqrt)(i**2) for i in range(3))
Example #5
def _test_retries_helper(num_failures, max_submit_retries,
                         should_fail, from_template_id,
                         mock_file_to_civis, mock_result_cls,
                         mock_custom_exec_cls, mock_executor_cls):

    mock_file_to_civis.return_value = 0
    mock_result_cls.get.return_value = 123

    # A function to raise fake API errors the first num_failures times it is
    # called.
    counter = {'n_failed': 0}

    def mock_submit(fn='', *args, **kwargs):
        if counter['n_failed'] < num_failures:
            counter['n_failed'] += 1
            raise CivisAPIError(mock.MagicMock())
        else:
            return mock.MagicMock(spec=ContainerFuture)

    mock_custom_exec_cls.return_value.submit.side_effect = mock_submit
    mock_executor_cls.return_value.submit.side_effect = mock_submit

    if from_template_id:
        factory = civis.parallel.make_backend_template_factory(
            from_template_id=from_template_id,
            max_submit_retries=max_submit_retries,
            client=mock.Mock())
    else:
        factory = civis.parallel.make_backend_factory(
            max_submit_retries=max_submit_retries, client=mock.Mock())
    register_parallel_backend('civis', factory)
    with parallel_backend('civis'):
        parallel = Parallel(n_jobs=5, pre_dispatch='n_jobs')
        if should_fail:
            with pytest.raises(civis.parallel.JobSubmissionError):
                parallel(delayed(sqrt)(i ** 2) for i in range(3))
        else:
            parallel(delayed(sqrt)(i ** 2) for i in range(3))
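
A hypothetical pytest driver for the helper above. The real test module wires
in the mock_* arguments via stacked mock.patch decorators; the patch targets
below are assumptions for illustration, not verified paths in civis:

@pytest.mark.parametrize("num_failures,max_submit_retries,should_fail",
                         [(0, 0, False),   # no failures; succeeds directly
                          (2, 3, False),   # recovered within the retry budget
                          (5, 3, True)])   # failures exceed retries
def test_retries(num_failures, max_submit_retries, should_fail):
    with mock.patch("civis.parallel.file_to_civis") as mock_file, \
            mock.patch("civis.parallel._CivisBackendResult") as mock_result, \
            mock.patch("civis.parallel.CustomScriptExecutor") as mock_custom, \
            mock.patch("civis.parallel._ContainerShellExecutor") as mock_exec:
        _test_retries_helper(num_failures, max_submit_retries, should_fail,
                             None, mock_file, mock_result,
                             mock_custom, mock_exec)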
Example #6
def test_template_submit(mock_file, mock_result, mock_pool):
    # Verify that creating child jobs from a template looks like we expect
    file_id = 17
    mock_client = create_client_mock()
    mock_file.return_value = file_id

    factory = civis.parallel.make_backend_template_factory(
        from_template_id=1234, client=mock_client)

    n_calls = 3
    register_parallel_backend('civis', factory)
    with parallel_backend('civis'):
        # NB: joblib >v0.11 relies on callbacks from the result object to
        # decide when it's done consuming inputs. We've mocked the result
        # object here, so Parallel must be called either with n_jobs=1 or
        # pre_dispatch='all' to consume the inputs all at once.
        parallel = Parallel(n_jobs=1, pre_dispatch='n_jobs')
        parallel(delayed(sqrt)(i**2) for i in range(n_calls))

    assert mock_file.call_count == 3, "Upload 3 functions to run"
    assert mock_pool().submit.call_count == n_calls, "Run 3 functions"
    for this_call in mock_pool().submit.call_args_list:
        assert this_call == mock.call(JOBLIB_FUNC_FILE_ID=file_id)
    assert mock_result.call_count == 3, "Create 3 results"
Example #7
            # will suffice for the actual working of this param
            assert tree.min_impurity_decrease == 0.1


# mypy error: Variable "DEFAULT_JOBLIB_BACKEND" is not valid type
class MyBackend(DEFAULT_JOBLIB_BACKEND):  # type: ignore
    def __init__(self, *args, **kwargs):
        self.count = 0
        super().__init__(*args, **kwargs)

    def start_call(self):
        self.count += 1
        return super().start_call()


joblib.register_parallel_backend('testing', MyBackend)


@pytest.mark.skipif(parse_version(joblib.__version__) < parse_version('0.12'),
                    reason='tests not yet supported in joblib <0.12')
@skip_if_no_parallel
def test_backend_respected():
    clf = RandomForestClassifier(n_estimators=10, n_jobs=2)

    with joblib.parallel_backend("testing") as (ba, n_jobs):
        clf.fit(X, y)

    assert ba.count > 0

    # predict_proba requires shared memory. Ensure that's honored.
    with joblib.parallel_backend("testing") as (ba, _):
        clf.predict_proba(X)

    assert ba.count == 0
Example #8
        self.futures.add(future)

        @gen.coroutine
        def callback_wrapper():
            result = yield _wait([future])
            self.futures.remove(future)
            callback(result)  # gets called in separate thread

        self.client.loop.add_callback(callback_wrapper)

        future.get = future.result  # monkey patch to achieve AsyncResult API
        return future

    def abort_everything(self, ensure_ready=True):
        # Tell the client to cancel any task submitted via this instance
        # as joblib.Parallel will never access those results.
        self.client.cancel(self.futures)
        self.futures.clear()


DistributedBackend = DaskDistributedBackend


# Register the backend with any available versions of joblib
if joblib:
    joblib.register_parallel_backend('distributed', DaskDistributedBackend)
    joblib.register_parallel_backend('dask.distributed', DaskDistributedBackend)
if sk_joblib:
    sk_joblib.register_parallel_backend('distributed', DaskDistributedBackend)
    sk_joblib.register_parallel_backend('dask.distributed', DaskDistributedBackend)
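
A hedged usage sketch: with the registrations above, either name selects the
Dask backend. The scheduler_host argument and address are assumptions based
on older distributed releases:

from math import sqrt
from joblib import Parallel, delayed, parallel_backend

# assumes a distributed scheduler is reachable at this address
with parallel_backend('dask.distributed', scheduler_host='127.0.0.1:8786'):
    results = Parallel()(delayed(sqrt)(i ** 2) for i in range(10))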
Example #9
        self.futures.add(future)

        @gen.coroutine
        def callback_wrapper():
            result = yield _wait([future])
            self.futures.remove(future)
            callback(result)  # gets called in separate thread

        self.client.loop.add_callback(callback_wrapper)

        future.get = future.result  # monkey patch to achieve AsyncResult API
        return future

    def abort_everything(self, ensure_ready=True):
        # Tell the client to cancel any task submitted via this instance
        # as joblib.Parallel will never access those results.
        self.client.cancel(self.futures)
        self.futures.clear()


DistributedBackend = DaskDistributedBackend


# Register the backend with any available versions of joblib
if joblib:
    joblib.register_parallel_backend("distributed", DaskDistributedBackend)
    joblib.register_parallel_backend("dask.distributed", DaskDistributedBackend)
if sk_joblib:
    sk_joblib.register_parallel_backend("distributed", DaskDistributedBackend)
    sk_joblib.register_parallel_backend("dask.distributed", DaskDistributedBackend)
Example #10
parser = argparse.ArgumentParser()
parser.add_argument("-p",
                    "--profile",
                    default="ipy_profile",
                    help="Name of IPython profile to use")
args = parser.parse_args()
profile = args.profile

# Prepare the engines.
client = Client(profile=profile)
# The following command will make sure that each engine is running in
# the right working directory to access the custom function(s).
client[:].map(os.chdir, [FILE_DIR] * len(client))
bview = client.load_balanced_view()

register_parallel_backend('ipyparallel',
                          lambda: IPythonParallelBackend(view=bview))

s = 'len(client)={}, joblib.cpu_count()={}, mpr.cpu_count()={}'.format(
    len(client), joblib.cpu_count(), mpr.cpu_count())
print(s)

logging.basicConfig(filename=os.path.join(FILE_DIR, profile + '.log'),
                    filemode='w',
                    level=logging.DEBUG)
logging.info("number of CPUs found: {0}".format(cpu_count()))
logging.info("args.profile: {0}".format(profile))
logging.info("c.ids :{0}".format(str(client.ids)))
logging.info("{}".format(s))

from module_joblib_parallel_test import fun
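
A hedged continuation sketch: with the 'ipyparallel' backend registered above,
the imported fun can be fanned out across the engines (Parallel, delayed, and
parallel_backend are assumed to be imported at the top of the script):

with parallel_backend('ipyparallel'):
    results = Parallel(n_jobs=len(client))(delayed(fun)(i) for i in range(32))
logging.info("results: {0}".format(results))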
Example #11
    param_grid = {
        'max_depth': [10],
        'min_samples_leaf': [1, 2],
        'min_samples_split': [2],
    }

    # TODO : fix values appropriately
    n_jobs = -1
    nb_folds = 3
    verbose = 100
    backend = "dask"
    fit_params = {}
    scorer = metrics.f1_score

    register_parallel_backend(backend, DaskDistributedBackend)

    gs_estimator = GridSearchCV(estimator=estimator, param_grid=param_grid)

    logging.info("Entering Dask Context")
    with parallel_backend("dask"):
        logging.info("Entered Dask Context")

        logging.info(
            "Running GridSearchCV.fit with %s as a parallel back-end" %
            backend)

        gs_estimator.fit(X, y)

        logging.info("Done running GridSearchCV.fit")
Example #12
"""Example showing how to use joblib-hadoop with an YARN cluster"""

from math import sqrt
from joblib import (Parallel, delayed,
                    register_parallel_backend, parallel_backend)
from joblibhadoop.yarn import YarnBackend

if __name__ == '__main__':
    register_parallel_backend('yarn', YarnBackend)

    # Run in parallel using Yarn backend
    with parallel_backend('yarn', n_jobs=5):
        print(Parallel(verbose=100)(
            delayed(sqrt)(i**2) for i in range(100)))

    # Should be executed in parallel locally
    print(Parallel(verbose=100, n_jobs=5)(
        delayed(sqrt)(i**2) for i in range(100)))
Example #13
        super().__init__(*args, **kwargs)

    def start_call(self):
        self.tqdm = tqdm(total=self._job_count, unit='tasks')
        self._orig_print_progress = self.parallel.print_progress
        self.parallel.print_progress = self.update_progress

    def update_progress(self):
        try:
            self.tqdm.update(1)
        except Exception:
            self._orig_print_progress()

    def stop_call(self):
        try:
            self.tqdm.close()
        except Exception:
            self._orig_print_progress()

    def terminate(self):
        try:
            self.tqdm.close()
        except Exception:
            pass
        finally:
            super().terminate()


#: register the omegaml joblib parallel backend
joblib.register_parallel_backend('omegaml', OmegaRuntimeBackend)
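
A hedged usage sketch for the progress-reporting backend above, assuming the
omegaml runtime that OmegaRuntimeBackend depends on is available:

from math import sqrt
from joblib import Parallel, delayed, parallel_backend

with parallel_backend('omegaml'):
    # start_call attaches a tqdm bar; each completed task advances it
    Parallel(n_jobs=2)(delayed(sqrt)(i ** 2) for i in range(100))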
Example #14
    def apply_async(self, func, *args, **kwargs):
        callback = kwargs.pop('callback', None)
        kwargs['pure'] = False
        future = self.executor.submit(func, *args, **kwargs)
        self.futures.add(future)

        @gen.coroutine
        def callback_wrapper():
            result = yield _wait([future])
            self.futures.remove(future)
            callback(result)  # gets called in separate thread

        self.executor.loop.add_callback(callback_wrapper)

        future.get = future.result  # monkey patch to achieve AsyncResult API
        return future

    def abort_everything(self, ensure_ready=True):
        # Tell the executor to cancel any task submitted via this instance
        # as joblib.Parallel will never access those results.
        self.executor.cancel(self.futures)
        self.futures.clear()


# Register the backend with any available versions of joblib
if joblib:
    joblib.register_parallel_backend('distributed', DistributedBackend)
if sk_joblib:
    sk_joblib.register_parallel_backend('distributed', DistributedBackend)