Exemple #1
0
def test_hashes_stay_the_same():
    """Yield ``(assert_equal, computed_hash, reference_hash)`` checks.

    Hashes must not drift from one joblib version to the next: a change
    would force end users to rebuild their cache from scratch, which can
    mean lengthy recomputations. One check is yielded per sample object,
    and the hash is only computed when that check is produced.
    """
    seeded_rng = random.Random(42)
    random_floats = [seeded_rng.random() for _ in range(5)]
    mixed_items = [3, 'abc', None,
                   TransportableException('the message', ValueError)]
    samples = [
        'This is a string to hash',
        u"C'est l\xe9t\xe9",
        (123456, 54321, -98765),
        random_floats,
        mixed_items,
        {'abcde': 123, 'sadfas': [-9999, 2, 3]},
    ]

    # Reference digests below were generated with joblib 0.9.2; they are
    # listed in the same order as ``samples``.
    py2_digests = [
        '80436ada343b0d79a99bfd8883a96e45',
        '2ff3a25200eb6219f468de2640913c2d',
        '50d81c80af05061ac4dcdc2d5edee6d6',
        '536af09b66a087ed18b515acc17dc7fc',
        '123ffc6f13480767167e171a8e1f6f4a',
        'fc9314a39ff75b829498380850447047',
    ]
    py3_digests = [
        '71b3f47df22cb19431d85d92d0b230b2',
        '2d8d189e9b2b0b2e384d93c868c0e576',
        'e205227dd82250871fa25aa0ec690aa3',
        '9e4e9bf9b91890c9734a6111a35e6633',
        '6065a3c48e842ea5dee2cfd0d6820ad6',
        'aeda150553d4bb5c69f0e69d51b0e2ef',
    ]
    reference_digests = py3_digests if PY3 else py2_digests

    for sample, digest in zip(samples, reference_digests):
        yield assert_equal, hash(sample), digest
Exemple #2
0
def worker_func(func_file_id):
    """Run a downloaded callable and post its outcome as a run output.

    The callable and a remote backend description are obtained from
    ``_robust_pickle_download``; the callable is invoked with the joblib
    backend(s) switched to the one built by ``_setup_remote_backend``.
    Whatever the callable returned -- or, on failure, a
    ``TransportableException`` describing the error -- is pickled with
    ``cloudpickle``, uploaded, and attached to the current run.

    Parameters
    ----------
    func_file_id
        Identifier handed to ``_robust_pickle_download`` to fetch the
        callable.

    Raises
    ------
    RuntimeError
        If ``CIVIS_JOB_ID`` or ``CIVIS_RUN_ID`` is missing or empty in
        the environment.
    Exception
        Any exception raised while downloading, setting up the backend
        or running the callable is re-raised after being recorded.
    """
    # The uploaded result File is set to expire 7 days from now.
    one_week_out = datetime.now() + timedelta(days=7)
    expires_at = one_week_out.isoformat()

    client = civis.APIClient()
    job_id = os.environ.get('CIVIS_JOB_ID')
    run_id = os.environ.get('CIVIS_RUN_ID')
    if not (job_id and run_id):
        raise RuntimeError("This function must be run inside a "
                           "Civis container job.")

    result = None
    try:
        func, remote_backend = _robust_pickle_download(
            func_file_id, client=client, n_retries=5, delay=0.5)
        _backend = _setup_remote_backend(remote_backend)

        if not NO_SKLEARN:
            # Two copies of joblib are in play here: the package itself
            # and the one shipped by sklearn at
            # `sklearn.externals.joblib`. Each keeps its current backend
            # as global state, so the requested backend has to be set
            # in both of them.
            with _sklearn_para_backend(_backend), \
                    _joblib_para_backend(_backend):
                result = func()
        else:
            with _joblib_para_backend(_backend):
                result = func()
    except Exception:
        print("Error! Attempting to record exception.")
        # joblib can only display the failure properly if it arrives
        # wrapped in its own TransportableException.
        exc_type, exc_value, exc_tb = sys.exc_info()
        text = format_exc(exc_type, exc_value, exc_tb,
                          context=10, tb_offset=1)
        result = TransportableException(text, exc_type)
        raise
    finally:
        # Pickle and upload the outcome via the Files API. A function
        # that finished without error may still have produced nothing,
        # in which case there is nothing to upload.
        if result is not None:
            payload = BytesIO()
            cloudpickle.dump(result, payload, pickle.HIGHEST_PROTOCOL)
            payload.seek(0)
            output_name = "Results from Joblib job {} / run {}".format(
                job_id, run_id)
            output_file_id = _robust_file_to_civis(
                payload, output_name, n_retries=5, delay=0.5,
                expires_at=expires_at, client=client)
            client.scripts.post_containers_runs_outputs(
                job_id, run_id, 'File', output_file_id)
            print("Results output to file ID: {}".format(output_file_id))
Exemple #3
0
def worker_func(func_file_id):
    """Load a callable from a Civis File, run it, and post its outcome.

    The File ``func_file_id`` is downloaded into memory and deserialized
    with ``joblib.load``. The resulting callable is invoked with no
    arguments. Its return value -- or, on failure, a
    ``TransportableException`` describing the error -- is serialized with
    ``joblib.dump``, uploaded as a new File and attached to the current
    run as a run output.

    Parameters
    ----------
    func_file_id
        Identifier of the Civis File holding the serialized callable.

    Raises
    ------
    RuntimeError
        If ``CIVIS_JOB_ID`` or ``CIVIS_RUN_ID`` is missing or empty in
        the environment.
    Exception
        Any exception raised by the callable is re-raised after being
        recorded.
    """
    # Have the output File expire in 7 days.
    expires_at = (datetime.now() + timedelta(days=7)).isoformat()

    client = civis.APIClient()
    job_id = os.environ.get('CIVIS_JOB_ID')
    run_id = os.environ.get('CIVIS_RUN_ID')
    if not job_id or not run_id:
        raise RuntimeError("This function must be run inside a "
                           "Civis container job.")

    # NOTE(review): joblib.load unpickles the downloaded bytes, which can
    # execute arbitrary code -- this presumes the File comes from a
    # trusted submitter; confirm against the caller.
    func_buffer = BytesIO()
    civis.io.civis_to_file(func_file_id, func_buffer)
    func_buffer.seek(0)
    func = joblib.load(func_buffer)

    # Run the function.
    result = None
    try:
        result = func()
    except Exception:
        print("Error! Attempting to record exception.")
        # Wrap the exception in joblib's TransportableException
        # so that joblib can properly display the results.
        e_type, e_value, e_tb = sys.exc_info()
        text = format_exc(e_type, e_value, e_tb, context=10, tb_offset=1)
        result = TransportableException(text, e_type)
        raise
    finally:
        # Serialize the result and upload it to the Files API.
        # Note that if compress is 0, joblib will output multiple files.
        # compress=3 is a good compromise between space and read/write times
        # (https://github.com/joblib/joblib/blob/18f9b4ce95e8788cc0e9b5106fc22573d768c44b/joblib/numpy_pickle.py#L358).
        if result is not None:
            # If the function exits without erroring, we may not have a result.
            result_buffer = BytesIO()
            joblib.dump(result, result_buffer, compress=3)
            result_buffer.seek(0)
            output_name = "Results from Joblib job {} / run {}".format(
                job_id, run_id)
            output_file_id = civis.io.file_to_civis(result_buffer,
                                                    output_name,
                                                    expires_at=expires_at)
            client.scripts.post_containers_runs_outputs(
                job_id, run_id, 'File', output_file_id)
            # Report the file ID itself: the message has a single
            # placeholder, so passing the output name first would print
            # the name and silently drop the ID.
            print("Results output to file ID: {}".format(output_file_id))
    def get(self, timeout=None):
        """Wait for the remote job and hand back (or raise) its output.

        Parameters
        ----------
        timeout: float, optional
            Forwarded to ``wait`` as the number of seconds to wait for
            the underlying future when no result is cached yet.

        Returns
        -------
        The cached ``remote_func_output`` of the underlying future.
            NB: ``joblib`` expects ``get`` to return an iterable; remote
            functions are expected to be wrapped in
            ``joblib.parallel.BatchedCalls``, which returns a list.

        Raises
        ------
        The stored result
            When the future reports an exception, or its result was not
            fetched, and a result object is available, that object is
            raised as-is.
        TransportableException
            When no result object is available; it is built from the
            ``repr`` and type of the future's exception.
        """
        future = self._future

        if self.result is None:
            # Nothing cached yet: block until the script completes, then
            # remember whatever the remote function produced.
            wait([future], timeout=timeout)
            self.result = future.remote_func_output

        job_failed = future.exception() or not future.result_fetched
        if not job_failed:
            return self.result

        # A failed job may still have shipped an exception back through
        # the run outputs. Even a successful job can end up here with an
        # exception result if retrieving its output failed.
        if self.result is not None:
            raise self.result

        # Otherwise fall back to the API exception. Its str() is
        # typically None, so repr() is used for the message.
        api_exc = future.exception()
        raise TransportableException(repr(api_exc), type(api_exc))
Exemple #5
0
                 'py3': '71b3f47df22cb19431d85d92d0b230b2'
             }),
              (u"C'est l\xe9t\xe9", {
                  'py2': '2ff3a25200eb6219f468de2640913c2d',
                  'py3': '2d8d189e9b2b0b2e384d93c868c0e576'
              }),
              ((123456, 54321, -98765), {
                  'py2': '50d81c80af05061ac4dcdc2d5edee6d6',
                  'py3': 'e205227dd82250871fa25aa0ec690aa3'
              }),
              ([random.Random(42).random() for _ in range(5)], {
                  'py2': '1a36a691b2e2ba3a9df72de3dccf17ea',
                  'py3': 'a11ffad81f9682a7d901e6edc3d16c84'
              }),
              ([3, 'abc', None,
                TransportableException('foo', ValueError)], {
                    'py2': 'adb6ba84990ee5e462dc138383f11802',
                    'py3': '994f663c64ba5e64b2a85ebe75287829'
                }),
              ({
                  'abcde': 123,
                  'sadfas': [-9999, 2, 3]
              }, {
                  'py2': 'fc9314a39ff75b829498380850447047',
                  'py3': 'aeda150553d4bb5c69f0e69d51b0e2ef'
              })])
def test_hashes_stay_the_same(to_hash, expected):
    py_version_str = 'py3' if PY3_OR_LATER else 'py2'
    if expected[py_version_str] == "994f663c64ba5e64b2a85ebe75287829":
        # [3, 'abc', None, TransportableException('foo', ValueError)]
        # started to fail when distributed is installed for some unknown
Exemple #6
0

# Each case pairs an object to hash with its reference digests, keyed by
# interpreter family ('py2' / 'py3').
@parametrize('to_hash,expected',
             [('This is a string to hash',
                 {'py2': '80436ada343b0d79a99bfd8883a96e45',
                  'py3': '71b3f47df22cb19431d85d92d0b230b2'}),
              (u"C'est l\xe9t\xe9",
                 {'py2': '2ff3a25200eb6219f468de2640913c2d',
                  'py3': '2d8d189e9b2b0b2e384d93c868c0e576'}),
              ((123456, 54321, -98765),
                 {'py2': '50d81c80af05061ac4dcdc2d5edee6d6',
                  'py3': 'e205227dd82250871fa25aa0ec690aa3'}),
              ([random.Random(42).random() for _ in range(5)],
                 {'py2': '1a36a691b2e2ba3a9df72de3dccf17ea',
                  'py3': 'a11ffad81f9682a7d901e6edc3d16c84'}),
              ([3, 'abc', None, TransportableException('foo', ValueError)],
                 {'py2': 'adb6ba84990ee5e462dc138383f11802',
                  'py3': '994f663c64ba5e64b2a85ebe75287829'}),
              ({'abcde': 123, 'sadfas': [-9999, 2, 3]},
                 {'py2': 'fc9314a39ff75b829498380850447047',
                  'py3': 'aeda150553d4bb5c69f0e69d51b0e2ef'})])
def test_hashes_stay_the_same(to_hash, expected):
    """Mark the hash-stability cases as expected failures on AppVeyor.

    NOTE(review): the visible body never reads ``to_hash`` or
    ``expected``; the actual hash comparison presumably follows beyond
    this excerpt -- confirm against the full file.
    """
    if "APPVEYOR" in os.environ:
        # These tests pass on Windows on a local machine, but the case
        # [3, 'abc', None, TransportableException('foo', ValueError)]
        # started to fail on AppVeyor for an obscure reason.
        # As it cannot be reproduced locally, it is too challenging
        # to debug.
        pytest.xfail("Appveyor specific failure that cannot be"
                     " reproducted locally")