def test_hashes_stay_the_same():
    """Yield one equality check per reference object against its known hash.

    Hashes must not drift between joblib versions: a change would force
    end users to rebuild their cache from scratch, which can mean lengthy
    recomputations.
    """
    seeded = random.Random(42)
    samples = [
        'This is a string to hash',
        u"C'est l\xe9t\xe9",
        (123456, 54321, -98765),
        [seeded.random() for _ in range(5)],
        [3, 'abc', None, TransportableException('the message', ValueError)],
        {'abcde': 123, 'sadfas': [-9999, 2, 3]},
    ]

    # Reference digests generated with joblib 0.9.2, one list per major
    # Python version, in the same order as ``samples``.
    py2_digests = [
        '80436ada343b0d79a99bfd8883a96e45',
        '2ff3a25200eb6219f468de2640913c2d',
        '50d81c80af05061ac4dcdc2d5edee6d6',
        '536af09b66a087ed18b515acc17dc7fc',
        '123ffc6f13480767167e171a8e1f6f4a',
        'fc9314a39ff75b829498380850447047',
    ]
    py3_digests = [
        '71b3f47df22cb19431d85d92d0b230b2',
        '2d8d189e9b2b0b2e384d93c868c0e576',
        'e205227dd82250871fa25aa0ec690aa3',
        '9e4e9bf9b91890c9734a6111a35e6633',
        '6065a3c48e842ea5dee2cfd0d6820ad6',
        'aeda150553d4bb5c69f0e69d51b0e2ef',
    ]
    digests = py3_digests if PY3 else py2_digests

    for sample, digest in zip(samples, digests):
        yield assert_equal, hash(sample), digest
def worker_func(func_file_id):
    """Download a pickled callable, run it, and upload what it produced.

    The callable and its remote backend description are fetched with
    ``_robust_pickle_download``; the callable is then invoked with the
    joblib backend (and, unless ``NO_SKLEARN`` is set, the sklearn
    backend too) switched to the requested one. Whatever the call
    produced -- its return value, or a ``TransportableException``
    describing the failure -- is serialized with ``cloudpickle``,
    uploaded, and attached to the current run as a ``'File'`` output.

    Parameters
    ----------
    func_file_id
        Identifier handed to ``_robust_pickle_download`` to fetch the
        callable.

    Raises
    ------
    RuntimeError
        If ``CIVIS_JOB_ID`` or ``CIVIS_RUN_ID`` is missing or empty in
        the environment.
    Exception
        Any exception raised while downloading or running the callable
        is re-raised after an attempt to record it as the run output.
    """
    # The uploaded results File is set to expire 7 days from now.
    expiry = (datetime.now() + timedelta(days=7)).isoformat()

    api = civis.APIClient()
    job_id = os.environ.get('CIVIS_JOB_ID')
    run_id = os.environ.get('CIVIS_RUN_ID')
    if not (job_id and run_id):
        raise RuntimeError("This function must be run inside a "
                           "Civis container job.")

    # Stays None when the callable finishes without producing anything.
    outcome = None
    try:
        func, remote_backend = _robust_pickle_download(
            func_file_id, client=api, n_retries=5, delay=0.5)
        backend = _setup_remote_backend(remote_backend)

        # Plain nested ``with`` blocks are used on purpose: graceful
        # nested context managers are hard to get right across Python
        # versions, and this form just works.
        if not NO_SKLEARN:
            # joblib keeps the active backend as package-level global
            # state, so with two copies of joblib in play (the package
            # itself and the one shipped at `sklearn.externals.joblib`)
            # the backend has to be set in both.
            with _sklearn_para_backend(backend):
                with _joblib_para_backend(backend):
                    outcome = func()
        else:
            with _joblib_para_backend(backend):
                outcome = func()
    except Exception:
        print("Error! Attempting to record exception.")
        # Wrap the failure in joblib's TransportableException so that
        # joblib can display it properly on the receiving side.
        exc_info = sys.exc_info()
        trace_text = format_exc(*exc_info, context=10, tb_offset=1)
        outcome = TransportableException(trace_text, exc_info[0])
        raise
    finally:
        # No early ``return`` here: returning from ``finally`` would
        # swallow the exception re-raised above.
        if outcome is not None:
            payload = BytesIO()
            cloudpickle.dump(outcome, payload, pickle.HIGHEST_PROTOCOL)
            payload.seek(0)

            label = "Results from Joblib job {} / run {}".format(job_id,
                                                                 run_id)
            uploaded_id = _robust_file_to_civis(payload, label,
                                                n_retries=5, delay=0.5,
                                                expires_at=expiry,
                                                client=api)
            api.scripts.post_containers_runs_outputs(job_id, run_id,
                                                     'File', uploaded_id)
            print("Results output to file ID: {}".format(uploaded_id))
def worker_func(func_file_id):
    """Load a joblib-serialized callable from a Civis File and run it.

    The callable's return value -- or, if it raises, a
    ``TransportableException`` describing the failure -- is dumped with
    ``joblib.dump``, uploaded with ``civis.io.file_to_civis`` and
    attached to the current run as a ``'File'`` output.

    Parameters
    ----------
    func_file_id
        Civis File ID passed to ``civis.io.civis_to_file`` to download
        the serialized callable.

    Raises
    ------
    RuntimeError
        If ``CIVIS_JOB_ID`` or ``CIVIS_RUN_ID`` is missing or empty in
        the environment.
    Exception
        Any exception raised by the callable is re-raised after an
        attempt to record it as the run output.
    """
    # Have the output File expire in 7 days.
    expires_at = (datetime.now() + timedelta(days=7)).isoformat()

    client = civis.APIClient()
    job_id = os.environ.get('CIVIS_JOB_ID')
    run_id = os.environ.get('CIVIS_RUN_ID')
    if not job_id or not run_id:
        raise RuntimeError("This function must be run inside a "
                           "Civis container job.")

    # NOTE(review): joblib.load unpickles the downloaded bytes, which can
    # execute arbitrary code -- func_file_id must point at trusted content.
    func_buffer = BytesIO()
    civis.io.civis_to_file(func_file_id, func_buffer)
    func_buffer.seek(0)
    func = joblib.load(func_buffer)

    # Run the function.
    result = None
    try:
        result = func()
    except Exception:
        print("Error! Attempting to record exception.")
        # Wrap the exception in joblib's TransportableException
        # so that joblib can properly display the results.
        e_type, e_value, e_tb = sys.exc_info()
        text = format_exc(e_type, e_value, e_tb, context=10, tb_offset=1)
        result = TransportableException(text, e_type)
        raise
    finally:
        # Serialize the result and upload it to the Files API.
        # Note that if compress is 0, joblib will output multiple files.
        # compress=3 is a good compromise between space and read/write times
        # (https://github.com/joblib/joblib/blob/18f9b4ce95e8788cc0e9b5106fc22573d768c44b/joblib/numpy_pickle.py#L358).
        if result is not None:
            # If the function exits without erroring, we may not have a result.
            result_buffer = BytesIO()
            joblib.dump(result, result_buffer, compress=3)
            result_buffer.seek(0)
            output_name = "Results from Joblib job {} / run {}".format(
                job_id, run_id)
            output_file_id = civis.io.file_to_civis(result_buffer,
                                                    output_name,
                                                    expires_at=expires_at)
            client.scripts.post_containers_runs_outputs(
                job_id, run_id, 'File', output_file_id)
            # Report the file ID itself. The template has a single
            # placeholder, so passing the output name first would print
            # the name and silently drop the ID.
            print("Results output to file ID: {}".format(output_file_id))
def get(self, timeout=None):
    """Wait for the remote job and hand back (or raise) its outcome.

    Parameters
    ----------
    timeout: float, optional
        Number of seconds passed to ``wait`` while waiting for the
        underlying future.

    Returns
    -------
    The output of the function which ``joblib`` ran via Civis

    NB: ``joblib`` expects that ``get`` will always return an iterable.
    The remote function(s) should always be wrapped in
    ``joblib.parallel.BatchedCalls``, which does always return a list.

    Raises
    ------
    TransportableException
        Any error in the remote job will result in a
        ``TransportableException``, to be handled by
        ``Parallel.retrieve``.
    futures.CancelledError
        If the remote job was cancelled before completion
    """
    future = self._future

    if self.result is None:
        # Nothing cached yet: block until the script completes, then
        # remember whatever output the future collected.
        wait([future], timeout=timeout)
        self.result = future.remote_func_output

    job_failed = future.exception() or not future.result_fetched
    if not job_failed:
        return self.result

    # A failed job may still have shipped its exception back through the
    # run outputs, and a successful job can end up with an exception
    # result when retrieving its output failed. Prefer that over the
    # API-level exception.
    if self.result is not None:
        raise self.result

    # Fall back to the API exception. repr() is used for the message
    # because the API exception typically has str(exc)==None.
    api_error = future.exception()
    raise TransportableException(repr(api_error), type(api_error))
'py3': '71b3f47df22cb19431d85d92d0b230b2' }), (u"C'est l\xe9t\xe9", { 'py2': '2ff3a25200eb6219f468de2640913c2d', 'py3': '2d8d189e9b2b0b2e384d93c868c0e576' }), ((123456, 54321, -98765), { 'py2': '50d81c80af05061ac4dcdc2d5edee6d6', 'py3': 'e205227dd82250871fa25aa0ec690aa3' }), ([random.Random(42).random() for _ in range(5)], { 'py2': '1a36a691b2e2ba3a9df72de3dccf17ea', 'py3': 'a11ffad81f9682a7d901e6edc3d16c84' }), ([3, 'abc', None, TransportableException('foo', ValueError)], { 'py2': 'adb6ba84990ee5e462dc138383f11802', 'py3': '994f663c64ba5e64b2a85ebe75287829' }), ({ 'abcde': 123, 'sadfas': [-9999, 2, 3] }, { 'py2': 'fc9314a39ff75b829498380850447047', 'py3': 'aeda150553d4bb5c69f0e69d51b0e2ef' })]) def test_hashes_stay_the_same(to_hash, expected): py_version_str = 'py3' if PY3_OR_LATER else 'py2' if expected[py_version_str] == "994f663c64ba5e64b2a85ebe75287829": # [3, 'abc', None, TransportableException('foo', ValueError)] # started to fail when distributed is installed for some unknown
@parametrize('to_hash,expected', [('This is a string to hash', {'py2': '80436ada343b0d79a99bfd8883a96e45', 'py3': '71b3f47df22cb19431d85d92d0b230b2'}), (u"C'est l\xe9t\xe9", {'py2': '2ff3a25200eb6219f468de2640913c2d', 'py3': '2d8d189e9b2b0b2e384d93c868c0e576'}), ((123456, 54321, -98765), {'py2': '50d81c80af05061ac4dcdc2d5edee6d6', 'py3': 'e205227dd82250871fa25aa0ec690aa3'}), ([random.Random(42).random() for _ in range(5)], {'py2': '1a36a691b2e2ba3a9df72de3dccf17ea', 'py3': 'a11ffad81f9682a7d901e6edc3d16c84'}), ([3, 'abc', None, TransportableException('foo', ValueError)], {'py2': 'adb6ba84990ee5e462dc138383f11802', 'py3': '994f663c64ba5e64b2a85ebe75287829'}), ({'abcde': 123, 'sadfas': [-9999, 2, 3]}, {'py2': 'fc9314a39ff75b829498380850447047', 'py3': 'aeda150553d4bb5c69f0e69d51b0e2ef'})]) def test_hashes_stay_the_same(to_hash, expected): if "APPVEYOR" in os.environ: # Those tests pass on Windows on a local machine but the one with # [3, 'abc', None, TransportableException('foo', ValueError)] # started to fail on appveyor for an obscure reason. # As it cannot be reproduced locally, it is too challenging to # to debug. pytest.xfail("Appveyor specific failure that cannot be" " reproducted locally")