def worker_func(func_file_id):
    """Run a remotely pickled callable and publish its result as a run output.

    The callable and its remote backend description are fetched with
    ``_robust_pickle_download``, the callable is invoked with the requested
    joblib backend active, and whatever it returns is pickled with
    ``cloudpickle``, uploaded through ``_robust_file_to_civis`` and attached
    to the current container run as a ``'File'`` output.

    Parameters
    ----------
    func_file_id
        Identifier handed to ``_robust_pickle_download`` to retrieve the
        ``(func, remote_backend)`` pair.

    Raises
    ------
    RuntimeError
        If ``CIVIS_JOB_ID`` or ``CIVIS_RUN_ID`` is missing or empty in the
        environment.
    Exception
        Anything raised while downloading or running the callable is
        re-raised after a ``TransportableException`` describing it has been
        uploaded in place of the result.
    """
    # The uploaded File is given a lifetime of one week from now.
    expires_at = (datetime.now() + timedelta(days=7)).isoformat()

    client = civis.APIClient()
    job_id = os.environ.get('CIVIS_JOB_ID')
    run_id = os.environ.get('CIVIS_RUN_ID')
    if not (job_id and run_id):
        raise RuntimeError("This function must be run inside a "
                           "Civis container job.")

    # Stays None unless the callable returns something or an error is
    # captured below; a None result is never uploaded.
    result = None
    try:
        func, remote_backend = _robust_pickle_download(
            func_file_id, client=client, n_retries=5, delay=0.5)
        _backend = _setup_remote_backend(remote_backend)

        if not NO_SKLEARN:
            # joblib tracks its active backend as package-level global
            # state, and sklearn ships its own copy of the package at
            # `sklearn.externals.joblib`. With two copies in play there are
            # two backends to set, so both context managers are entered
            # (sklearn's first, then joblib's).
            with _sklearn_para_backend(_backend), \
                    _joblib_para_backend(_backend):
                result = func()
        else:
            with _joblib_para_backend(_backend):
                result = func()
    except Exception:
        print("Error! Attempting to record exception.")
        # Replace the result with joblib's TransportableException so that
        # joblib can display the failure properly on the receiving side.
        exc_type, exc_value, exc_tb = sys.exc_info()
        text = format_exc(exc_type, exc_value, exc_tb,
                          context=10, tb_offset=1)
        result = TransportableException(text, exc_type)
        raise
    finally:
        # Runs on success and on failure alike: serialize whatever we have
        # and push it to the Files API.
        if result is not None:
            payload = BytesIO()
            cloudpickle.dump(result, payload, pickle.HIGHEST_PROTOCOL)
            payload.seek(0)

            output_name = "Results from Joblib job {} / run {}".format(
                job_id, run_id)
            output_file_id = _robust_file_to_civis(
                payload, output_name, n_retries=5, delay=0.5,
                expires_at=expires_at, client=client)
            client.scripts.post_containers_runs_outputs(
                job_id, run_id, 'File', output_file_id)
            print("Results output to file ID: {}".format(output_file_id))
def test_format_exc_with_compiled_code():
    """Check that ``format_exc`` can render a traceback through compiled code.

    An exception is provoked inside ``np.random.uniform`` and the formatted
    traceback is expected to mention the ``mtrand`` extension module file.
    """
    # Trying to tokenize compiled C code raises SyntaxError.
    # See https://github.com/joblib/joblib/issues/101 for more details.
    try:
        np.random.uniform('invalid_value')
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        formatted_exc = format_exc(exc_type, exc_value,
                                   exc_traceback, context=10)
        # The name of the extension can be something like
        # mtrand.cpython-33m.so or mtrand.cpython-36m-x86_64-linux-gnu.so,
        # so underscores must be accepted too. A raw string keeps `\.` from
        # being treated as an (invalid) string escape sequence.
        pattern = r'mtrand[a-z0-9._-]*\.(so|pyd)'
        assert re.search(pattern, formatted_exc)
def test_format_exc_with_compiled_code():
    """``format_exc`` must cope with frames that come from compiled code.

    Tokenizing compiled C code raises SyntaxError, which used to break the
    formatter; see https://github.com/joblib/joblib/issues/101.
    """
    try:
        np.random.uniform('invalid_value')
    except Exception:
        # sys.exc_info() yields (type, value, traceback), which are exactly
        # the three leading positional arguments passed to format_exc.
        rendered = format_exc(*sys.exc_info(), context=10)

        # The extension's file name varies by platform and interpreter,
        # e.g. mtrand.cpython-33m.so
        extension_re = r'mtrand[a-z0-9._-]*\.(so|pyd)'
        found = re.search(extension_re, rendered)
        assert found
def worker_func(func_file_id):
    """Run a joblib-serialized callable and publish its result as a run output.

    The callable is downloaded from the Civis File ``func_file_id``, loaded
    with ``joblib.load`` and called with no arguments. Its return value (or,
    on failure, a ``TransportableException`` describing the error) is dumped
    with joblib, uploaded as a Civis File and attached to the current
    container run as a ``'File'`` output.

    Parameters
    ----------
    func_file_id
        ID of the Civis File holding the joblib-serialized callable.

    Raises
    ------
    RuntimeError
        If ``CIVIS_JOB_ID`` or ``CIVIS_RUN_ID`` is missing or empty in the
        environment.
    Exception
        Anything raised by the callable is re-raised after the wrapped
        exception has been uploaded in place of the result.
    """
    # Have the output File expire in 7 days.
    expires_at = (datetime.now() + timedelta(days=7)).isoformat()

    client = civis.APIClient()
    job_id = os.environ.get('CIVIS_JOB_ID')
    run_id = os.environ.get('CIVIS_RUN_ID')
    if not job_id or not run_id:
        raise RuntimeError("This function must be run inside a "
                           "Civis container job.")

    # NOTE: joblib.load unpickles the payload and can therefore execute
    # arbitrary code; func_file_id must only reference trusted Files.
    func_buffer = BytesIO()
    civis.io.civis_to_file(func_file_id, func_buffer)
    func_buffer.seek(0)
    func = joblib.load(func_buffer)

    # Run the function. `result` stays None unless the callable returns
    # something or an error is captured below.
    result = None
    try:
        result = func()
    except Exception:
        print("Error! Attempting to record exception.")
        # Wrap the exception in joblib's TransportableException
        # so that joblib can properly display the results.
        e_type, e_value, e_tb = sys.exc_info()
        text = format_exc(e_type, e_value, e_tb, context=10, tb_offset=1)
        result = TransportableException(text, e_type)
        raise
    finally:
        # Serialize the result and upload it to the Files API.
        # Note that if compress is 0, joblib will output multiple files.
        # compress=3 is a good compromise between space and read/write times
        # (https://github.com/joblib/joblib/blob/18f9b4ce95e8788cc0e9b5106fc22573d768c44b/joblib/numpy_pickle.py#L358).
        if result is not None:
            # If the function exits without erroring, we may not have a
            # result; in that case nothing is uploaded.
            result_buffer = BytesIO()
            joblib.dump(result, result_buffer, compress=3)
            result_buffer.seek(0)
            output_name = "Results from Joblib job {} / run {}".format(
                job_id, run_id)
            output_file_id = civis.io.file_to_civis(result_buffer,
                                                    output_name,
                                                    expires_at=expires_at)
            client.scripts.post_containers_runs_outputs(
                job_id, run_id, 'File', output_file_id)
            # Report the file ID itself; the single placeholder previously
            # received the output name as its first positional argument.
            print("Results output to file ID: {}".format(output_file_id))