Ejemplo n.º 1
0
def worker_func(func_file_id):
    """Execute a remotely supplied callable and publish its outcome.

    The callable and a remote-backend description are fetched with
    ``_robust_pickle_download`` using ``func_file_id``. The callable is
    invoked with no arguments while the joblib parallel backend (and,
    unless ``NO_SKLEARN`` is set, the sklearn one as well) is switched to
    the backend built by ``_setup_remote_backend``.

    Whatever the call produced -- its return value, or a
    ``TransportableException`` describing the failure -- is pickled with
    ``cloudpickle``, uploaded through ``_robust_file_to_civis`` and
    attached to the current run as a 'File' output. Nothing is uploaded
    when the outcome is ``None``.

    Raises
    ------
    RuntimeError
        If ``CIVIS_JOB_ID`` or ``CIVIS_RUN_ID`` is missing or empty in the
        environment.
    Exception
        Any exception raised while downloading or running the callable is
        re-raised after it has been recorded.
    """
    # The uploaded output File is set to expire one week from now.
    one_week_out = datetime.now() + timedelta(days=7)
    expires_at = one_week_out.isoformat()

    client = civis.APIClient()
    job_id = os.environ.get('CIVIS_JOB_ID')
    run_id = os.environ.get('CIVIS_RUN_ID')
    if not (job_id and run_id):
        raise RuntimeError("This function must be run inside a "
                           "Civis container job.")

    outcome = None
    try:
        task, backend_spec = _robust_pickle_download(
            func_file_id, client=client, n_retries=5, delay=0.5)
        active_backend = _setup_remote_backend(backend_spec)

        if NO_SKLEARN:
            # Only the standalone joblib package is in play.
            with _joblib_para_backend(active_backend):
                outcome = task()
        else:
            # joblib keeps its current backend as package-level global
            # state, and sklearn ships its own copy of joblib at
            # `sklearn.externals.joblib`. With two copies of the package
            # there are two backends to set, so both context managers are
            # entered: sklearn's first, then joblib's.
            with _sklearn_para_backend(active_backend), \
                    _joblib_para_backend(active_backend):
                outcome = task()
    except Exception:
        print("Error! Attempting to record exception.")
        # Package the failure as joblib's TransportableException so that
        # joblib can properly display it on the receiving side.
        exc_details = sys.exc_info()
        formatted = format_exc(*exc_details, context=10, tb_offset=1)
        outcome = TransportableException(formatted, exc_details[0])
        raise
    finally:
        # Runs on both the success and the failure path. A callable that
        # finished without error may legitimately have produced nothing.
        if outcome is not None:
            payload = BytesIO()
            cloudpickle.dump(outcome, payload, pickle.HIGHEST_PROTOCOL)
            payload.seek(0)
            output_name = "Results from Joblib job {} / run {}".format(
                job_id, run_id)
            output_file_id = _robust_file_to_civis(
                payload, output_name, n_retries=5, delay=0.5,
                expires_at=expires_at, client=client)
            client.scripts.post_containers_runs_outputs(
                job_id, run_id, 'File', output_file_id)
            print("Results output to file ID: {}".format(output_file_id))
Ejemplo n.º 2
0
def test_format_exc_with_compiled_code():
    # Regression test: trying to tokenize compiled C code raises
    # SyntaxError, so format_exc must cope with frames that come from a
    # compiled extension module.
    # See https://github.com/joblib/joblib/issues/101 for more details.
    try:
        np.random.uniform('invalid_value')
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        formatted_exc = format_exc(exc_type, exc_value,
                                   exc_traceback, context=10)
        # The name of the extension can be something like
        # mtrand.cpython-33m.so or mtrand.cpython-36m-x86_64-linux-gnu.so,
        # so underscores must be allowed in the tag. A raw string keeps
        # the `\.` from being read as an (invalid) string escape.
        pattern = r'mtrand[a-z0-9._-]*\.(so|pyd)'
        assert re.search(pattern, formatted_exc)
Ejemplo n.º 3
0
def test_format_exc_with_compiled_code():
    # Tokenizing compiled C code raises SyntaxError; this checks that
    # format_exc still produces a report for an error raised from inside
    # a compiled extension.
    # Background: https://github.com/joblib/joblib/issues/101

    # The extension file may be named e.g. mtrand.cpython-33m.so
    extension_name_re = r'mtrand[a-z0-9._-]*\.(so|pyd)'
    try:
        np.random.uniform('invalid_value')
    except Exception:
        # Hand the (type, value, traceback) triple straight to format_exc.
        report = format_exc(*sys.exc_info(), context=10)
        assert re.search(extension_name_re, report)
Ejemplo n.º 4
0
def worker_func(func_file_id):
    """Run a joblib-serialized callable and upload what it produced.

    The File identified by ``func_file_id`` is downloaded with
    ``civis.io.civis_to_file``, loaded with ``joblib.load`` and called
    with no arguments. The return value -- or, on failure, a
    ``TransportableException`` describing the error -- is dumped with
    joblib, uploaded with ``civis.io.file_to_civis`` and registered as a
    'File' output of the current run. Nothing is uploaded when the result
    is ``None``.

    Raises
    ------
    RuntimeError
        If ``CIVIS_JOB_ID`` or ``CIVIS_RUN_ID`` is missing or empty in the
        environment.
    Exception
        Any exception raised by the callable is re-raised after it has
        been recorded.
    """
    # Have the output File expire in 7 days.
    expires_at = (datetime.now() + timedelta(days=7)).isoformat()

    client = civis.APIClient()
    job_id = os.environ.get('CIVIS_JOB_ID')
    run_id = os.environ.get('CIVIS_RUN_ID')
    if not job_id or not run_id:
        raise RuntimeError("This function must be run inside a "
                           "Civis container job.")

    func_buffer = BytesIO()
    civis.io.civis_to_file(func_file_id, func_buffer)
    func_buffer.seek(0)
    # NOTE(review): joblib.load unpickles the downloaded bytes, which can
    # execute arbitrary code -- only point this at trusted Files.
    func = joblib.load(func_buffer)

    # Run the function.
    result = None
    try:
        result = func()
    except Exception:
        print("Error! Attempting to record exception.")
        # Wrap the exception in joblib's TransportableException
        # so that joblib can properly display the results.
        e_type, e_value, e_tb = sys.exc_info()
        text = format_exc(e_type, e_value, e_tb, context=10, tb_offset=1)
        result = TransportableException(text, e_type)
        raise
    finally:
        # Serialize the result and upload it to the Files API.
        # Note that if compress is 0, joblib will output multiple files.
        # compress=3 is a good compromise between space and read/write times
        # (https://github.com/joblib/joblib/blob/18f9b4ce95e8788cc0e9b5106fc22573d768c44b/joblib/numpy_pickle.py#L358).
        if result is not None:
            # If the function exits without erroring, we may not have a result.
            result_buffer = BytesIO()
            joblib.dump(result, result_buffer, compress=3)
            result_buffer.seek(0)
            output_name = "Results from Joblib job {} / run {}".format(
                job_id, run_id)
            output_file_id = civis.io.file_to_civis(result_buffer,
                                                    output_name,
                                                    expires_at=expires_at)
            client.scripts.post_containers_runs_outputs(
                job_id, run_id, 'File', output_file_id)
            # Report the file ID itself: the template has one placeholder,
            # so passing the output name first would print the name
            # instead of the ID.
            print("Results output to file ID: {}".format(output_file_id))