Esempio n. 1
0
    def _init_civis_backend(self):
        """Validate the retry setting, then build the API client and executor.

        Reads ``from_template_id``, ``max_submit_retries``, ``client`` and
        ``executor_kwargs`` from the instance and sets ``using_template``,
        ``client`` and ``executor``.

        Raises
        ------
        ValueError
            If ``max_submit_retries`` is negative.
        """
        template_id = self.from_template_id
        self.using_template = template_id is not None

        retries = self.max_submit_retries
        if retries < 0:
            raise ValueError(
                "max_submit_retries cannot be negative (value = %d)" % retries)

        # Only build a fresh API client when none was supplied.
        if not self.client:
            self.client = civis.APIClient()

        # A template ID selects the custom-script executor; otherwise jobs
        # are submitted through the container shell executor.
        if template_id:
            executor = CustomScriptExecutor(template_id,
                                            client=self.client,
                                            **self.executor_kwargs)
        else:
            executor = _ContainerShellExecutor(client=self.client,
                                               **self.executor_kwargs)
        self.executor = executor
Esempio n. 2
0
    def __init__(self,
                 setup_cmd=_DEFAULT_SETUP_CMD,
                 from_template_id=None,
                 max_submit_retries=0,
                 client=None,
                 **executor_kwargs):
        """Set up the API client and the executor used to submit jobs.

        ``executor_kwargs`` are forwarded unchanged to the executor
        constructor. A ``ValueError`` is raised when ``max_submit_retries``
        is negative.
        """
        # Validate before anything is created or stored on the instance.
        if max_submit_retries < 0:
            raise ValueError(
                "max_submit_retries cannot be negative (value = %d)" %
                max_submit_retries)

        api_client = client
        if api_client is None:
            api_client = civis.APIClient(resources='all')
        self._client = api_client

        # A template ID selects the custom-script executor; otherwise jobs
        # are submitted through the container shell executor.
        if from_template_id:
            executor = CustomScriptExecutor(from_template_id,
                                            client=api_client,
                                            **executor_kwargs)
        else:
            executor = _ContainerShellExecutor(client=api_client,
                                               **executor_kwargs)
        self.executor = executor

        self.setup_cmd = setup_cmd
        self.max_submit_retries = max_submit_retries
        self.using_template = from_template_id is not None
Esempio n. 3
0
def _check_executor(from_template_id=None):
    """Exercise an executor's future lifecycle against a mocked client.

    Builds a ``CustomScriptExecutor`` when ``from_template_id`` is truthy and
    a ``_ContainerShellExecutor`` otherwise, submits one job, and asserts the
    running / cancelled / done states as well as ``cancel_all()`` and
    ``shutdown()``. Returns the mocked client so callers can inspect it.
    """
    job_id, run_id = 42, 43
    client = _setup_client_mock(job_id, run_id, n_failures=0)
    run_mock = client.scripts.post_containers_runs()

    if from_template_id:
        executor = CustomScriptExecutor(from_template_id=from_template_id,
                                        client=client,
                                        polling_interval=0.01)
        future = executor.submit(my_param='spam')
    else:
        executor = _ContainerShellExecutor(client=client,
                                           polling_interval=0.01)
        future = executor.submit("foo")

    # While the run reports "running", the future is live and not done.
    run_mock.state = "running"
    assert future.running(), "future is incorrectly marked as not running"
    assert future.job_id == job_id, "job_id not stored properly"
    assert not future.done(), "future is incorrectly marked as done"

    # Cancelling flips the future to cancelled and not running.
    future.cancel()
    assert future.cancelled(), "cancelled() did not return True as expected"
    assert not future.running(), "running() did not return False as expected"

    # A succeeded run state must be reported as done.
    run_mock.state = "succeeded"
    assert future.done(), "done() did not return True as expected"

    # cancel_all() on the executor must cancel the outstanding future.
    run_mock.state = "running"
    executor.cancel_all()
    assert future.cancelled(), "cancel_all() failed"

    # After a blocking shutdown the future must be done.
    executor.shutdown(wait=True)
    assert future.done(), "shutdown() failed"

    return client
Esempio n. 4
0
class _CivisBackend(ParallelBackendBase):
    """The backend class that tells joblib how to use Civis to run jobs

    Users should interact with this through ``make_backend_factory``.

    Parameters
    ----------
    setup_cmd
        Shell command run (joined with ``&&``) before the worker command
        when submitting a container script.
    from_template_id
        If not None, jobs are submitted as custom scripts with the uploaded
        function's file ID passed as ``JOBLIB_FUNC_FILE_ID``.
    max_submit_retries
        Number of times a failed submission is retried. Must be >= 0.
    client
        Civis API client; a new ``civis.APIClient()`` is created if falsy.
    remote_backend
        If ``'civis'``, this backend object itself is pickled alongside the
        function; otherwise the value of ``remote_backend`` is pickled.
    nesting_level
        Stored on the instance as given.
    **executor_kwargs
        Forwarded unchanged to the executor constructor.
    """
    uses_threads = False
    supports_sharedmem = False
    supports_timeout = True

    def __init__(self,
                 setup_cmd=_DEFAULT_SETUP_CMD,
                 from_template_id=None,
                 max_submit_retries=0,
                 client=None,
                 remote_backend='sequential',
                 nesting_level=0,
                 **executor_kwargs):
        self.setup_cmd = setup_cmd
        self.from_template_id = from_template_id
        self.max_submit_retries = max_submit_retries
        self.client = client
        self.remote_backend = remote_backend
        self.executor_kwargs = executor_kwargs
        self.nesting_level = nesting_level
        self._init_civis_backend()

    @classmethod
    def from_existing(cls, klass):
        """Build a new `_CivisBackend` from an existing one."""
        return cls(setup_cmd=klass.setup_cmd,
                   from_template_id=klass.from_template_id,
                   max_submit_retries=klass.max_submit_retries,
                   client=klass.client,
                   remote_backend=klass.remote_backend,
                   **klass.executor_kwargs)

    def _init_civis_backend(self):
        """init the Civis API client and the executors

        Raises ``ValueError`` if ``max_submit_retries`` is negative.
        """
        self.using_template = (self.from_template_id is not None)

        if self.max_submit_retries < 0:
            raise ValueError(
                "max_submit_retries cannot be negative (value = %d)" %
                self.max_submit_retries)

        self.client = self.client or civis.APIClient()
        if self.from_template_id:
            self.executor = CustomScriptExecutor(self.from_template_id,
                                                 client=self.client,
                                                 **self.executor_kwargs)
        else:
            self.executor = _ContainerShellExecutor(client=self.client,
                                                    **self.executor_kwargs)

    def effective_n_jobs(self, n_jobs):
        """Return the number of jobs to use; ``-1`` maps to ``_ALL_JOBS``.

        Raises ``ValueError`` for any other non-positive request.
        """
        if n_jobs == -1:
            n_jobs = _ALL_JOBS
        if n_jobs <= 0:
            raise ValueError("Please request a positive number of jobs, "
                             "or use \"-1\" to request a default "
                             "of {} jobs.".format(_ALL_JOBS))
        return n_jobs

    def abort_everything(self, ensure_ready=True):
        """Cancel all submitted jobs.

        The executor is additionally shut down (without waiting) when
        ``ensure_ready`` is false.
        """
        # This method is called when a job has raised an exception.
        # In that case, we're not going to finish computations, so
        # we should free up Platform resources in any remaining jobs.
        self.executor.cancel_all()
        if not ensure_ready:
            self.executor.shutdown(wait=False)

    def terminate(self):
        """Cancel all outstanding jobs, leaving the executor usable."""
        return self.abort_everything(ensure_ready=True)

    def apply_async(self, func, callback=None):
        """Schedule func to be run

        The function is pickled, uploaded as a Civis File, and submitted
        through the executor. Returns a ``_CivisBackendResult`` wrapping the
        resulting future and ``callback``.

        Raises ``JobSubmissionError`` if submission fails with a
        ``CivisAPIError`` and no retries remain.
        """
        # Serialize func to a temporary file and upload it to a Civis File.
        # Make the temporary files expire in a week.
        expires_at = (datetime.now() + timedelta(days=7)).isoformat()
        with TemporaryDirectory() as tempdir:
            temppath = os.path.join(tempdir, "civis_joblib_backend_func")
            with open(temppath, "wb") as tmpfile:
                cloudpickle.dump((func, self if self.remote_backend == 'civis'
                                  else self.remote_backend), tmpfile,
                                 pickle.HIGHEST_PROTOCOL)
            with open(temppath, "rb") as tmpfile:
                func_file_id = \
                    _robust_file_to_civis(tmpfile,
                                          "civis_joblib_backend_func",
                                          n_retries=5,
                                          delay=0.5,
                                          expires_at=expires_at,
                                          client=self.client)
                log.debug("uploaded serialized function to File: %d",
                          func_file_id)

            # Use the Civis CLI client to download the job runner script into
            # the container, and then run it on the uploaded job.
            # Only download the runner script if it doesn't already
            # exist in the destination environment.
            runner_remote_path = "civis_joblib_worker"
            cmd = ("{setup_cmd} && "
                   "if command -v {runner_remote_path} >/dev/null; "
                   "then exec {runner_remote_path} {func_file_id}; "
                   "else pip install civis=={civis_version} && "
                   "exec {runner_remote_path} {func_file_id}; fi ".format(
                       civis_version=civis.__version__,
                       runner_remote_path=runner_remote_path,
                       func_file_id=func_file_id,
                       setup_cmd=self.setup_cmd))

            # Try to submit the command, with optional retrying for certain
            # error types. One initial attempt plus up to
            # ``max_submit_retries`` retries.
            for n_retries in range(1 + self.max_submit_retries):
                try:
                    if self.using_template:
                        args = {'JOBLIB_FUNC_FILE_ID': func_file_id}
                        future = self.executor.submit(**args)
                        log.debug(
                            "Started custom script from template "
                            "%s with arguments %s",
                            self.executor.from_template_id, args)
                    else:
                        future = self.executor.submit(fn=cmd)
                        log.debug(
                            "started container script with "
                            "command: %s", cmd)
                    # Stop retrying if submission was successful.
                    break
                except CivisAPIError as e:
                    # ``n_retries`` retries have been used so far. If none
                    # are left, give up and raise an exception.
                    retries_left = self.max_submit_retries - n_retries
                    if retries_left < 1:
                        raise JobSubmissionError(e)

                    log.debug("Retrying submission. %d retries left",
                              retries_left)

                    # Sleep with exponentially increasing intervals in case
                    # the issue persists for a while.
                    time.sleep(2**n_retries)

            if self.executor.max_n_retries:
                # Start the ContainerFuture polling.
                # This will use more API calls, but will
                # allow the ContainerFuture to launch
                # retries if necessary.
                # (This is only relevant if we're not using the
                # notifications endpoint.)
                future.done()

            result = _CivisBackendResult(future, callback)

        return result

    def __getstate__(self):
        """override pickle to remove threading and civis APIClient objects"""
        state = self.__dict__.copy()
        if 'client' in state:
            state['client'] = None
        if 'executor' in state:
            del state['executor']
        # the parallel attribute gets added by the parent class when the
        # backend is in use.
        if 'parallel' in state:
            state['parallel'] = None
        return state

    def __setstate__(self, state):
        """re-init the backend when unpickling"""
        self.__dict__.update(state)
        # The client and executor were dropped by __getstate__; rebuild them.
        self._init_civis_backend()
Esempio n. 5
0
class _CivisBackend(ParallelBackendBase):
    """The backend class that tells joblib how to use Civis to run jobs

    Users should interact with this through ``make_backend_factory``.

    Parameters
    ----------
    setup_cmd
        Shell command run (joined with ``&&``) before the worker command
        when submitting a container script.
    from_template_id
        If not None, jobs are submitted as custom scripts with the uploaded
        function's file ID passed as ``JOBLIB_FUNC_FILE_ID``.
    max_submit_retries
        Number of times a failed submission is retried. Must be >= 0.
    client
        Civis API client; ``civis.APIClient(resources='all')`` is created
        if None.
    **executor_kwargs
        Forwarded unchanged to the executor constructor.

    Raises
    ------
    ValueError
        If ``max_submit_retries`` is negative.
    """
    def __init__(self,
                 setup_cmd=_DEFAULT_SETUP_CMD,
                 from_template_id=None,
                 max_submit_retries=0,
                 client=None,
                 **executor_kwargs):
        if max_submit_retries < 0:
            raise ValueError(
                "max_submit_retries cannot be negative (value = %d)" %
                max_submit_retries)

        if client is None:
            client = civis.APIClient(resources='all')
        self._client = client
        if from_template_id:
            self.executor = CustomScriptExecutor(from_template_id,
                                                 client=client,
                                                 **executor_kwargs)
        else:
            self.executor = _ContainerShellExecutor(client=client,
                                                    **executor_kwargs)
        self.setup_cmd = setup_cmd
        self.max_submit_retries = max_submit_retries
        self.using_template = (from_template_id is not None)

    def effective_n_jobs(self, n_jobs):
        """Return the number of jobs to use; ``-1`` maps to ``_ALL_JOBS``.

        Raises ``ValueError`` for any other non-positive request.
        """
        if n_jobs == -1:
            n_jobs = _ALL_JOBS
        if n_jobs <= 0:
            raise ValueError("Please request a positive number of jobs, "
                             "or use \"-1\" to request a default "
                             "of {} jobs.".format(_ALL_JOBS))
        return n_jobs

    def abort_everything(self, ensure_ready=True):
        """Cancel all submitted jobs.

        The executor is additionally shut down (without waiting) when
        ``ensure_ready`` is false.
        """
        # This method is called when a job has raised an exception.
        # In that case, we're not going to finish computations, so
        # we should free up Platform resources in any remaining jobs.
        self.executor.cancel_all()
        if not ensure_ready:
            self.executor.shutdown(wait=False)

    def apply_async(self, func, callback=None):
        """Schedule func to be run

        The function is dumped with joblib, uploaded as a Civis File, and
        submitted through the executor. Returns a ``_CivisBackendResult``
        wrapping the resulting future and ``callback``.

        Raises ``JobSubmissionError`` if submission fails with a
        ``CivisAPIError`` and no retries remain.
        """
        # Serialize func to a temporary file and upload it to a Civis File.
        # Make the temporary files expire in a week.
        expires_at = (datetime.now() + timedelta(days=7)).isoformat()
        with TemporaryDirectory() as tempdir:
            temppath = os.path.join(tempdir, "civis_joblib_backend_func")
            # compress=3 is a compromise between space and read/write times
            # (https://github.com/joblib/joblib/blob/18f9b4ce95e8788cc0e9b5106fc22573d768c44b/joblib/numpy_pickle.py#L358).
            joblib.dump(func, temppath, compress=3)
            with open(temppath, "rb") as tmpfile:
                func_file_id = \
                    civis.io.file_to_civis(tmpfile,
                                           "civis_joblib_backend_func",
                                           expires_at=expires_at,
                                           client=self._client)
                log.debug("uploaded serialized function to File: %d",
                          func_file_id)

            # Use the Civis CLI client to download the job runner script into
            # the container, and then run it on the uploaded job.
            # Only download the runner script if it doesn't already
            # exist in the destination environment.
            runner_remote_path = "civis_joblib_worker"
            cmd = ("{setup_cmd} && "
                   "if command -v {runner_remote_path} >/dev/null; "
                   "then exec {runner_remote_path} {func_file_id}; "
                   "else pip install civis=={civis_version} && "
                   "pip install joblib=={jl_version} && "
                   "exec {runner_remote_path} {func_file_id}; fi".format(
                       jl_version=joblib.__version__,
                       civis_version=civis.__version__,
                       runner_remote_path=runner_remote_path,
                       func_file_id=func_file_id,
                       setup_cmd=self.setup_cmd))

            # Try to submit the command, with optional retrying for certain
            # error types. One initial attempt plus up to
            # ``max_submit_retries`` retries.
            for n_retries in range(1 + self.max_submit_retries):
                try:
                    if self.using_template:
                        args = {'JOBLIB_FUNC_FILE_ID': func_file_id}
                        future = self.executor.submit(**args)
                        log.debug(
                            "Started custom script from template "
                            "%s with arguments %s",
                            self.executor.from_template_id, args)
                    else:
                        future = self.executor.submit(fn=cmd)
                        log.debug(
                            "started container script with "
                            "command: %s", cmd)
                    # Stop retrying if submission was successful.
                    break
                except CivisAPIError as e:
                    # ``n_retries`` retries have been used so far. If none
                    # are left, give up and raise an exception.
                    retries_left = self.max_submit_retries - n_retries
                    if retries_left < 1:
                        raise JobSubmissionError(e)

                    log.debug("Retrying submission. %d retries left",
                              retries_left)

                    # Sleep with exponentially increasing intervals in case
                    # the issue persists for a while.
                    time.sleep(2**n_retries)

            if self.executor.max_n_retries:
                # Start the ContainerFuture polling.
                # This will use more API calls, but will
                # allow the ContainerFuture to launch
                # retries if necessary.
                # (This is only relevant if we're not using the
                # notifications endpoint.)
                future.done()

            result = _CivisBackendResult(future, callback)

        return result