def _init_civis_backend(self):
    """Validate settings and build the Civis API client and the executor.

    Reads ``from_template_id``, ``max_submit_retries``, ``client`` and
    ``executor_kwargs`` from ``self`` and sets ``using_template``,
    ``client`` and ``executor`` on it.

    Raises
    ------
    ValueError
        If ``max_submit_retries`` is negative.
    """
    self.using_template = (self.from_template_id is not None)

    if self.max_submit_retries < 0:
        raise ValueError(
            "max_submit_retries cannot be negative (value = %d)" %
            self.max_submit_retries)

    # Fall back to a default API client when none was supplied.
    self.client = self.client or civis.APIClient()

    # Select the executor from ``using_template`` (not from the truthiness
    # of the ID) so the executor type always agrees with the flag that
    # callers use to decide how to submit jobs.
    if self.using_template:
        self.executor = CustomScriptExecutor(self.from_template_id,
                                             client=self.client,
                                             **self.executor_kwargs)
    else:
        self.executor = _ContainerShellExecutor(client=self.client,
                                                **self.executor_kwargs)
def __init__(self, setup_cmd=_DEFAULT_SETUP_CMD, from_template_id=None,
             max_submit_retries=0, client=None, **executor_kwargs):
    """Validate the arguments and build the API client and executor.

    Parameters
    ----------
    setup_cmd
        Stored on the instance as ``setup_cmd``.
    from_template_id : optional
        When not None, a ``CustomScriptExecutor`` is built from it;
        otherwise a ``_ContainerShellExecutor`` is used.
    max_submit_retries : int
        Stored on the instance; must not be negative.
    client : optional
        API client; ``civis.APIClient(resources='all')`` is created when
        None.
    **executor_kwargs
        Forwarded unchanged to the executor constructor.

    Raises
    ------
    ValueError
        If ``max_submit_retries`` is negative.
    """
    if max_submit_retries < 0:
        raise ValueError(
            "max_submit_retries cannot be negative (value = %d)" %
            max_submit_retries)

    if client is None:
        client = civis.APIClient(resources='all')
    self._client = client

    self.setup_cmd = setup_cmd
    self.max_submit_retries = max_submit_retries
    self.using_template = (from_template_id is not None)

    # Select the executor from ``using_template`` (not from the truthiness
    # of the ID) so the executor type always agrees with the flag.
    if self.using_template:
        self.executor = CustomScriptExecutor(from_template_id,
                                             client=client,
                                             **executor_kwargs)
    else:
        self.executor = _ContainerShellExecutor(client=client,
                                                **executor_kwargs)
def _check_executor(from_template_id=None):
    """Walk a submitted future through its lifecycle on a mocked client.

    Builds a ``CustomScriptExecutor`` when ``from_template_id`` is truthy
    and a ``_ContainerShellExecutor`` otherwise, submits one job, and then
    checks ``running()``, ``job_id``, ``done()``, ``cancel()``,
    ``cancel_all()`` and ``shutdown()`` while the mocked run state is
    switched between "running" and "succeeded".

    Returns the mocked client so callers can inspect the calls made on it.
    """
    job_id = 42
    run_id = 43
    client_mock = _setup_client_mock(job_id, run_id, n_failures=0)
    mock_run = client_mock.scripts.post_containers_runs()

    if not from_template_id:
        executor = _ContainerShellExecutor(client=client_mock,
                                           polling_interval=0.01)
        future = executor.submit("foo")
    else:
        executor = CustomScriptExecutor(from_template_id=from_template_id,
                                        client=client_mock,
                                        polling_interval=0.01)
        future = executor.submit(my_param='spam')

    # While the run reports "running": running(), job_id and done().
    mock_run.state = "running"
    assert future.running(), "future is incorrectly marked as not running"
    assert future.job_id == job_id, "job_id not stored properly"
    assert not future.done(), "future is incorrectly marked as done"

    # After an explicit cancel: cancelled() and running().
    future.cancel()
    assert future.cancelled(), "cancelled() did not return True as expected"
    assert not future.running(), "running() did not return False as expected"

    # Once the run reports "succeeded": done().
    mock_run.state = "succeeded"
    assert future.done(), "done() did not return True as expected"

    # Cancelling every job through the executor.
    mock_run.state = "running"
    executor.cancel_all()
    assert future.cancelled(), "cancel_all() failed"

    # Shutting the executor down.
    executor.shutdown(wait=True)
    assert future.done(), "shutdown() failed"

    return client_mock
class _CivisBackend(ParallelBackendBase):
    """The backend class that tells joblib how to use Civis to run jobs

    Users should interact with this through ``make_backend_factory``.

    Parameters
    ----------
    setup_cmd : str
        Shell command placed (joined with ``&&``) in front of the worker
        command of each container script.
    from_template_id : optional
        When not None, jobs are submitted through a
        ``CustomScriptExecutor`` built from this template; otherwise a
        ``_ContainerShellExecutor`` runs the generated shell command.
    max_submit_retries : int
        How many times a submission that raised ``CivisAPIError`` is
        retried. Must not be negative.
    client : optional
        API client to use; ``civis.APIClient()`` is created when falsy.
    remote_backend : str
        Pickled together with each function. When it equals ``'civis'``
        this backend object itself is pickled in its place.
    nesting_level : int
        Stored on the instance as ``nesting_level``.
    **executor_kwargs
        Forwarded unchanged to the executor constructor.
    """
    uses_threads = False
    supports_sharedmem = False
    supports_timeout = True

    def __init__(self, setup_cmd=_DEFAULT_SETUP_CMD,
                 from_template_id=None,
                 max_submit_retries=0, client=None,
                 remote_backend='sequential',
                 nesting_level=0,
                 **executor_kwargs):
        self.setup_cmd = setup_cmd
        self.from_template_id = from_template_id
        self.max_submit_retries = max_submit_retries
        self.client = client
        self.remote_backend = remote_backend
        self.executor_kwargs = executor_kwargs
        self.nesting_level = nesting_level
        self._init_civis_backend()

    @classmethod
    def from_existing(cls, klass):
        """Build a new `_CivisBackend` from an existing one."""
        return cls(
            setup_cmd=klass.setup_cmd,
            from_template_id=klass.from_template_id,
            max_submit_retries=klass.max_submit_retries,
            client=klass.client,
            remote_backend=klass.remote_backend,
            **klass.executor_kwargs)

    def _init_civis_backend(self):
        """init the Civis API client and the executors

        Raises ``ValueError`` if ``max_submit_retries`` is negative.
        """
        self.using_template = (self.from_template_id is not None)

        if self.max_submit_retries < 0:
            raise ValueError(
                "max_submit_retries cannot be negative (value = %d)" %
                self.max_submit_retries)

        self.client = self.client or civis.APIClient()

        # Select the executor from ``using_template`` (not from the
        # truthiness of the ID) so the executor type always matches the
        # submission style chosen in ``apply_async``.
        if self.using_template:
            self.executor = CustomScriptExecutor(self.from_template_id,
                                                 client=self.client,
                                                 **self.executor_kwargs)
        else:
            self.executor = _ContainerShellExecutor(client=self.client,
                                                    **self.executor_kwargs)

    def effective_n_jobs(self, n_jobs):
        """Return the number of jobs to use; ``-1`` maps to ``_ALL_JOBS``.

        Raises ``ValueError`` for any other non-positive value.
        """
        if n_jobs == -1:
            n_jobs = _ALL_JOBS
        if n_jobs <= 0:
            raise ValueError("Please request a positive number of jobs, "
                             "or use \"-1\" to request a default "
                             "of {} jobs.".format(_ALL_JOBS))
        return n_jobs

    def abort_everything(self, ensure_ready=True):
        """Cancel all jobs; also shut the executor down if not ensure_ready.
        """
        # This method is called when a job has raised an exception.
        # In that case, we're not going to finish computations, so
        # we should free up Platform resources in any remaining jobs.
        self.executor.cancel_all()
        if not ensure_ready:
            self.executor.shutdown(wait=False)

    def terminate(self):
        """Shutdown the workers and free the shared memory."""
        return self.abort_everything(ensure_ready=True)

    def apply_async(self, func, callback=None):
        """Schedule func to be run

        The function is pickled, uploaded as a Civis File, and submitted
        through the executor. Submissions that raise ``CivisAPIError`` are
        retried up to ``max_submit_retries`` times with exponentially
        growing sleeps.

        Returns
        -------
        _CivisBackendResult
            Wraps the submitted future and ``callback``.

        Raises
        ------
        JobSubmissionError
            If the submission still fails once all retries are used up.
        """
        # Serialize func to a temporary file and upload it to a Civis File.
        # Make the temporary files expire in a week.
        expires_at = (datetime.now() + timedelta(days=7)).isoformat()
        # When the remote side should also use Civis, ship this backend
        # object itself; otherwise ship the configured backend value.
        if self.remote_backend == 'civis':
            remote_backend = self
        else:
            remote_backend = self.remote_backend
        with TemporaryDirectory() as tempdir:
            temppath = os.path.join(tempdir, "civis_joblib_backend_func")
            with open(temppath, "wb") as tmpfile:
                cloudpickle.dump((func, remote_backend),
                                 tmpfile,
                                 pickle.HIGHEST_PROTOCOL)
            with open(temppath, "rb") as tmpfile:
                func_file_id = \
                    _robust_file_to_civis(tmpfile,
                                          "civis_joblib_backend_func",
                                          n_retries=5,
                                          delay=0.5,
                                          expires_at=expires_at,
                                          client=self.client)
                log.debug("uploaded serialized function to File: %d",
                          func_file_id)

            # Use the Civis CLI client to download the job runner script into
            # the container, and then run it on the uploaded job.
            # Only download the runner script if it doesn't already
            # exist in the destination environment.
            runner_remote_path = "civis_joblib_worker"
            cmd = ("{setup_cmd} && "
                   "if command -v {runner_remote_path} >/dev/null; "
                   "then exec {runner_remote_path} {func_file_id}; "
                   "else pip install civis=={civis_version} && "
                   "exec {runner_remote_path} {func_file_id}; fi "
                   .format(civis_version=civis.__version__,
                           runner_remote_path=runner_remote_path,
                           func_file_id=func_file_id,
                           setup_cmd=self.setup_cmd))

            # Try to submit the command, with optional retrying for certain
            # error types.
            for n_retries in range(1 + self.max_submit_retries):
                try:
                    if self.using_template:
                        args = {'JOBLIB_FUNC_FILE_ID': func_file_id}
                        future = self.executor.submit(**args)
                        log.debug(
                            "Started custom script from template "
                            "%s with arguments %s",
                            self.executor.from_template_id, args)
                    else:
                        future = self.executor.submit(fn=cmd)
                        log.debug(
                            "started container script with "
                            "command: %s", cmd)
                    # Stop retrying if submission was successful.
                    break
                except CivisAPIError as e:
                    # If we've retried the maximum number of times already,
                    # then raise an exception. ``n_retries`` retries have
                    # been used so far, so exactly ``max_submit_retries``
                    # retries are allowed in total.
                    retries_left = self.max_submit_retries - n_retries
                    if retries_left < 1:
                        raise JobSubmissionError(e)

                    log.debug("Retrying submission. %d retries left",
                              retries_left)

                    # Sleep with exponentially increasing intervals in case
                    # the issue persists for a while.
                    time.sleep(2**n_retries)

            if self.executor.max_n_retries:
                # Start the ContainerFuture polling.
                # This will use more API calls, but will
                # allow the ContainerFuture to launch
                # retries if necessary.
                # (This is only relevant if we're not using the
                # notifications endpoint.)
                future.done()

            result = _CivisBackendResult(future, callback)

        return result

    def __getstate__(self):
        """override pickle to remove threading and civis APIClient objects"""
        state = self.__dict__.copy()
        if 'client' in state:
            state['client'] = None
        if 'executor' in state:
            del state['executor']

        # the parallel attribute gets added by the parent class when the
        # backend is in use.
        if 'parallel' in state:
            state['parallel'] = None
        return state

    def __setstate__(self, state):
        """re-init the backend when unpickling"""
        self.__dict__.update(state)
        self._init_civis_backend()
class _CivisBackend(ParallelBackendBase):
    """The backend class that tells joblib how to use Civis to run jobs

    Users should interact with this through ``make_backend_factory``.

    Parameters
    ----------
    setup_cmd : str
        Shell command placed (joined with ``&&``) in front of the worker
        command of each container script.
    from_template_id : optional
        When not None, jobs are submitted through a
        ``CustomScriptExecutor`` built from this template; otherwise a
        ``_ContainerShellExecutor`` runs the generated shell command.
    max_submit_retries : int
        How many times a submission that raised ``CivisAPIError`` is
        retried. Must not be negative.
    client : optional
        API client to use; ``civis.APIClient(resources='all')`` is
        created when None.
    **executor_kwargs
        Forwarded unchanged to the executor constructor.
    """
    def __init__(self, setup_cmd=_DEFAULT_SETUP_CMD,
                 from_template_id=None,
                 max_submit_retries=0, client=None, **executor_kwargs):
        if max_submit_retries < 0:
            raise ValueError(
                "max_submit_retries cannot be negative (value = %d)" %
                max_submit_retries)

        if client is None:
            client = civis.APIClient(resources='all')
        self._client = client

        self.setup_cmd = setup_cmd
        self.max_submit_retries = max_submit_retries
        self.using_template = (from_template_id is not None)

        # Select the executor from ``using_template`` (not from the
        # truthiness of the ID) so the executor type always matches the
        # submission style chosen in ``apply_async``.
        if self.using_template:
            self.executor = CustomScriptExecutor(from_template_id,
                                                 client=client,
                                                 **executor_kwargs)
        else:
            self.executor = _ContainerShellExecutor(client=client,
                                                    **executor_kwargs)

    def effective_n_jobs(self, n_jobs):
        """Return the number of jobs to use; ``-1`` maps to ``_ALL_JOBS``.

        Raises ``ValueError`` for any other non-positive value.
        """
        if n_jobs == -1:
            n_jobs = _ALL_JOBS
        if n_jobs <= 0:
            raise ValueError("Please request a positive number of jobs, "
                             "or use \"-1\" to request a default "
                             "of {} jobs.".format(_ALL_JOBS))
        return n_jobs

    def abort_everything(self, ensure_ready=True):
        """Cancel all jobs; also shut the executor down if not ensure_ready.
        """
        # This method is called when a job has raised an exception.
        # In that case, we're not going to finish computations, so
        # we should free up Platform resources in any remaining jobs.
        self.executor.cancel_all()
        if not ensure_ready:
            self.executor.shutdown(wait=False)

    def apply_async(self, func, callback=None):
        """Schedule func to be run

        The function is dumped with joblib, uploaded as a Civis File, and
        submitted through the executor. Submissions that raise
        ``CivisAPIError`` are retried up to ``max_submit_retries`` times
        with exponentially growing sleeps.

        Returns
        -------
        _CivisBackendResult
            Wraps the submitted future and ``callback``.

        Raises
        ------
        JobSubmissionError
            If the submission still fails once all retries are used up.
        """
        # Serialize func to a temporary file and upload it to a Civis File.
        # Make the temporary files expire in a week.
        expires_at = (datetime.now() + timedelta(days=7)).isoformat()
        with TemporaryDirectory() as tempdir:
            temppath = os.path.join(tempdir, "civis_joblib_backend_func")
            # compress=3 is a compromise between space and read/write times
            # (https://github.com/joblib/joblib/blob/18f9b4ce95e8788cc0e9b5106fc22573d768c44b/joblib/numpy_pickle.py#L358).
            joblib.dump(func, temppath, compress=3)
            with open(temppath, "rb") as tmpfile:
                func_file_id = \
                    civis.io.file_to_civis(tmpfile,
                                           "civis_joblib_backend_func",
                                           expires_at=expires_at,
                                           client=self._client)
                log.debug("uploaded serialized function to File: %d",
                          func_file_id)

            # Use the Civis CLI client to download the job runner script into
            # the container, and then run it on the uploaded job.
            # Only download the runner script if it doesn't already
            # exist in the destination environment.
            runner_remote_path = "civis_joblib_worker"
            cmd = ("{setup_cmd} && "
                   "if command -v {runner_remote_path} >/dev/null; "
                   "then exec {runner_remote_path} {func_file_id}; "
                   "else pip install civis=={civis_version} && "
                   "pip install joblib=={jl_version} && "
                   "exec {runner_remote_path} {func_file_id}; fi"
                   .format(jl_version=joblib.__version__,
                           civis_version=civis.__version__,
                           runner_remote_path=runner_remote_path,
                           func_file_id=func_file_id,
                           setup_cmd=self.setup_cmd))

            # Try to submit the command, with optional retrying for certain
            # error types.
            for n_retries in range(1 + self.max_submit_retries):
                try:
                    if self.using_template:
                        args = {'JOBLIB_FUNC_FILE_ID': func_file_id}
                        future = self.executor.submit(**args)
                        log.debug(
                            "Started custom script from template "
                            "%s with arguments %s",
                            self.executor.from_template_id, args)
                    else:
                        future = self.executor.submit(fn=cmd)
                        log.debug(
                            "started container script with "
                            "command: %s", cmd)
                    # Stop retrying if submission was successful.
                    break
                except CivisAPIError as e:
                    # If we've retried the maximum number of times already,
                    # then raise an exception. ``n_retries`` retries have
                    # been used so far, so exactly ``max_submit_retries``
                    # retries are allowed in total.
                    retries_left = self.max_submit_retries - n_retries
                    if retries_left < 1:
                        raise JobSubmissionError(e)

                    log.debug("Retrying submission. %d retries left",
                              retries_left)

                    # Sleep with exponentially increasing intervals in case
                    # the issue persists for a while.
                    time.sleep(2**n_retries)

            if self.executor.max_n_retries:
                # Start the ContainerFuture polling.
                # This will use more API calls, but will
                # allow the ContainerFuture to launch
                # retries if necessary.
                # (This is only relevant if we're not using the
                # notifications endpoint.)
                future.done()

            result = _CivisBackendResult(future, callback)

        return result