def get(self, timeout=None):
    """
    Return the result when it arrives.

    If timeout is not None and the result does not arrive within
    timeout seconds then multiprocessing.TimeoutError is raised. If the
    remote call raised an exception then that exception will be
    reraised by get().
    """
    try:
        res = self._q.get(timeout=timeout)
    except queue.Empty:
        raise multiprocessing.TimeoutError("Timed out")
    if isinstance(res, Exception):
        raise res
    return res
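# A minimal, self-contained sketch of the pattern above: a blocking read from a
# `multiprocessing.Queue` raises `queue.Empty` on timeout, which is converted
# into `multiprocessing.TimeoutError`, and exception instances placed on the
# queue by a worker are re-raised in the consumer. The `fetch` helper is
# illustrative, not part of any library.
import multiprocessing
import queue

def fetch(q, timeout=None):
    try:
        res = q.get(timeout=timeout)
    except queue.Empty:
        raise multiprocessing.TimeoutError("Timed out")
    if isinstance(res, Exception):
        raise res  # re-raise the remote failure in the caller
    return res

if __name__ == "__main__":
    q = multiprocessing.Queue()
    try:
        fetch(q, timeout=0.1)  # nothing was enqueued, so this times out
    except multiprocessing.TimeoutError as exc:
        print("no result:", exc)
    q.put(ValueError("worker failed"))
    try:
        fetch(q, timeout=1)
    except ValueError as exc:
        print("remote error re-raised:", exc)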
def step_wait(self, timeout=None):
    """
    Parameters
    ----------
    timeout : int or float, optional
        Number of seconds before the call to `step_wait` times out. If
        `None`, the call to `step_wait` never times out.

    Returns
    -------
    observations : sample from `observation_space`
        A batch of observations from the vectorized environment.

    rewards : `np.ndarray` instance (dtype `np.float_`)
        A vector of rewards from the vectorized environment.

    dones : `np.ndarray` instance (dtype `np.bool_`)
        A vector whose entries indicate whether the episode has ended.

    infos : list of dict
        A list of auxiliary diagnostic information.
    """
    self._assert_is_running()
    if self._state != AsyncState.WAITING_STEP:
        raise NoAsyncCallError('Calling `step_wait` without any prior call '
            'to `step_async`.', AsyncState.WAITING_STEP.value)

    if not self._poll(timeout):
        self._state = AsyncState.DEFAULT
        raise mp.TimeoutError('The call to `step_wait` has timed out after '
            '{0} second{1}.'.format(timeout, 's' if timeout > 1 else ''))

    results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes])
    self._raise_if_errors(successes)
    self._state = AsyncState.DEFAULT
    observations_list, rewards, dones, infos = zip(*results)

    if not self.shared_memory:
        self.observations = concatenate(observations_list, self.observations,
            self.single_observation_space)

    return (deepcopy(self.observations) if self.copy else self.observations,
        np.array(rewards), np.array(dones, dtype=np.bool_), infos)
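# A hedged usage sketch for the two-phase step API above, assuming a gym-style
# `AsyncVectorEnv`; `step_with_deadline` and the timeout value are illustrative.
# On timeout the environment has already been returned to its default state, so
# the caller can retry or shut the workers down.
import multiprocessing as mp

def step_with_deadline(env, actions, timeout=5.0):
    env.step_async(actions)
    try:
        return env.step_wait(timeout=timeout)
    except mp.TimeoutError:
        # A worker stalled; force-terminate the sub-processes rather than hang.
        env.close(terminate=True)
        raise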
def __check_for_results(self, timeout: Optional[float] = None) -> None:
    input_batch, result = self.__results[0]

    # If this call is being made in a context where it is intended to be
    # nonblocking, checking if the result is ready (rather than trying to
    # retrieve the result itself) avoids costly synchronization.
    if timeout == 0 and not result.ready():
        # ``multiprocessing.TimeoutError`` (rather than builtin
        # ``TimeoutError``) maintains consistency with ``AsyncResult.get``.
        raise multiprocessing.TimeoutError()

    i, output_batch = result.get(timeout=timeout)

    # TODO: This does not handle rejections from the next step!
    for message in output_batch:
        self.__next_step.poll()
        self.__next_step.submit(message)

    if i != len(input_batch):
        logger.warning(
            "Received incomplete batch (%0.2f%% complete), resubmitting...",
            i / len(input_batch) * 100,
        )
        # TODO: This reserializes all the ``SerializedMessage`` data prior
        # to the processed index even though the values at those indices
        # will never be unpacked. It probably makes sense to remove that
        # data from the batch to avoid unnecessary serialization overhead.
        self.__results[0] = (
            input_batch,
            self.__pool.apply_async(
                parallel_transform_worker_apply,
                (
                    self.__transform_function,
                    input_batch,
                    output_batch.block,
                    i,
                ),
            ),
        )
        return

    logger.debug("Completed %r, reclaiming blocks...", input_batch)
    self.__input_blocks.append(input_batch.block)
    self.__output_blocks.append(output_batch.block)
    self.__batches_in_progress.decrement()

    del self.__results[0]
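# The nonblocking check above reduces to a standard-library pattern:
# `AsyncResult.ready()` is a cheap poll, while `AsyncResult.get(timeout=...)`
# raises `multiprocessing.TimeoutError` if the worker has not finished in time.
# A minimal sketch; `square` is a stand-in workload.
import multiprocessing

def square(x):
    return x * x

if __name__ == "__main__":
    with multiprocessing.Pool(processes=1) as pool:
        result = pool.apply_async(square, (3,))
        print("ready immediately?", result.ready())  # nonblocking poll
        try:
            print("result:", result.get(timeout=5))  # 9, or TimeoutError
        except multiprocessing.TimeoutError:
            print("worker did not finish within 5 seconds")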
def render_wait(self, timeout=None):
    self._assert_is_running()
    if self._state.value != AsyncState.WAITING_RENDER.value:
        raise NoAsyncCallError('Calling `render_wait` without any prior '
            'call to `render_async`.', AsyncState.WAITING_RENDER.value)

    if not self._poll(timeout):
        self._state = self.default_state
        raise mp.TimeoutError('The call to `render_wait` has timed out after '
            '{0} second{1}.'.format(timeout, 's' if timeout > 1 else ''))

    result, success = self.parent_pipes[0].recv()
    self._raise_if_errors([success])
    self._state = self.default_state
    return result
def run(seconds, fun, *args, **kwargs):
    if seconds >= 0:
        pool = mp.get_context("spawn").Pool(processes=1)
        try:
            proc = pool.apply_async(fun, args, kwargs)
            return proc.get(seconds)
        except mp.TimeoutError:
            raise mp.TimeoutError()
        finally:
            # terminate() stops the worker immediately on every exit path;
            # close() after terminate() is a no-op but fully shuts the pool down.
            pool.terminate()
            pool.close()
    else:
        # If no timeout, then there is no point in incurring the cost of
        # running in a separate process, so call the function locally.
        return fun(*args, **kwargs)
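# Usage sketch for `run` above (assumes it is in scope). A 5-second sleep
# against a 1-second budget is cut off and surfaces as `mp.TimeoutError`;
# a negative budget skips the pool entirely and runs in-process.
import multiprocessing as mp
import time

if __name__ == "__main__":
    try:
        run(1, time.sleep, 5)
    except mp.TimeoutError:
        print("function exceeded its 1-second budget")
    print(run(-1, max, 2, 3))  # no timeout: runs locally, prints 3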
def recv_draw(processes, timeout=3600):
    if not processes:
        raise ValueError('No processes.')
    pipes = [proc._msg_pipe for proc in processes]
    # Pass the timeout through to `wait`; otherwise it blocks indefinitely
    # and the TimeoutError branch below is unreachable.
    ready = multiprocessing.connection.wait(pipes, timeout)
    if not ready:
        raise multiprocessing.TimeoutError('No message from samplers.')
    idxs = {id(proc._msg_pipe): proc for proc in processes}
    proc = idxs[id(ready[0])]
    msg = ready[0].recv()

    if msg[0] == 'error':
        old = msg[1]
        six.raise_from(RuntimeError('Chain %s failed.' % proc.chain), old)
    elif msg[0] == 'writing_done':
        proc._readable = True
        proc._num_samples += 1
        return (proc,) + msg[1:]
    else:
        raise ValueError('Sampler sent bad message.')
def terminate_all(processes, patience=2):
    for process in processes:
        try:
            process.abort()
        except Exception:
            pass

    start_time = time.time()
    try:
        for process in processes:
            # Remaining share of the patience budget for this join.
            timeout = start_time + patience - time.time()
            if timeout < 0:
                raise multiprocessing.TimeoutError()
            process.join(timeout)
    except multiprocessing.TimeoutError:
        logger.warning("Chain processes did not terminate as expected. "
                       "Terminating forcefully...")
        for process in processes:
            process.terminate()
        for process in processes:
            process.join()
def waitfor(self, sess, varname, timeout=None):
    '''Wait on a boolean variable `varname` to be set to true for
    session `sess` as read from `sess.vars['varname']`.

    This call blocks until the attr is set to `True`, most usually
    by a callback.

    WARNING
    -------
    Do not call this from the event loop thread!
    '''
    # retrieve cached event/blocker if possible
    event = mp.Event() if not self._blockers else self._blockers.pop()
    waiters = self._sess2waiters.setdefault(sess, {})  # sess -> {vars: ..}
    events = waiters.setdefault(varname, [])  # var -> [events]
    events.append(event)

    def cleanup(event):
        """Dealloc events and waiter data structures.
        """
        event.clear()  # make it block for the next cached use
        events.remove(event)  # event lifetime expires with this call
        self._blockers.append(event)  # cache for later use
        if not events:  # event list is now empty so delete
            waiters.pop(varname)
        if not waiters:  # no vars being waited on so delete
            self._sess2waiters.pop(sess)

    # event was set faster than we could wait on it
    if sess.vars.get(varname):
        cleanup(event)
        return True

    res = event.wait(timeout=timeout)  # block
    if timeout and not res:
        raise mp.TimeoutError("'{}' was not set within '{}' seconds".format(
            varname, timeout))
    cleanup(event)
    return res
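# The core pattern behind `waitfor`, reduced to the stdlib: block on an
# `mp.Event` with a deadline and convert a timed-out wait into
# `mp.TimeoutError`. A minimal sketch; `wait_for_flag` is illustrative,
# not the library's API.
import multiprocessing as mp

def wait_for_flag(event, timeout=None):
    # Event.wait returns False when the timeout expired with the flag unset.
    if not event.wait(timeout=timeout):
        raise mp.TimeoutError(
            "flag was not set within {} seconds".format(timeout))
    return True

if __name__ == "__main__":
    flag = mp.Event()
    try:
        wait_for_flag(flag, timeout=0.1)
    except mp.TimeoutError as exc:
        print(exc)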
def reset_wait(self, timeout=None):
    """
    Parameters
    ----------
    timeout : int or float, optional
        Number of seconds before the call to `reset_wait` times out. If
        `None`, the call to `reset_wait` never times out.

    Returns
    -------
    observations : sample from `observation_space`
        A batch of observations from the vectorized environment.
    """
    self._assert_is_running()
    if self._state != AsyncState.WAITING_RESET:
        raise NoAsyncCallError(
            "Calling `reset_wait` without any prior call to `reset_async`.",
            AsyncState.WAITING_RESET.value,
        )

    if not self._poll(timeout):
        self._state = AsyncState.DEFAULT
        raise mp.TimeoutError(
            "The call to `reset_wait` has timed out after "
            "{0} second{1}.".format(timeout, "s" if timeout > 1 else ""))

    results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes])
    self._raise_if_errors(successes)
    self._state = AsyncState.DEFAULT

    if not self.shared_memory:
        self.observations = concatenate(results, self.observations,
                                        self.single_observation_space)

    return deepcopy(self.observations) if self.copy else self.observations
def recv_draw(processes, timeout=3600):
    if not processes:
        raise ValueError("No processes.")
    pipes = [proc._msg_pipe for proc in processes]
    # Pass the timeout through to `wait`; otherwise it blocks indefinitely
    # and the TimeoutError branch below is unreachable.
    ready = multiprocessing.connection.wait(pipes, timeout)
    if not ready:
        raise multiprocessing.TimeoutError("No message from samplers.")
    idxs = {id(proc._msg_pipe): proc for proc in processes}
    proc = idxs[id(ready[0])]
    msg = ready[0].recv()

    if msg[0] == "error":
        warns, old_error = msg[1:]
        if warns is not None:
            error = ParallelSamplingError(str(old_error), proc.chain, warns)
        else:
            error = RuntimeError("Chain %s failed." % proc.chain)
        raise error from old_error
    elif msg[0] == "writing_done":
        proc._readable = True
        proc._num_samples += 1
        return (proc,) + msg[1:]
    else:
        raise ValueError("Sampler sent bad message.")
def raise_timeout(self, **kwargs):
    raise multiprocessing.TimeoutError()
with tqdm(strains) as pbar2:
    for strain in pbar2:
        try:
            if strain.acc not in server:
                db = server.new_database(strain.acc, description="")
                server.commit()

                def save_strain():
                    save_sequences(strain)

                p = multiprocessing.Process(target=save_strain)
                p.start()
                p.join(180)
                if p.is_alive():
                    p.terminate()
                    p.join()
                    raise multiprocessing.TimeoutError()
                strain.loaded = True
                strain.save()
        except Exception as ex:
            traceback.print_exc()
            server.rollback()
            if strain.acc in server:
                server.remove_database(strain.acc)
                server.commit()
def step_wait(self, timeout=None):
    """Wait for the calls to :obj:`step` in each sub-environment to finish.

    Parameters
    ----------
    timeout : int or float, optional
        Number of seconds before the call to :meth:`step_wait` times out.
        If ``None``, the call to :meth:`step_wait` never times out.

    Returns
    -------
    observations : element of :attr:`~VectorEnv.observation_space`
        A batch of observations from the vectorized environment.

    rewards : :obj:`np.ndarray`, dtype :obj:`np.float_`
        A vector of rewards from the vectorized environment.

    dones : :obj:`np.ndarray`, dtype :obj:`np.bool_`
        A vector whose entries indicate whether the episode has ended.

    infos : list of dict
        A list of auxiliary diagnostic information dicts from sub-environments.

    Raises
    ------
    ClosedEnvironmentError
        If the environment was closed (if :meth:`close` was previously called).

    NoAsyncCallError
        If :meth:`step_wait` was called without any prior call to
        :meth:`step_async`.

    TimeoutError
        If :meth:`step_wait` timed out.
    """
    self._assert_is_running()
    if self._state != AsyncState.WAITING_STEP:
        raise NoAsyncCallError(
            "Calling `step_wait` without any prior call to `step_async`.",
            AsyncState.WAITING_STEP.value,
        )

    if not self._poll(timeout):
        self._state = AsyncState.DEFAULT
        raise mp.TimeoutError(
            f"The call to `step_wait` has timed out after {timeout} second(s)."
        )

    results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes])
    self._raise_if_errors(successes)
    self._state = AsyncState.DEFAULT
    observations_list, rewards, dones, infos = zip(*results)

    if not self.shared_memory:
        self.observations = concatenate(
            self.single_observation_space,
            observations_list,
            self.observations,
        )

    return (
        deepcopy(self.observations) if self.copy else self.observations,
        np.array(rewards),
        np.array(dones, dtype=np.bool_),
        infos,
    )
def reset_wait(
    self,
    timeout=None,
    seed: Optional[int] = None,
    return_info: bool = False,
    options: Optional[dict] = None,
):
    """
    Parameters
    ----------
    timeout : int or float, optional
        Number of seconds before the call to `reset_wait` times out. If
        `None`, the call to `reset_wait` never times out.
    seed : ignored
    options : ignored

    Returns
    -------
    element of :attr:`~VectorEnv.observation_space`
        A batch of observations from the vectorized environment.
    infos : list of dicts containing metadata

    Raises
    ------
    ClosedEnvironmentError
        If the environment was closed (if :meth:`close` was previously called).
    NoAsyncCallError
        If :meth:`reset_wait` was called without any prior call to
        :meth:`reset_async`.
    TimeoutError
        If :meth:`reset_wait` timed out.
    """
    self._assert_is_running()
    if self._state != AsyncState.WAITING_RESET:
        raise NoAsyncCallError(
            "Calling `reset_wait` without any prior call to `reset_async`.",
            AsyncState.WAITING_RESET.value,
        )

    if not self._poll(timeout):
        self._state = AsyncState.DEFAULT
        raise mp.TimeoutError(
            f"The call to `reset_wait` has timed out after {timeout} second(s)."
        )

    results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes])
    self._raise_if_errors(successes)
    self._state = AsyncState.DEFAULT

    infos = None
    if return_info:
        results, infos = zip(*results)
        infos = list(infos)

    if not self.shared_memory:
        self.observations = concatenate(
            self.single_observation_space, results, self.observations
        )

    observations = deepcopy(self.observations) if self.copy else self.observations
    return (observations, infos) if return_info else observations
def test_subprocess_trace(datadog_tracer: ddtrace.Tracer, caplog: LogCaptureFixture):
    """Verify that spans created in subprocesses are written to the queue and
    then flushed to the server, when wrapped in the SubprocessTracer."""
    # Enable log output for this logger for the duration of this test
    caplog.set_level(logging.DEBUG, DatadogLoggingTraceFilter._log.name)
    test = f"{inspect.stack()[0][3]}"
    # And also send its output through a multiprocessing queue to surface logs
    # from the subprocess
    log_queue = mp.Queue()
    DatadogLoggingTraceFilter._log.addHandler(QueueHandler(log_queue))
    DatadogLoggingTraceFilter.activate()
    subproc_test_msg = f"a test message was logged in a subprocess of {test}"
    state = mp.Queue()
    stop_sentinel = "-->STOP<--"

    with ddtrace.tracer.trace(
        name=f"{test}_operation",
        service=f"{test}_service",
        resource=f"{test}_resource",
        span_type=SpanTypes.TEST,
    ) as span:
        trace_id = span.trace_id
        logger = logging.getLogger(f"{test}_logger")
        test_msg = f"a test message was logged during {test}"
        logger.warning(test_msg)
        ctx = mp.get_context("fork")
        worker = ctx.Process(
            name=f"{test}_subproc",
            target=_do_things_in_subproc,
            args=(subproc_test_msg, state),
        )
        worker.start()
        worker.join(timeout=10)
        if worker.is_alive():
            worker.terminate()
            try:
                _drain_captured_log_queue(log_queue, stop_sentinel, caplog,
                                          force_immediate_stop=True)
            except Exception:
                print("Error draining captured log queue when handling "
                      "subproc TimeoutError")
            raise mp.TimeoutError(
                f"subprocess {worker.name} did not complete in timeout")

    DatadogLoggingTraceFilter._log.warning(stop_sentinel)
    subproc_trace_id, subproc_span_id = state.get(block=True, timeout=10)
    assert test_msg in caplog.text, "caplog.text did not seem to capture logging output during test"
    assert f"SPAN#{trace_id}" in caplog.text, "span marker not found in logging output"
    assert f"TRACE#{trace_id}" in caplog.text, "trace marker not found in logging output"
    assert f"resource {test}_resource" in caplog.text, "traced resource not found in logging output"
    # subprocess tracing should be a continuation of the trace in the parent process
    assert subproc_trace_id == trace_id
    _drain_captured_log_queue(log_queue, stop_sentinel, caplog)
    assert f"{subproc_span_id}" in caplog.text, "subproc span id not found in logging output"
    assert (f"resource {_do_things_in_subproc.__name__}_resource" in caplog.text
            ), "subproc traced resource not found in logging output"
def join(self, *args, **kwargs):
    self.process.join(*args, **kwargs)
    if self.process.is_alive():
        raise mp.TimeoutError()
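# Hypothetical usage of the wrapper above: `self.process` is assumed to be a
# `multiprocessing.Process`. `Process.join(timeout)` returns silently even when
# the child is still running, so the `is_alive()` check is what turns a missed
# deadline into `mp.TimeoutError`. The `Worker` class is illustrative only.
import multiprocessing as mp
import time

class Worker:
    def __init__(self, target, *args):
        self.process = mp.Process(target=target, args=args)
        self.process.start()

    def join(self, *args, **kwargs):
        self.process.join(*args, **kwargs)
        if self.process.is_alive():
            raise mp.TimeoutError()

if __name__ == "__main__":
    w = Worker(time.sleep, 5)
    try:
        w.join(0.1)  # raises: the child is still sleeping
    except mp.TimeoutError:
        w.process.terminate()
        w.process.join()  # reap the terminated child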