Example #1
    def get(self, timeout=None):
        """
        Return the result when it arrives. If timeout is not None and the
        result does not arrive within timeout seconds then
        multiprocessing.TimeoutError is raised. If the remote call raised an
        exception then that exception will be reraised by get().
        """

        try:
            res = self._q.get(timeout=timeout)
        except queue.Empty:
            raise multiprocessing.TimeoutError("Timed out")

        if isinstance(res, Exception):
            raise res
        return res
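A minimal sketch of how a queue-backed result object like this can be driven end to end; the `_Result` wrapper and `_worker` function are hypothetical stand-ins for the surrounding class, not part of the example's source.

import multiprocessing
import queue

class _Result:
    """Hypothetical owner of the `get` method above."""
    def __init__(self):
        self._q = multiprocessing.Queue()

    def get(self, timeout=None):
        try:
            res = self._q.get(timeout=timeout)
        except queue.Empty:
            raise multiprocessing.TimeoutError("Timed out")
        if isinstance(res, Exception):
            raise res
        return res

def _worker(result):
    # The producer side: deliver a value (or an Exception) through the queue.
    result._q.put(42)

if __name__ == "__main__":
    result = _Result()
    multiprocessing.Process(target=_worker, args=(result,)).start()
    print(result.get(timeout=5))  # prints 42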
Example #2
    def step_wait(self, timeout=None):
        """
        Parameters
        ----------
        timeout : int or float, optional
            Number of seconds before the call to `step_wait` times out. If
            `None`, the call to `step_wait` never times out.

        Returns
        -------
        observations : sample from `observation_space`
            A batch of observations from the vectorized environment.

        rewards : `np.ndarray` instance (dtype `np.float_`)
            A vector of rewards from the vectorized environment.

        dones : `np.ndarray` instance (dtype `np.bool_`)
            A vector whose entries indicate whether the episode has ended.

        infos : list of dict
            A list of auxiliary diagnostic information.
        """
        self._assert_is_running()
        if self._state != AsyncState.WAITING_STEP:
            raise NoAsyncCallError(
                'Calling `step_wait` without any prior call '
                'to `step_async`.', AsyncState.WAITING_STEP.value)

        if not self._poll(timeout):
            self._state = AsyncState.DEFAULT
            raise mp.TimeoutError(
                'The call to `step_wait` has timed out after '
                '{0} second{1}.'.format(timeout, 's' if timeout > 1 else ''))

        results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes])
        self._raise_if_errors(successes)
        self._state = AsyncState.DEFAULT
        observations_list, rewards, dones, infos = zip(*results)

        if not self.shared_memory:
            self.observations = concatenate(observations_list,
                                            self.observations,
                                            self.single_observation_space)

        return (deepcopy(self.observations)
                if self.copy else self.observations, np.array(rewards),
                np.array(dones, dtype=np.bool_), infos)
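In gym's `AsyncVectorEnv` this is the second half of `step`; a usage sketch (the environment id and batch size are arbitrary, and newer gym versions return slightly different tuples from `reset` and `step_wait`):

import gym
import numpy as np

envs = gym.vector.AsyncVectorEnv(
    [lambda: gym.make("CartPole-v1") for _ in range(4)])
envs.reset()
actions = np.array([envs.single_action_space.sample() for _ in range(4)])
envs.step_async(actions)
# step_wait raises mp.TimeoutError if workers take longer than 10 seconds.
observations, rewards, dones, infos = envs.step_wait(timeout=10)
envs.close()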
Example #3
    def __check_for_results(self, timeout: Optional[float] = None) -> None:
        input_batch, result = self.__results[0]

        # If this call is being made in a context where it is intended to be
        # nonblocking, checking if the result is ready (rather than trying to
        # retrieve the result itself) avoids costly synchronization.
        if timeout == 0 and not result.ready():
            # ``multiprocessing.TimeoutError`` (rather than builtin
            # ``TimeoutError``) maintains consistency with ``AsyncResult.get``.
            raise multiprocessing.TimeoutError()

        i, output_batch = result.get(timeout=timeout)

        # TODO: This does not handle rejections from the next step!
        for message in output_batch:
            self.__next_step.poll()
            self.__next_step.submit(message)

        if i != len(input_batch):
            logger.warning(
                "Received incomplete batch (%0.2f%% complete), resubmitting...",
                i / len(input_batch) * 100,
            )
            # TODO: This reserializes all the ``SerializedMessage`` data prior
            # to the processed index even though the values at those indices
            # will never be unpacked. It probably makes sense to remove that
            # data from the batch to avoid unnecessary serialization overhead.
            self.__results[0] = (
                input_batch,
                self.__pool.apply_async(
                    parallel_transform_worker_apply,
                    (
                        self.__transform_function,
                        input_batch,
                        output_batch.block,
                        i,
                    ),
                ),
            )
            return

        logger.debug("Completed %r, reclaiming blocks...", input_batch)
        self.__input_blocks.append(input_batch.block)
        self.__output_blocks.append(output_batch.block)
        self.__batches_in_progress.decrement()

        del self.__results[0]
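The nonblocking branch relies on `AsyncResult.ready()`; a minimal standalone sketch of that idiom, detached from the class above (the helper name is invented):

import multiprocessing

def get_nowait_or_block(result, timeout=None):
    # With timeout == 0, peek at readiness instead of paying for get().
    if timeout == 0 and not result.ready():
        raise multiprocessing.TimeoutError()
    return result.get(timeout=timeout)

if __name__ == "__main__":
    with multiprocessing.Pool(1) as pool:
        res = pool.apply_async(pow, (2, 10))
        res.wait()                                  # ensure it is ready
        print(get_nowait_or_block(res, timeout=0))  # prints 1024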
Example #4
    def render_wait(self, timeout=None):
        self._assert_is_running()
        if self._state.value != AsyncState.WAITING_RENDER.value:
            raise NoAsyncCallError(
                'Calling `render_wait` without any prior '
                'call to `render_async`.', AsyncState.WAITING_RENDER.value)

        if not self._poll(timeout):
            self._state = self.default_state
            raise mp.TimeoutError(
                'The call to `render_wait` has timed out after '
                '{0} second{1}.'.format(timeout, 's' if timeout > 1 else ''))

        result, success = self.parent_pipes[0].recv()
        self._raise_if_errors([success])
        self._state = self.default_state

        return result
Example #5
def run(seconds, fun, *args, **kwargs):
    if seconds >= 0:
        pool = mp.get_context("spawn").Pool(processes=1)
        try:
            proc = pool.apply_async(fun, args, kwargs)
            # get() raises mp.TimeoutError if no result arrives in `seconds`.
            return proc.get(seconds)
        finally:
            pool.terminate()
            pool.join()
    else:
        # With no timeout there is no point in paying the cost of a
        # separate process, so call the function directly.
        return fun(*args, **kwargs)
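A usage sketch, assuming `run` is imported as defined above:

import multiprocessing as mp
import time

if __name__ == "__main__":
    print(run(5, time.sleep, 1))   # finishes in time, returns None
    try:
        run(1, time.sleep, 5)      # exceeds the one-second budget
    except mp.TimeoutError:
        print("timed out")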
Example #6
    def recv_draw(processes, timeout=3600):
        if not processes:
            raise ValueError('No processes.')
        pipes = [proc._msg_pipe for proc in processes]
        ready = multiprocessing.connection.wait(pipes, timeout)
        if not ready:
            raise multiprocessing.TimeoutError('No message from samplers.')
        idxs = {id(proc._msg_pipe): proc for proc in processes}
        proc = idxs[id(ready[0])]
        msg = ready[0].recv()

        if msg[0] == 'error':
            old = msg[1]
            six.raise_from(RuntimeError('Chain %s failed.' % proc.chain), old)
        elif msg[0] == 'writing_done':
            proc._readable = True
            proc._num_samples += 1
            return (proc, ) + msg[1:]
        else:
            raise ValueError('Sampler sent bad message.')
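The building block here is `multiprocessing.connection.wait`, which returns the subset of connections that are ready, or an empty list on timeout; a minimal sketch:

import multiprocessing as mp
from multiprocessing.connection import wait

def _child(conn):
    conn.send(("writing_done", 7))

if __name__ == "__main__":
    parent, child = mp.Pipe()
    mp.Process(target=_child, args=(child,)).start()
    ready = wait([parent], timeout=3600)
    if not ready:
        raise mp.TimeoutError("No message from samplers.")
    print(ready[0].recv())  # ('writing_done', 7)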
Example #7
    def terminate_all(processes, patience=2):
        for process in processes:
            try:
                process.abort()
            except Exception:
                pass

        start_time = time.time()
        try:
            for process in processes:
                timeout = start_time + patience - time.time()
                if timeout < 0:
                    raise multiprocessing.TimeoutError()
                process.join(timeout)
        except multiprocessing.TimeoutError:
            logger.warning("Chain processes did not terminate as expected. "
                           "Terminating forcefully...")
            for process in processes:
                process.terminate()
            for process in processes:
                process.join()
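The same deadline arithmetic reads a little more clearly with `time.monotonic` and an explicit deadline; a sketch under those assumptions, not the library's code:

import multiprocessing
import time

def terminate_all(processes, patience=2.0):
    deadline = time.monotonic() + patience
    for process in processes:
        remaining = deadline - time.monotonic()
        if remaining < 0:
            raise multiprocessing.TimeoutError()
        process.join(remaining)   # all joins share one budget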
Example #8
    def waitfor(self, sess, varname, timeout=None):
        '''Wait on a boolean variable `varname` to be set to true for
        session `sess` as read from `sess.vars['varname']`.
        This call blocks until the attr is set to `True`, usually
        by a callback.

        WARNING
        -------
        Do not call this from the event loop thread!
        '''
        # retrieve cached event/blocker if possible
        event = mp.Event() if not self._blockers else self._blockers.pop()
        waiters = self._sess2waiters.setdefault(sess, {})  # sess -> {vars: ..}
        events = waiters.setdefault(varname, [])  # var -> [events]
        events.append(event)

        def cleanup(event):
            """Dealloc events and waiter data structures.
            """
            event.clear()  # make it block for next cached use
            events.remove(event)  # event lifetime expires with this call
            self._blockers.append(event)  # cache for later use
            if not events:  # event list is now empty so delete
                waiters.pop(varname)
            if not waiters:  # no vars being waited so delete
                self._sess2waiters.pop(sess)

        # event was set faster than we could wait on it
        if sess.vars.get(varname):
            cleanup(event)
            return True

        res = event.wait(timeout=timeout)  # block
        if timeout and not res:
            raise mp.TimeoutError(
                "'{}' was not set within '{}' seconds".format(
                    varname, timeout))
        cleanup(event)
        return res
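The core pattern `waitfor` builds on is just `Event.wait`, which returns `False` when a timeout elapses; a minimal sketch (the helper name is invented):

import multiprocessing as mp

def wait_flag(event, timeout=None):
    # Event.wait returns False only on timeout, so translate that
    # into the multiprocessing TimeoutError callers expect.
    if not event.wait(timeout=timeout):
        raise mp.TimeoutError(
            "flag was not set within {} seconds".format(timeout))
    return True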
Example #9
    def reset_wait(self, timeout=None):
        """
        Parameters
        ----------
        timeout : int or float, optional
            Number of seconds before the call to `reset_wait` times out. If
            `None`, the call to `reset_wait` never times out.

        Returns
        -------
        observations : sample from `observation_space`
            A batch of observations from the vectorized environment.
        """
        self._assert_is_running()
        if self._state != AsyncState.WAITING_RESET:
            raise NoAsyncCallError(
                "Calling `reset_wait` without any prior "
                "call to `reset_async`.",
                AsyncState.WAITING_RESET.value,
            )

        if not self._poll(timeout):
            self._state = AsyncState.DEFAULT
            raise mp.TimeoutError(
                "The call to `reset_wait` has timed out after "
                "{0} second{1}.".format(timeout, "s" if timeout > 1 else ""))

        results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes])
        self._raise_if_errors(successes)
        self._state = AsyncState.DEFAULT

        if not self.shared_memory:
            self.observations = concatenate(results, self.observations,
                                            self.single_observation_space)

        return deepcopy(self.observations) if self.copy else self.observations
Example #10
    def recv_draw(processes, timeout=3600):
        if not processes:
            raise ValueError("No processes.")
        pipes = [proc._msg_pipe for proc in processes]
        ready = multiprocessing.connection.wait(pipes, timeout)
        if not ready:
            raise multiprocessing.TimeoutError("No message from samplers.")
        idxs = {id(proc._msg_pipe): proc for proc in processes}
        proc = idxs[id(ready[0])]
        msg = ready[0].recv()

        if msg[0] == "error":
            warns, old_error = msg[1:]
            if warns is not None:
                error = ParallelSamplingError(str(old_error), proc.chain, warns)
            else:
                error = RuntimeError("Chain %s failed." % proc.chain)
            raise error from old_error
        elif msg[0] == "writing_done":
            proc._readable = True
            proc._num_samples += 1
            return (proc,) + msg[1:]
        else:
            raise ValueError("Sampler sent bad message.")
Example #11
    def raise_timeout(self, **kwargs):
        raise multiprocessing.TimeoutError()
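Stubs like this are typically monkeypatched over a real method to force the timeout path in tests; a hypothetical sketch (`Worker` is invented for illustration):

import multiprocessing
from unittest import mock

class Worker:
    def fetch(self, **kwargs):
        return "real result"

def raise_timeout(self, **kwargs):
    raise multiprocessing.TimeoutError()

with mock.patch.object(Worker, "fetch", raise_timeout):
    try:
        Worker().fetch(timeout=1)
    except multiprocessing.TimeoutError:
        print("timeout path exercised")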
Example #12
            with tqdm(strains) as pbar2:
                for strain in pbar2:
                    try:
                        if strain.acc not in server:
                            db = server.new_database(strain.acc,
                                                     description="")
                            server.commit()

                            def save_strain():
                                save_sequences(strain)

                            p = multiprocessing.Process(target=save_strain)
                            p.start()
                            p.join(180)
                            if p.is_alive():
                                p.terminate()
                                p.join()
                                raise multiprocessing.TimeoutError()

                            strain.loaded = True
                            strain.save()

                    except Exception:
                        traceback.print_exc()
                        server.rollback()
                        if strain.acc in server:
                            server.remove_database(strain.acc)
                            server.commit()
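The run-and-kill pattern in the middle of that loop is generic enough to factor out; a sketch (the helper name is invented):

import multiprocessing

def run_with_deadline(target, timeout):
    # Start `target` in a child process and terminate it if it does
    # not finish within `timeout` seconds.
    p = multiprocessing.Process(target=target)
    p.start()
    p.join(timeout)
    if p.is_alive():
        p.terminate()
        p.join()
        raise multiprocessing.TimeoutError()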
Example #13
    def step_wait(self, timeout=None):
        """Wait for the calls to :obj:`step` in each sub-environment to finish.

        Parameters
        ----------
        timeout : int or float, optional
            Number of seconds before the call to :meth:`step_wait` times out. If
            ``None``, the call to :meth:`step_wait` never times out.

        Returns
        -------
        observations : element of :attr:`~VectorEnv.observation_space`
            A batch of observations from the vectorized environment.

        rewards : :obj:`np.ndarray`, dtype :obj:`np.float_`
            A vector of rewards from the vectorized environment.

        dones : :obj:`np.ndarray`, dtype :obj:`np.bool_`
            A vector whose entries indicate whether the episode has ended.

        infos : list of dict
            A list of auxiliary diagnostic information dicts from sub-environments.

        Raises
        ------
        ClosedEnvironmentError
            If the environment was closed (if :meth:`close` was previously called).

        NoAsyncCallError
            If :meth:`step_wait` was called without any prior call to
            :meth:`step_async`.

        TimeoutError
            If :meth:`step_wait` timed out.
        """
        self._assert_is_running()
        if self._state != AsyncState.WAITING_STEP:
            raise NoAsyncCallError(
                "Calling `step_wait` without any prior call "
                "to `step_async`.",
                AsyncState.WAITING_STEP.value,
            )

        if not self._poll(timeout):
            self._state = AsyncState.DEFAULT
            raise mp.TimeoutError(
                f"The call to `step_wait` has timed out after {timeout} second(s)."
            )

        results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes])
        self._raise_if_errors(successes)
        self._state = AsyncState.DEFAULT
        observations_list, rewards, dones, infos = zip(*results)

        if not self.shared_memory:
            self.observations = concatenate(
                self.single_observation_space,
                observations_list,
                self.observations,
            )

        return (
            deepcopy(self.observations) if self.copy else self.observations,
            np.array(rewards),
            np.array(dones, dtype=np.bool_),
            infos,
        )
Example #14
    def reset_wait(
        self,
        timeout=None,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None,
    ):
        """
        Parameters
        ----------
        timeout : int or float, optional
            Number of seconds before the call to `reset_wait` times out. If
            `None`, the call to `reset_wait` never times out.
        seed: ignored
        options: ignored

        Returns
        -------
        element of :attr:`~VectorEnv.observation_space`
            A batch of observations from the vectorized environment.
        infos : list of dicts containing metadata

        Raises
        ------
        ClosedEnvironmentError
            If the environment was closed (if :meth:`close` was previously called).

        NoAsyncCallError
            If :meth:`reset_wait` was called without any prior call to
            :meth:`reset_async`.

        TimeoutError
            If :meth:`reset_wait` timed out.
        """
        self._assert_is_running()
        if self._state != AsyncState.WAITING_RESET:
            raise NoAsyncCallError(
                "Calling `reset_wait` without any prior "
                "call to `reset_async`.",
                AsyncState.WAITING_RESET.value,
            )

        if not self._poll(timeout):
            self._state = AsyncState.DEFAULT
            raise mp.TimeoutError(
                f"The call to `reset_wait` has timed out after {timeout} second(s)."
            )

        results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes])
        self._raise_if_errors(successes)
        self._state = AsyncState.DEFAULT

        if return_info:
            results, infos = zip(*results)
            infos = list(infos)

        if not self.shared_memory:
            self.observations = concatenate(self.single_observation_space,
                                            results, self.observations)

        observations = (deepcopy(self.observations)
                        if self.copy else self.observations)
        return (observations, infos) if return_info else observations
Example #15
def test_subprocess_trace(datadog_tracer: ddtrace.Tracer,
                          caplog: LogCaptureFixture):
    """Verify that spans created in subprocesses are written to the queue and then flushed to the server,
    when wrapped in the SubprocessTracer"""

    # Enable log output for this logger for duration of this test
    caplog.set_level(logging.DEBUG, DatadogLoggingTraceFilter._log.name)
    test = f"{inspect.stack()[0][3]}"
    # And also send its output through a multiprocessing queue to surface logs from the subprocess
    log_queue = mp.Queue()
    DatadogLoggingTraceFilter._log.addHandler(QueueHandler(log_queue))
    DatadogLoggingTraceFilter.activate()

    subproc_test_msg = f"a test message was logged in a subprocess of {test}"
    state = mp.Queue()
    stop_sentinel = "-->STOP<--"

    with ddtrace.tracer.trace(
            name=f"{test}_operation",
            service=f"{test}_service",
            resource=f"{test}_resource",
            span_type=SpanTypes.TEST,
    ) as span:
        trace_id = span.trace_id
        logger = logging.getLogger(f"{test}_logger")
        test_msg = f"a test message was logged during {test}"
        logger.warning(test_msg)
        ctx = mp.get_context("fork")
        worker = ctx.Process(
            name=f"{test}_subproc",
            target=_do_things_in_subproc,
            args=(
                subproc_test_msg,
                state,
            ),
        )
        worker.start()
        worker.join(timeout=10)
        if worker.is_alive():
            worker.terminate()
            try:
                _drain_captured_log_queue(log_queue,
                                          stop_sentinel,
                                          caplog,
                                          force_immediate_stop=True)
            except Exception:
                print(
                    "Error draining captured log queue when handling subproc TimeoutError"
                )
            raise mp.TimeoutError(
                f"subprocess {worker.name} did not complete in timeout")
        DatadogLoggingTraceFilter._log.warning(stop_sentinel)

    subproc_trace_id, subproc_span_id = state.get(block=True, timeout=10)
    assert test_msg in caplog.text, "caplog.text did not seem to capture logging output during test"
    assert f"SPAN#{trace_id}" in caplog.text, "span marker not found in logging output"
    assert f"TRACE#{trace_id}" in caplog.text, "trace marker not found in logging output"
    assert f"resource {test}_resource" in caplog.text, "traced resource not found in logging output"
    assert subproc_trace_id == trace_id  # subprocess tracing should be a continuation of the trace in parent process

    _drain_captured_log_queue(log_queue, stop_sentinel, caplog)

    assert f"{subproc_span_id}" in caplog.text, "subproc span id not found in logging output"
    assert (
        f"resource {_do_things_in_subproc.__name__}_resource"
        in caplog.text), "subproc traced resource not found in logging output"
Example #16
    def join(self, *args, **kwargs):
        self.process.join(*args, **kwargs)
        if self.process.is_alive():
            raise mp.TimeoutError()
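A self-contained sketch of the class this method presumably belongs to; the constructor is an assumption:

import multiprocessing as mp

class ProcessHandle:
    """Hypothetical wrapper that owns `self.process`."""
    def __init__(self, target, *args):
        self.process = mp.Process(target=target, args=args)
        self.process.start()

    def join(self, *args, **kwargs):
        self.process.join(*args, **kwargs)
        if self.process.is_alive():
            # join returned because the timeout expired, not because
            # the child exited, so surface that as TimeoutError.
            raise mp.TimeoutError()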