def _run(self):
        while True:
            try:
                self.pool.wait_available()
                reqGroup = self._requestQueue.getLatestGroup()
                retryGroup = self._retryQueue.getLatestGroup()

                if reqGroup is None and retryGroup is None:
                    if self._killed:
                        break
                    else:
                        self._requestAdded.clear()
                        self._requestAdded.wait(self._retryQueue.getMinWaitTime())
                        continue
                
                if retryGroup is None or (reqGroup is not None and reqGroup > retryGroup):
                    request, responseIterator, group, requestIndex = self._requestQueue.pop()
                    numTries = 0

                    if isinstance(request, tuple):
                        bundle = Bundle(request[0])
                        bundle.obj = request[1]
                        bundle.hasobj = True
                    else:
                        bundle = Bundle(request)

                    if self._skip(bundle):
                        responseIterator._add(bundle, requestIndex)
                        continue

                    try:
                        if isinstance(bundle.request, basestring):
                            bundle.request = Request(method = 'GET', url = bundle.request)
                        if isinstance(bundle.request, Request):
                            bundle.request = self.session.prepare_request(bundle.request)
                        if not isinstance(bundle.request, PreparedRequest):
                            raise TypeError('Request must be an instance of: str (or unicode), Request, PreparedRequest, not %s.' % type(bundle.request))
                    except Exception as ex:
                        # An exception here isn't recoverable, so don't bother testing for retries
                        bundle.exception = ex
                        bundle.traceback = exc_info()[2]
                        responseIterator._add(bundle, requestIndex)
                        continue
                else:
                    bundle, responseIterator, group, requestIndex, numTries = self._retryQueue.pop()

                #print('(Execute   ) %s [%d] %d, %s, %d, %s' % ( time(), responseIterator._counter, responseIterator._inflight, responseIterator._done, len(responseIterator._responses), bundle.request.url ))
                g = Greenlet(self._execute, bundle)
                # Attach data as a property, right on the greenlet.  This way, we won't lose the information if the greenlet is killed before it starts
                g.data = ( bundle, responseIterator, group, requestIndex, numTries )
                g.rawlink(self._response)
                self.pool.start(g)

                if self.minSecondsBetweenRequests > 0:
                    sleep(self.minSecondsBetweenRequests)

            except GreenletExit:
                self._kill()
Exemple #2
0
    def _run(self):
        while True:
            try:
                self.pool.wait_available()
                reqGroup = self._requestQueue.getLatestGroup()
                retryGroup = self._retryQueue.getLatestGroup()

                if reqGroup is None and retryGroup is None:
                    if self._killed:
                        break
                    else:
                        self._requestAdded.clear()
                        self._requestAdded.wait(
                            self._retryQueue.getMinWaitTime())
                        continue

                if retryGroup is None or (reqGroup is not None
                                          and reqGroup > retryGroup):
                    request, responseIterator, group, requestIndex = self._requestQueue.pop(
                    )
                    numTries = 0

                    if isinstance(request, tuple):
                        bundle = Bundle(request[0])
                        bundle.obj = request[1]
                        bundle.hasobj = True
                    else:
                        bundle = Bundle(request)

                    if self._skip(bundle):
                        responseIterator._add(bundle, requestIndex)
                        continue

                    try:
                        if isinstance(bundle.request, basestring):
                            bundle.request = Request(method='GET',
                                                     url=bundle.request)
                        if isinstance(bundle.request, Request):
                            bundle.request = self.session.prepare_request(
                                bundle.request)
                        if not isinstance(bundle.request, PreparedRequest):
                            raise TypeError(
                                'Request must be an instance of: str (or unicode), Request, PreparedRequest, not %s.'
                                % type(bundle.request))
                    except Exception as ex:
                        # An exception here isn't recoverable, so don't bother testing for retries
                        bundle.exception = ex
                        bundle.traceback = exc_info()[2]
                        responseIterator._add(bundle, requestIndex)
                        continue
                else:
                    bundle, responseIterator, group, requestIndex, numTries = self._retryQueue.pop(
                    )

                #print('(Execute   ) %s [%d] %d, %s, %d, %s' % ( time(), responseIterator._counter, responseIterator._inflight, responseIterator._done, len(responseIterator._responses), bundle.request.url ))
                g = Greenlet(self._execute, bundle)
                # Attach data as a property, right on the greenlet.  This way, we won't lose the information if the greenlet is killed before it starts
                g.data = (bundle, responseIterator, group, requestIndex,
                          numTries)
                g.rawlink(self._response)
                self.pool.start(g)

                if self.minSecondsBetweenRequests > 0:
                    sleep(self.minSecondsBetweenRequests)

            except GreenletExit:
                self._kill()
Exemple #3
0
class Runnable:
    """Greenlet-like wrapper: runs _run() inside a greenlet, yet can be stopped and restarted

    Subtasks are allowed to crash this object; their exception is bubbled up
    into the main greenlet.
    May be replaced by a plain greenlet once proper restart is implemented.
    """

    greenlet: Greenlet
    args: Sequence = ()  # positional arguments handed to _run()
    kwargs: dict = {}  # keyword arguments handed to _run()

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        self.args, self.kwargs = args, kwargs

        main = Greenlet(self._run, *self.args, **self.kwargs)
        main.name = f"{self.__class__.__name__}|{main.name}"
        self.greenlet = main
        self.greenlets: List[Greenlet] = []

    def start(self) -> None:
        """ Synchronously start the task

        Children reimplement this and call super().start() at the end to start _run().
        Start-time exceptions may be raised; RuntimeError is raised when the
        current greenlet is already truthy (i.e. started).
        """
        if self.greenlet:
            raise RuntimeError(f"Greenlet {self.greenlet!r} already started")

        # The existing greenlet is only reused when it is not dead and was
        # built from the args/kwargs currently stored on self.
        stale = (self.greenlet.dead
                 or tuple(self.greenlet.args) != tuple(self.args)
                 or self.greenlet.kwargs != self.kwargs)
        if stale:
            replacement = Greenlet(self._run, *self.args, **self.kwargs)
            self.greenlet = replacement
            self.greenlet.name = f"{self.__class__.__name__}|{self.greenlet.name}"
        self.greenlet.start()

    def _run(self, *args: Any, **kwargs: Any) -> None:
        """ Children reimplement this to busy wait here

        The busy wait should end gracefully after stop(),
        or be killed and re-raise when a subtask fails """
        raise NotImplementedError

    def stop(self) -> None:
        """ Synchronous stop: gracefully tell _run() to exit

        Should wait for subtasks to finish.
        Stop-time exceptions may be raised, run exceptions should not (they are accessible via get())
        """
        raise NotImplementedError

    def on_error(self, subtask: Greenlet) -> None:
        """ Default callback for subtasks' link_exception

        Logs the failure and, while the main greenlet is truthy, kills it with
        the subtask's exception (or a GreenletExit when the subtask has none),
        which re-raises inside _run() """
        log.error(
            "Runnable subtask died!",
            this=self,
            running=bool(self),
            subtask=subtask,
            exc=subtask.exception,
        )
        if self.greenlet:
            self.greenlet.kill(subtask.exception or GreenletExit())

    def _schedule_new_greenlet(self,
                               func: Callable,
                               *args: Any,
                               in_seconds_from_now: int = None,
                               **kwargs: Any) -> Greenlet:
        """ Spawn a subtask whose failure crashes self / the main greenlet

        The subtask is tracked in self.greenlets and dropped from that list
        once it finishes successfully. It is started after
        in_seconds_from_now when that value is truthy, otherwise right away.
        """
        def forget(finished: Greenlet) -> None:
            # Untrack a subtask that completed; ignore it if it is not listed.
            try:
                self.greenlets.remove(finished)
            except ValueError:
                pass

        subtask = Greenlet(func, *args, **kwargs)
        subtask.link_exception(self.on_error)
        subtask.link_value(forget)
        self.greenlets.append(subtask)
        if not in_seconds_from_now:
            subtask.start()
        else:
            subtask.start_later(in_seconds_from_now)
        return subtask

    def __bool__(self) -> bool:
        # Truthiness mirrors that of the main greenlet.
        return True if self.greenlet else False

    def rawlink(self, callback: Callable) -> None:
        # Forward the link to the main greenlet, but only while it is truthy.
        if self.greenlet:
            self.greenlet.rawlink(callback)