Ejemplo n.º 1
0
    def __init__(
        self,
        host_id,
        talker,
        raise_on_failure=True,
        retcode=(0, ),
        args=None,
        timeout=HOUR,
        server_timeout=True,
        line_timeout=None,
        log_file=None,
        name=None,
        max_output_per_channel=None,
        set_new_logpath=None,
    ):
        self.name = name
        self.job_id = str(uuid4())
        self.talker = talker
        self.retcode = None
        self.aborted_by = None
        self.ack = None
        self.stdout = []
        self.stderr = []
        self.raise_on_failure = raise_on_failure
        self.good_codes = retcode
        self.args = args
        self.host_id = host_id
        self.hostname = host_id.partition('.')[-1]
        self.ack_timer = None
        self.handling_timer = None
        self.timeout = timeout or HOUR  # No command should run over hour, unless specified explicitly
        self.server_timeout = server_timeout
        self.line_timeout = Duration(line_timeout) if line_timeout else None
        self.log_file = log_file
        self.max_output_per_channel = max_output_per_channel or MAX_OUTPUT_PER_CHANNEL
        self.set_new_logpath = set_new_logpath
        self.client_timer = Timer(
            expiration=self.timeout if not server_timeout else self.timeout +
            5)
        self._line_timeout_synced = False
        self.attempts = 0  # in case of TalkerCommandLost

        # this is the version from which the agent acknowledges receipt of command
        self.ack_supported = self.talker.agent_version >= V1_3_1
        # this is the version from which the agent is ignores params it does not support
        self.kwargs_resilient = self.talker.agent_version >= V1_3_1
        # this is the version from which the agent is ignores params it does not support
        self.pid_supported = self.talker.agent_version >= V1_6_0
Ejemplo n.º 2
0
 def _logged_func(self):
     stack = ExitStack()
     self.exc = None
     self.timer = Timer()
     stack.callback(self.timer.stop)
     stack.callback(self.stop)
     try:
         if not self.console_logging:
             stack.enter_context(_logger.suppressed())
         _logger.debug("%s - starting", self)
         while True:
             self._result = self.func(*self.args, **self.kwargs)
             if not self.loop:
                 return
             if self.wait(self.sleep):
                 _logger.debug("%s - stopped", self)
                 return
     except ProcessExiting as exc:
         _logger.debug(exc)
         raise
     except KeyboardInterrupt as exc:
         _logger.silent_exception("KeyboardInterrupt in thread running %s:",
                                  self.func)
         self.exc = apply_timestamp(exc)
         if IS_GEVENT:
             raise  # in gevent we should let this exception propagate to the main greenlet
     except Exception as exc:
         _logger.silent_exception(
             "Exception in thread running %s: %s (traceback can be found in debug-level logs)",
             self.func, type(exc))
         self.exc = apply_timestamp(exc)
     finally:
         stack.close()
Ejemplo n.º 3
0
    def acquire(self, blocking=True, timeout=-1, lease_expiration=None):
        if not blocking:
            ret = self._lock.acquire(blocking=False)
            lease_timer = self._lease_timer  # touch it once, so we don't hit a race since it occurs outside of the lock acquisition
            if ret:
                self._acquired(lease_expiration)
            elif lease_timer and lease_timer.expired:
                raise LockLeaseExpired(lock=self)
            return ret

        # this timer implements the 'timeout' parameter
        acquisition_timer = Timer(expiration=NEVER if timeout < 0 else timeout)
        while not acquisition_timer.expired:

            # the timeout on actually acquiring this lock is the minimum of:
            # 1. the time remaining on the acquisition timer, set by the 'timeout' param
            # 2. the logging interval - the minimal frequency for logging while the lock is awaited
            # 3. the time remaining on the lease timer, which would raise if expired
            timeout = min(acquisition_timer.remain, self._log_interval)
            lease_timer = self._lease_timer  # touch it once, so we don't hit a race since it occurs outside of the lock acquisition
            if lease_timer:
                timeout = min(lease_timer.remain, timeout)

            if self._lock.acquire(blocking=True, timeout=timeout):
                self._acquired(lease_expiration, should_log=acquisition_timer.elapsed > self._MIN_TIME_FOR_LOGGING)
                return True

            lease_timer = self._lease_timer  # touch it once, so we don't hit a race since it occurs outside of the lock acquisition
            if lease_timer and lease_timer.expired:
                raise LockLeaseExpired(lock=self)

            _logger.debug("%s - waiting...", self)
Ejemplo n.º 4
0
    def exclusive(self, need_to_wait_message=None):
        while not self.cond.acquire(timeout=15):
            _logger.debug("%s - waiting...", self)

        # wait until this thread is the sole owner of this lock
        while not self.cond.wait_for(lambda: self.owner_count == self.owners[_get_my_ident()], timeout=15):
            _check_exiting()
            if need_to_wait_message:
                _logger.info(need_to_wait_message)
                need_to_wait_message = None  # only print it once
            _logger.debug("%s - waiting (for exclusivity)...", self)
        my_ident = _get_my_ident()
        self.owners[my_ident] += 1
        self._lease_timer = Timer()
        _verbose_logger.debug("%s - acquired (exclusively)", self)
        try:
            yield
        finally:
            _verbose_logger.debug('%s - releasing (exclusive)', self)
            self._lease_timer = None
            self.owners[my_ident] -= 1
            if not self.owners[my_ident]:
                self.owners.pop(my_ident)  # don't inflate the soft lock keys with threads that does not own it
            self.cond.notify()
            self.cond.release()
Ejemplo n.º 5
0
 def _logged_func(self):
     stack = ExitStack()
     self.exc = None
     self.timer = Timer()
     stack.callback(self.timer.stop)
     stack.callback(self.stop)
     try:
         if not self.console_logging:
             stack.enter_context(_logger.suppressed())
         _logger.debug("%s - starting", self)
         while True:
             self._result = self.func(*self.args, **self.kwargs)
             if not self.loop:
                 return
             if self.wait(self.sleep):
                 _logger.debug("%s - stopped", self)
                 return
     except ProcessExiting as exc:
         _logger.debug(exc)
         raise
     except (KeyboardInterrupt, Exception) as exc:
         _logger.silent_exception(
             "Exception in thread running %s: %s (traceback can be found in debug-level logs)",
             self.func, type(exc))
         self.exc = exc
         try:
             exc.timestamp = time.time()
         except Exception:
             pass
     finally:
         stack.close()
Ejemplo n.º 6
0
    def __call__(self):
        if not GRAPHICAL:
            yield self
            return

        handler = get_console_handler()
        with self._lock:
            self._depth += 1
            if self._depth == 1:
                self.set_message("Waiting...")
                self._stop = False
                self._timer = Timer()
                self._format = handler.formatter.format if handler else lambda record: record.getMessage(
                )
                self._thread = threading.Thread(target=self.loop,
                                                name="ProgressBar",
                                                daemon=True)
                self._thread.start()
        try:
            yield self
        finally:
            with self._lock:
                self._depth -= 1
                if self._depth <= 0:
                    self._stop = True
                    self._event.set()
                    self._thread.join()
                    self._thread = None
Ejemplo n.º 7
0
    def get_connection(self, timeout_override=None):
        if timeout_override:
            kw = dict(self._conn_params, timeout=timeout_override)
            _logger.debug("creating an ad-hoc connection (timeout=%s)",
                          timeout_override)
            return httpclient.HTTPConnection(**kw)

        conn = self._conn

        if not conn:
            _logger.debug("creating a new connection (timeout=%s, ttl=%s)",
                          self._conn_params.timeout, self._ttl)
        elif conn.timer.expired:
            _logger.debug("connection expired (%ss ago) - resetting",
                          conn.timer.expired)
            conn = None
        else:
            conn.timer.reset()

        if not conn:
            conn = httpclient.HTTPConnection(**self._conn_params)
            if not self._ttl:
                return conn
            conn.timer = Timer(expiration=self._ttl)
        self._conn = conn
        return conn
Ejemplo n.º 8
0
    def logged_wait(self, timeout=None, initial_log_interval=2 * MINUTE):
        """
        Wait for all futures to complete, logging their status along the way.
        Logging will occur at an every-increasing log interval, beginning with ``initial_log_interval``,
        and increasing 5-fold (x5) every 5 iterations.
        """
        log_interval = initial_log_interval
        global_timer = Timer(expiration=timeout)
        iteration = 0

        while not global_timer.expired:
            completed, pending = self.wait(log_interval)
            if not pending:
                break

            iteration += 1
            if iteration % 5 == 0:
                log_interval *= 5
            with _logger.indented("(Waiting for %s on %s/%s tasks...)",
                                  time_duration(global_timer.elapsed),
                                  len(pending),
                                  sum(map(len, (completed, pending))),
                                  level=logging.WARNING,
                                  footer=False):
                self.dump_stacks(pending, verbose=global_timer.elapsed >= HOUR)
Ejemplo n.º 9
0
 def _logged_func(self):
     self.timer = Timer()
     self.exc = None
     try:
         _logger.debug("%s - starting", self)
         while True:
             self.result = self.func(*self.args, **self.kwargs)
             if not self.loop:
                 return
             if self.wait(self.sleep):
                 _logger.debug("%s - stopped", self)
                 return
     except Exception as exc:
         _logger.silent_exception("Exception in thread running %s (traceback can be found in debug-level logs)", self.func)
         self.exc = exc
     finally:
         self.timer.stop()
         self.stop()
Ejemplo n.º 10
0
 def wait(self, timeout=None):
     if self.real_thread_no_greenlet:
         # we can't '.wait' on this gevent event object, so instead we test it and sleep manually:
         timer = Timer(expiration=timeout)
         while not timer.expired:
             if self.stopper.is_set():
                 return True
             non_gevent_sleep(0.1)
         return False
     return self.stopper.wait(timeout)
Ejemplo n.º 11
0
def retry(times,
          func,
          args=[],
          kwargs={},
          acceptable=Exception,
          sleep=1,
          max_sleep=False,
          log_level=logging.DEBUG,
          pred=None,
          unacceptable=()):

    if unacceptable is None:
        unacceptable = ()
    elif isinstance(unacceptable, tuple):
        unacceptable += UNACCEPTABLE_EXCEPTIONS
    else:
        unacceptable = (unacceptable, ) + UNACCEPTABLE_EXCEPTIONS

    if isinstance(times, Timer):
        stopper = times  # a timer is a valid stopper
    elif isinstance(times, Duration):
        stopper = Timer(expiration=times)
    elif isinstance(times, int):
        stopper = ExpiringCounter(times)
    else:
        assert False, "'times' must be an 'int', 'Duration' or 'Timer', got %r" % times

    if max_sleep:
        sleep = RandomExponentialBackoff(sleep, max_sleep)
    if not pred:
        pred = lambda exc: True

    while True:
        try:
            return func(*args, **kwargs)
        except unacceptable as exc:
            raise
        except acceptable as exc:
            raise_if_async_exception(exc)
            if not pred(exc):
                raise
            if stopper.expired:
                raise
            _logger.log(log_level, "Exception thrown: %r", exc)
            sleep_for = 0
            if sleep:
                # support for ExponentialBackoff
                sleep_for = sleep() if callable(sleep) else sleep
            _logger.log(log_level, "Retrying... (%r remain) in %s seconds",
                        stopper.remain, sleep_for)
            time.sleep(sleep_for)
Ejemplo n.º 12
0
def test_time_interval1():

    st = 150000000
    t = Timer(st)
    t.t1 = t.t0 + 1

    ti = t.to_interval()

    assert t in ti

    assert t.t0 in ti
    assert t.t1 in ti

    assert t.t0 - 1 not in ti
    assert t.t1 + 1 not in ti

    assert ti.duration == t.duration
    assert ti.duration == 1
    assert ti.duration_delta.total_seconds() == 1

    assert str(ti) == '<TI 05:40:00..(1s)..05:40:01>'

    assert str(t.to_interval().to_timer()) == str(t)
Ejemplo n.º 13
0
 def _get_main_loop(self):
     while True:
         items = []
         items.append(self._commands_queue.get())
         t = Timer(expiration=10.0 / 1000.0)
         while not t.expired:
             try:
                 items.append(self._commands_queue.get(timeout=t.remain))
             except Empty:
                 break
         if not items:
             continue
         self._current_workers.acquire(timeout=10)
         self._executors.submit(_run_with_exception_logging,
                                self._send_data, (items, ), {}, {})
Ejemplo n.º 14
0
def test_wait_long_predicate():
    """
    After the actual check the predicate is held for 3 seconds. Make sure
    that we don't get a timeout after 2 seconds - because the actual
    condition should be met in 1 second!
    """

    t = Timer()

    def pred():
        try:
            return 0.1 < t.duration
        finally:
            wait(0.3)

    wait(0.2, pred, message=False)
Ejemplo n.º 15
0
    def logged_wait(self, timeout=None, initial_log_interval=None):
        log_interval = initial_log_interval or 2*MINUTE
        global_timer = Timer(expiration=timeout)
        iteration = 0

        while not global_timer.expired:
            completed, pending = self.wait(log_interval)
            if not pending:
                break

            iteration += 1
            if iteration % 5 == 0:
                log_interval *= 5
            with _logger.indented("(Waiting for %s on %s/%s tasks...)",
                                  time_duration(global_timer.elapsed),
                                  len(pending), sum(map(len, (completed, pending))),
                                  level=logging.WARNING, footer=False):
                self.dump_stacks(pending, verbose=global_timer.elapsed >= HOUR)
Ejemplo n.º 16
0
 def wait_and_kill(self):
     timer = Timer(expiration=self.timeout)
     if not self.quiet:
         _logger.info("Timebomb set - this process will YELLOW<<self-destruct>> in RED<<%r>>...", timer.remain)
     while not timer.expired:
         if self.alert_interval:
             _logger.info("Time Elapsed: MAGENTA<<%r>>", timer.elapsed)
         log_level = logging.WARNING if timer.remain < 5*MINUTE else logging.DEBUG
         _logger.log(log_level, "This process will YELLOW<<self-destruct>> in RED<<%r>>...", timer.remain)
         if self.fuse.wait(min(self.alert_interval or 60, timer.remain)):
             _logger.info("Timebomb cancelled")
             return
     _logger.warning("RED<< 💣 Timebomb Expired! 💣 >>")
     with _logger.indented("Killing children"):
         kill_subprocesses()
     with _logger.indented("Committing suicide"):
         kill_this_process(graceful=False)
         raise Exception("Timebomb Expired")
Ejemplo n.º 17
0
    def put_command(self, *, job_repr, **job_data):
        """
        Put the command in the commands queue
        For internal use

        :param [str] job_repr: A string to print the command for logging purposes
        :param [dict] job_data: Command data to send to the Talker agent
        """
        get_logger().debug("submitting job [%s] (%s)", job_repr, self.job_id)
        self.talker.reactor.rpush(self._commands_key,
                                  json.dumps(job_data),
                                  _callback=self.on_sent,
                                  _cmd_id=self.job_id)
        # making expiration long enough so it's unlikely to expire in the middle pushing a new command - see WEKAPP-86513
        # a 5m expiration could fall exactly when we push a new command, causing it to expire before the agent pops it out;
        # a 1d expiration is long enough since new commands are likely to be pushed in the interim, extending the expiration
        # we can't call 'expire' before 'rpush', since it will be a no-op if the key doesn't exist yet
        self.talker.reactor.expire(self._commands_key, timedelta(days=1))
        self.handling_timer = Timer(expiration=AGENT_SEND_TIMEOUT)
Ejemplo n.º 18
0
def test_iter_wait_progress_inbetween_sleep():
    data = Bunch(a=3)

    def get():
        data.a -= 1
        return data.a

    sleep = .07
    g = iter_wait_progress(get, advance_timeout=10, sleep=sleep)

    # first iteration should be immediate
    t = Timer()
    next(g)
    assert t.duration < sleep

    # subsequent iteration should be at least 'sleep' long
    next(g)
    assert t.duration >= sleep

    for state in g:
        pass
    assert state.finished is True
Ejemplo n.º 19
0
def retry(times: Union[int, Duration, Timer],
          func,
          args=[],
          kwargs={},
          acceptable=Exception,
          sleep=1,
          max_sleep=False,
          log_level=logging.DEBUG,
          pred=None,
          unacceptable=()):
    """
    Runs a function again and again until it finishes without throwing.

    :param times: Limit number of attempts before errors are propagated instead of suppressed.
                  If an `int`, the execution is retried at most `times` times.
                  If a `Timer`, retries until the `times` expires.
                  If a `Duration`, retires for the specified duarion.
    :param func: The function to run.
    :param list args: Positional arguments for the function.
    :param dict kwargs: Keyword arguments for the function.
    :param acceptable: Exception (or tuple of exceptions) to repeat the function on.
    :param sleep: Time to wait between multiple retries.
    :param max_sleep: If set, increase the sleep after each retry until reaching this number.
    :param log_level: Before retrying, log the exceptions at that level.
    :param pred: If set, decide whether or not to retry on an exception.
    :param unacceptable: Exception (or tuple of exceptions) to not repeat the function on and re-raise them instead.
    """

    if unacceptable is None:
        unacceptable = ()
    elif isinstance(unacceptable, tuple):
        unacceptable += UNACCEPTABLE_EXCEPTIONS
    else:
        unacceptable = (unacceptable, ) + UNACCEPTABLE_EXCEPTIONS

    if isinstance(times, Timer):
        stopper = times  # a timer is a valid stopper
    elif isinstance(times, Duration):
        stopper = Timer(expiration=times)
    elif isinstance(times, int):
        stopper = ExpiringCounter(times)
    else:
        assert False, "'times' must be an 'int', 'Duration' or 'Timer', got %r" % times

    if max_sleep:
        sleep = RandomExponentialBackoff(sleep, max_sleep)
    if not pred:
        pred = lambda exc: True

    while True:
        try:
            return func(*args, **kwargs)
        except unacceptable as exc:
            raise
        except acceptable as exc:
            raise_if_async_exception(exc)
            if not pred(exc):
                raise
            if stopper.expired:
                raise
            _logger.log(log_level, "Exception thrown: %r", exc)
            sleep_for = 0
            if sleep:
                # support for ExponentialBackoff
                sleep_for = sleep() if callable(sleep) else sleep
            _logger.log(log_level, "Retrying... (%r remain) in %s seconds",
                        stopper.remain, sleep_for)
            time.sleep(sleep_for)
Ejemplo n.º 20
0
 def _acquired(self, lease_expiration, should_log=False):
     # we don't want to replace the lease timer, so not to effectively extend the original lease
     self._lease_timer = self._lease_timer or Timer(expiration=lease_expiration or self._lease_expiration)
     if should_log:
         _logger.debug("%s - acquired", self)
Ejemplo n.º 21
0
class Cmd(object):
    """
    A class to represent a command to run on a host

    :param [str] host_id: Host ID to run the command on
    :param [TalkerClient] talker: The Talker client that is used to send the command
    :param [bool] raise_on_failure: Should the command raise an exception on failure
    :param [Tuple[int]] retcode: What return codes are considered success
    :param [List[str]] args: List of the command arguments
    :param [float, int] timeout: Command timeout in seconds
    :param [float, int] server_timeout: Command timeout on the agent in seconds
    :param [float, int] line_timeout: Command output timeout in seconds
    :param [str] log_file: Log file path to write command output to in the host
    :param [str] name: Command name for logging purposes
    :param [int] max_output_per_channel: Maximum size of the command output to generate in bytes
    :param [str] set_new_logpath: Set a new path for the Talker agent log file
    """
    def __init__(
        self,
        host_id,
        talker,
        raise_on_failure=True,
        retcode=(0, ),
        args=None,
        timeout=HOUR,
        server_timeout=True,
        line_timeout=None,
        log_file=None,
        name=None,
        max_output_per_channel=None,
        set_new_logpath=None,
    ):
        self.name = name
        self.job_id = str(uuid4())
        self.talker = talker
        self.retcode = None
        self.aborted_by = None
        self.ack = None
        self.stdout = []
        self.stderr = []
        self.raise_on_failure = raise_on_failure
        self.good_codes = retcode
        self.args = args
        self.host_id = host_id
        self.hostname = host_id.partition('.')[-1]
        self.ack_timer = None
        self.handling_timer = None
        self.timeout = timeout or HOUR  # No command should run over hour, unless specified explicitly
        self.server_timeout = server_timeout
        self.line_timeout = Duration(line_timeout) if line_timeout else None
        self.log_file = log_file
        self.max_output_per_channel = max_output_per_channel or MAX_OUTPUT_PER_CHANNEL
        self.set_new_logpath = set_new_logpath
        self.client_timer = Timer(
            expiration=self.timeout if not server_timeout else self.timeout +
            5)
        self._line_timeout_synced = False
        self.attempts = 0  # in case of TalkerCommandLost

        # this is the version from which the agent acknowledges receipt of command
        self.ack_supported = self.talker.agent_version >= V1_3_1
        # this is the version from which the agent is ignores params it does not support
        self.kwargs_resilient = self.talker.agent_version >= V1_3_1
        # this is the version from which the agent is ignores params it does not support
        self.pid_supported = self.talker.agent_version >= V1_6_0

    def __repr__(self):
        return "%s(<%s>%s)" % (self.__class__.__name__, self.job_id, "!" if
                               (self.ack_supported and not self.ack) else "")

    @property
    def _result_key(self):
        return "result-%s" % self.job_id

    @property
    def _ack_key(self):
        return "%s-ack" % self._result_key

    @property
    def _stdout_key(self):
        return "%s-stdout" % self._result_key

    @property
    def _stderr_key(self):
        return "%s-stderr" % self._result_key

    @property
    def _exit_code_key(self):
        return "%s-retcode" % self._result_key

    @safe_property
    def _pid_key(self):
        return "%s-pid" % self._result_key

    @property
    def _commands_key(self):
        return "commands-%s" % self.host_id

    @property
    def timed_out(self):
        """
        Is this command timed out

        :rtype: bool
        """
        return self.aborted_by == AbortedBy.timeout

    @property
    def since_started(self):
        """
        Get the time since the command acked

        :rtype: Duration
        """
        return Duration(time.time() - self.ack) if self.ack else None

    def get_pid(self, timeout=JOB_PID_TIMEOUT, wait_for_ack=True):
        """
        Get the command process ID

        :param [int, float] timeout: After how many seconds should we throw a timeout error
        :param [bool] wait_for_ack: Wait for ack before checking for the command PID

        :returns: The command PID
        :rtype: int
        """
        if not self.pid_supported:
            raise NotImplementedError(
                'This version (%s) does not support get_pid - need %s or better'
                % (self.talker.agent_version, V1_6_0))
        if wait_for_ack:
            self.wait(for_ack=True)
        res = self.talker.reactor.get(self._pid_key, timeout=JOB_PID_TIMEOUT)
        return int(res)

    def put_command(self, *, job_repr, **job_data):
        """
        Put the command in the commands queue
        For internal use

        :param [str] job_repr: A string to print the command for logging purposes
        :param [dict] job_data: Command data to send to the Talker agent
        """
        get_logger().debug("submitting job [%s] (%s)", job_repr, self.job_id)
        self.talker.reactor.rpush(self._commands_key,
                                  json.dumps(job_data),
                                  _callback=self.on_sent,
                                  _cmd_id=self.job_id)
        # making expiration long enough so it's unlikely to expire in the middle pushing a new command - see WEKAPP-86513
        # a 5m expiration could fall exactly when we push a new command, causing it to expire before the agent pops it out;
        # a 1d expiration is long enough since new commands are likely to be pushed in the interim, extending the expiration
        # we can't call 'expire' before 'rpush', since it will be a no-op if the key doesn't exist yet
        self.talker.reactor.expire(self._commands_key, timedelta(days=1))
        self.handling_timer = Timer(expiration=AGENT_SEND_TIMEOUT)

    @log_context(host="{.hostname}")
    def send(self):
        """
        Send the command to the Talker agent on the host
        """
        args = [str(a) for a in self.args]
        command_string = ' '.join(x if ' ' not in x else '"%s"' % x
                                  for x in args)
        timeout = self.timeout if self.server_timeout else None

        extras = {}
        if self.kwargs_resilient:
            extras.update(
                max_output_per_channel=self.max_output_per_channel,
                set_new_logpath=self.set_new_logpath,
                line_timeout=self.line_timeout,
                log_file=self.log_file,
            )

        self.put_command(id=self.job_id,
                         cmd=args,
                         timeout=timeout,
                         job_repr=compact(command_string, 100),
                         **extras)

    def on_sent(self):
        if not self.ack_timer:
            self.ack_timer = Timer(
                expiration=TALKER_CONTEXT.ack_timeout
            )  # this is expiration on the agent ack'ing the command

    @property
    def is_sent(self):
        """
        Is the command sent to the host

        :rtype: bool
        """
        return bool(self.ack_timer)

    @log_context(host="{.hostname}")
    def reset_server_timeout(self):
        """
        Reset timeout on the Talker agent for this command
        """
        _logger.debug("Reseting timeout, %s, %s", self.job_id, self.timeout)
        self.client_timer.reset()
        self.talker.reactor.rpush(
            self._commands_key,
            json.dumps(
                dict(id=self.job_id,
                     cmd="reset_timeout",
                     new_timeout=self.timeout)))
        self.talker.reactor.expire(self._commands_key, timedelta(minutes=5))

    @log_context(host="{.hostname}")
    def send_signal(self, sig):
        """
        Send a signal for the process running te command

        :param [int] sig: The signal to send to the command
        """
        self.talker.reactor.rpush(
            self._commands_key,
            json.dumps(dict(
                id=self.job_id,
                cmd="signal",
                signal=sig,
            )))
        self.talker.reactor.expire(self._commands_key, timedelta(minutes=5))

    @log_context(host="{.hostname}")
    def kill(self, graceful_timeout=3):
        """
        Kill the process running the command

        This will send a SIGTERM signal to the process, wait for graceful_timeout seconds,
        check if the process died and if not, send the SIGKILL signal.

        :param [int, float] graceful_timeout: Seconds to wait between the SIGTERM and SIGKILL signals
        """
        self.talker.reactor.rpush(
            self._commands_key,
            json.dumps(
                dict(
                    id=self.job_id,
                    cmd="kill",
                    graceful_timeout=graceful_timeout,
                )))
        self.talker.reactor.expire(self._commands_key, timedelta(minutes=5))

    def _sync_timeout(self, line_timeout):
        if self._line_timeout_synced:
            return
        else:
            self._line_timeout_synced = True

        if line_timeout and line_timeout > self.timeout:
            self.timeout = int(line_timeout * 1.15)
            self.reset_server_timeout()

    def iter_results(self, line_timeout=DAY, timeout=DAY):
        """
        Iterate return code and output results

        :param [int, float] line_timeout: Timeout for new output to be recieved in seconds
        :param [int, float] timeout: Timout for command to finish in seconds

        :returns: A Tuple with the channel name and value. Channel names are `retcode`, `stdout` and `stderr`
        :rtype: Tuple[str, str]
        """
        timeout = timeout or DAY
        line_timeout = line_timeout or DAY
        blpop_timeout = min(line_timeout, timeout // 10, self.timeout // 10,
                            AGENT_ACK_TIMEOUT // 10)

        session_timer = Timer(
            expiration=timeout
        )  # timeout for this invocation (not entire command duration)
        line_timer = Timer(expiration=line_timeout)
        timeout_reset_timer = Timer(expiration=max(line_timeout // 10, 1))
        self._sync_timeout(line_timeout=line_timeout)

        while self.retcode is None:

            if line_timer.expired:
                self.raise_exception(exception_cls=CommandLineTimeout,
                                     line_timeout=Duration(line_timeout))

            if session_timer.expired:
                self.raise_exception(exception_cls=CommandTimeout,
                                     timeout=timeout)

            channels = {
                self._ack_key: 'ack',
                self._stderr_key: 'stderr',
                self._stdout_key: 'stdout',
                self._exit_code_key: 'retcode'
            }
            res = self.talker.reactor.blpop([
                self._ack_key, self._stderr_key, self._stdout_key,
                self._exit_code_key
            ],
                                            timeout=blpop_timeout)
            if not res:
                if not self.ack:
                    # no point checking these timers timeout if the command hasn't been acknowledged yet
                    line_timer.reset()
                    session_timer.reset()

                self.check_client_timeout()
                continue
            else:
                channel_name, value = res
                channel_name = channels[channel_name.decode('utf-8')]

            line_timer.reset()
            if timeout_reset_timer.expired:
                timeout_reset_timer.reset()
                self.reset_server_timeout()

            if channel_name == 'stderr':
                self.on_output(self.stderr, (value, ))
            elif channel_name == 'stdout':
                self.on_output(self.stdout, (value, ))
            elif channel_name == 'retcode':
                # see if any leftovers after receiving exit code
                stdout, stderr = self.get_output()
                for data_channel_name, values in (('stdout', stdout),
                                                  ('stderr', stderr)):
                    for data_value in values:
                        yield (data_channel_name, data_value)
                value = self.set_retcode(value)
            elif channel_name == 'ack':
                self.on_ack(value)
                continue

            yield (channel_name, value)

            self.raise_if_needed()

    def iter_lines(self, line_timeout=DAY, timeout=DAY):
        """
        Iterate command output

        :param [int, float] line_timeout: Timeout for new output to be recieved in seconds
        :param [int, float] timeout: Timout for command to finish in seconds

        :returns: A Tuple containing a line from stdout or None and a line from stderr or None
        :rtype: Tuple[str, str]
        """
        done = False
        parts = {'stdout': '', 'stderr': ''}

        def return_for_channel(channel, line):
            if channel == 'stdout':
                return (line, None)
            elif channel == 'stderr':
                return (None, line)

        def yield_channel(channel):
            lines = parts[channel].splitlines(keepends=True)
            parts[channel] = ''
            for line in lines:
                if line.endswith('\n'):
                    yield return_for_channel(channel, line.strip('\n'))
                else:
                    parts[channel] = line
            if done:
                yield return_for_channel(channel, parts[channel])

        for channel, value in self.iter_results(line_timeout=line_timeout,
                                                timeout=timeout):
            if channel in ['stdout', 'stderr']:
                try:
                    parts[channel] += value.decode('utf-8')
                except UnicodeDecodeError:
                    _logger.warning("Unable to decode %s to utf-8!", value)
                    parts[channel] += "%s" % value
                yield from yield_channel(channel)
            elif channel == 'retcode':
                done = True

    @log_context(host="{.hostname}")
    def log_pipe(self,
                 logger,
                 stdout_lvl=logging.INFO,
                 stderr_lvl=logging.ERROR,
                 line_timeout=DAY):
        """
        Pipe command output to a logger

        :param [logging.Logger] logger: The logger to use to log command output
        :param [int] stdout_lvl: Logging level to use in order to log stdout
        :param [int] stderr_lvl: Logging level to use in order to log stderr
        :param [int, float] line_timeout: Timeout for new output to be recieved in seconds
        """
        for stdout, stderr in self.iter_lines(line_timeout=line_timeout):
            if stdout_lvl is not None and stdout:
                logger.log(stdout_lvl, stdout)
            if stderr_lvl is not None and stderr:
                logger.log(stderr_lvl, stderr)

    @log_context(host="{.hostname}")
    def foreground(self, stdout=True, stderr=True):
        """
        Print command output as it is recieved from the agent

        :param [bool] stdout: Should stdout be printed
        :param [bool] stderr: Should stderr be printed

        :returns: The command return code when done
        :rtype: int
        """
        should_print = dict(stdout=stdout, stderr=stderr)
        for channel, value in self.iter_results():
            if channel in ['stdout', 'stderr'] and should_print[channel]:
                print(value.decode('utf-8'), end='')
            elif channel == 'retcode':
                return self.retcode
            else:
                self.raise_exception(exception_cls=UnknownChannel,
                                     channel=channel)

    def check_client_timeout(self):
        """
        Check if the command has timed out

        This will raise an appropriate exception depends on what has timed out:
        * If the reactor haven't sent the command before the handling timer expired: TalkerClientSendTimeout
        * If the command has yet to be acked and the agent is alive: TalkerCommandLost
        * If the command has yet to be acked and the agent is not responding: TalkerServerTimeout
        * If ack is not supported and the command has expired: ClientCommandTimeoutError
        """
        if not self.is_sent:
            # client/reactor did not send command yet
            if self.handling_timer.expired:
                self.raise_exception(exception_cls=TalkerClientSendTimeout,
                                     timeout=self.handling_timer.elapsed)
        elif self.ack_supported and not self.ack:
            is_talker_alive_name = 'is_talker_alive'

            # client did not receive command - check all timers
            if self.ack_timer.expired or self.client_timer.expired:
                talker_alive = False
                if self.name != is_talker_alive_name:  # prevent recursion
                    try:
                        self.talker.run(self.host_id,
                                        'true',
                                        name=is_talker_alive_name,
                                        timeout=5).wait()
                    except TalkerServerTimeout:
                        pass
                    else:
                        # check again, since talker may have just booted
                        self.poll(check_client_timeout=False)
                        if self.ack:
                            # seems like our command was finally answered
                            _logger.debug(
                                "we thought TalkerCommandLost, but finally not so... (%s)",
                                self)
                            return
                        talker_alive = True
                        if self.attempts < TALKER_COMMAND_LOST_RETRY_ATTEMPTS:
                            self.attempts += 1
                            self.ack_timer = None
                            self.send()
                            return

                exception_cls = TalkerCommandLost if talker_alive else TalkerServerTimeout
                self.raise_exception(exception_cls=exception_cls,
                                     timeout=self.ack_timer.elapsed)
        elif self.client_timer.expired:
            self.raise_exception(exception_cls=ClientCommandTimeoutError,
                                 timeout=self.ack_timer.elapsed,
                                 started=self.since_started)

    def raise_if_needed(self):
        """
        Raise an error for the command failure. If the command succeeded, do nothing
        """
        if self.aborted_by == AbortedBy.timeout:
            self.raise_exception(exception_cls=CommandTimeoutError,
                                 timeout=Duration(self.timeout))
        if self.aborted_by == AbortedBy.reboot:
            self.raise_exception(exception_cls=CommandAbortedByReboot)
        if self.aborted_by == AbortedBy.overflowed:
            self.raise_exception(
                exception_cls=CommandAbortedByOverflow,
                max_output_per_channel=self.max_output_per_channel)
        if self.aborted_by == AbortedBy.hanging:
            self.raise_exception(exception_cls=CommandHanging)
        if self.aborted_by == AbortedBy.orphaned:
            self.raise_exception(exception_cls=CommandOrphaned)
        if self.aborted_by == AbortedBy.line_timeout:
            self.raise_exception(exception_cls=CommandLineTimeout,
                                 line_timeout=self.line_timeout)
        if self.aborted_by == AbortedBy.error:
            _, traceback = self.get_output(new=False)
            traceback = self._decode_output(traceback, safe=True).strip()
            exception = traceback.splitlines()[-1]
            raise TalkerError(host_id=self.host_id,
                              hostname=self.hostname,
                              fulltb=traceback,
                              _exception=exception)
        if self.retcode is None:
            return
        if self.raise_on_failure and self.retcode not in self.good_codes:
            self.raise_exception()

    def raise_exception(self, exception_cls=CommandExecutionError, **params):
        """
        Raise an exception for this command
        """
        def process_output(d):
            text = self._decode_output(d, safe=True)
            if len(text) > 1010:
                text = "...%s" % text[-1000:]
            return text

        log_files = self.log_file if isinstance(
            self.log_file, (tuple, list)) else [self.log_file] * 2
        stds = [None] * 2 if all(log_files) else (process_output(std)
                                                  for std in self.get_output(
                                                      new=False))
        for (param, std, log_file) in zip("stdout stderr".split(), stds,
                                          log_files):
            params[param] = ("<%s>" % log_file) if log_file else std

        raise exception_cls(cmd=self.job_id,
                            name=self.name or "(unnamed command)",
                            args=" ".join(map(str, self.args)),
                            host_id=self.host_id,
                            hostname=self.hostname,
                            retcode=self.retcode,
                            since_started=self.since_started,
                            talker=self.talker,
                            **params)

    def on_polled(self, exit_code):
        """
        Report return code for the command 

        :param [int] exit_code: The command return code

        :returns: The command return code or None if not yet set
        :rtype: int
        """
        if self.retcode is not None:
            return self.retcode
        if exit_code is None:
            return
        _verbose_logger.debug("%s: exit-code received (%s)", self, exit_code)
        self.set_retcode(exit_code)
        return self.retcode

    def poll(self, check_client_timeout=True):
        """
        Check if the command is finished and return it's return code

        :param [bool] check_client_timeout: Also check if the command timed out

        :returns: The command return code if done
        :rtype: int
        """
        if self.retcode is not None:
            return self.retcode

        if self.ack_supported and not self.ack:
            ack = self.talker.reactor.lpop(self._ack_key)
            if ack is None:
                if check_client_timeout:
                    self.check_client_timeout()
                return
            self.on_ack(ack)

        exit_code = self.talker.reactor.lpop(self._exit_code_key)
        if exit_code is None:
            if check_client_timeout:
                self.check_client_timeout()
        return self.on_polled(exit_code)

    def set_retcode(self, exit_code):
        """
        Set the command return code

        :param [int] exit_code: The command return code
        """
        if exit_code is None:
            return
        try:
            exit_code = int(exit_code)
        except ValueError:
            exit_code = exit_code.decode('utf-8')

        self.on_sent()  # sometimes we get here before the callback is called
        self.ack_timer.stop()

        if isinstance(exit_code, int):
            self.retcode = exit_code
        elif hasattr(AbortedBy, exit_code):
            self.aborted_by = getattr(AbortedBy, exit_code)
            self.retcode = -1
        else:
            self.raise_exception(exception_cls=UnknownExitCode,
                                 exit_code=exit_code)
        self.raise_if_needed()

    def wait(self, for_ack=False):
        """
        Wait until the command is done

        :param [int] for_ack: Wait for the command to be acked instead of command to finish

        :returns: The command return code when done
        :rtype: int
        """
        while True:

            if for_ack:
                assert self.ack_supported, "ack is not supported"
                if self.ack is not None:
                    return self.ack

            if self.retcode is not None:
                return self.retcode

            # we don't want to wait too long, cause we want to raise timeout exceptions promptly
            blpop_timeout = (1 if not self.is_sent else
                             self.ack_timer.expiration // 10 if
                             (self.ack_supported and not self.ack) else
                             self.client_timer.expiration // 10)

            result = self.talker.reactor.blpop(
                [self._exit_code_key, self._ack_key], timeout=blpop_timeout)
            if result is None:
                self.check_client_timeout()
                continue

            key, value = result
            key = key.decode()
            if key == self._ack_key:
                _verbose_logger.debug("%s: ack received (%s)", self, value)
                self.on_ack(value)
                continue

            return self.on_polled(result[1])

    def result(self, decode='utf8'):
        """
        Wait until the command is done and return it's stdout output

        :param [str] decode: Expected output encoding

        :returns: The command stdout output
        :rtype: str
        """
        self.wait()
        stdout, _ = self.get_output(new=False)
        return self._decode_output(stdout, decode=decode)

    def _decode_output(self, output, decode='utf8', safe=False):
        ret = b''.join(output)
        if decode:
            ret = ret.decode(decode, errors='replace' if safe else 'strict')
        return ret

    def _request_outputs(self, pipeline):
        pipeline.lrange(self._stdout_key, 0, -1)
        pipeline.lrange(self._stderr_key, 0, -1)

    def _fetch_outputs(self):
        with self.talker.reactor.pipeline() as p:
            self._request_outputs(p)
            new_stdout, new_stderr = p.execute()
        return new_stdout, new_stderr

    def _trim_outputs(self, stdout, stderr, pipeline=None):
        len_stdout = len(stdout)
        len_stderr = len(stderr)
        with ExitStack() as stack:
            if pipeline is not None:
                p = pipeline
            else:
                p = stack.enter_context(self.talker.reactor.pipeline())
            for data_len, redis_key in \
                    ((len_stdout, self._stdout_key),
                     (len_stderr, self._stderr_key)):
                if data_len:
                    p.ltrim(redis_key, data_len, -1)
            if pipeline is None:
                p.execute()

    def on_output(self, channel, data):
        """
        Update the command output

        :param [List[bytes]] channel: The channel to update. One of `self.stdout` or `self.stderr`
        :param [bytes] data: New data to add to one of the channels
        """
        if data:
            channel.extend(data)

    def on_ack(self, ack):
        """
        Update the command ack

        :param [bytes] ack: The command start time on the host
        """
        self.ack = float(ack.decode())

    def get_output(self, new=True):
        """
        Get raw command output

        :param [bool] new: Get only new output

        :returns: Stdout and stderr from the command
        :rtype: Tuple[bytes, bytes]
        """
        new_stdout, new_stderr = self._fetch_outputs()
        self.on_output(self.stdout, new_stdout)
        self.on_output(self.stderr, new_stderr)
        self._trim_outputs(stdout=new_stdout, stderr=new_stderr)
        if new:
            return new_stdout, new_stderr
        else:
            return self.stdout, self.stderr
Ejemplo n.º 22
0
    def iter_results(self, line_timeout=DAY, timeout=DAY):
        """
        Iterate return code and output results

        :param [int, float] line_timeout: Timeout for new output to be recieved in seconds
        :param [int, float] timeout: Timout for command to finish in seconds

        :returns: A Tuple with the channel name and value. Channel names are `retcode`, `stdout` and `stderr`
        :rtype: Tuple[str, str]
        """
        timeout = timeout or DAY
        line_timeout = line_timeout or DAY
        blpop_timeout = min(line_timeout, timeout // 10, self.timeout // 10,
                            AGENT_ACK_TIMEOUT // 10)

        session_timer = Timer(
            expiration=timeout
        )  # timeout for this invocation (not entire command duration)
        line_timer = Timer(expiration=line_timeout)
        timeout_reset_timer = Timer(expiration=max(line_timeout // 10, 1))
        self._sync_timeout(line_timeout=line_timeout)

        while self.retcode is None:

            if line_timer.expired:
                self.raise_exception(exception_cls=CommandLineTimeout,
                                     line_timeout=Duration(line_timeout))

            if session_timer.expired:
                self.raise_exception(exception_cls=CommandTimeout,
                                     timeout=timeout)

            channels = {
                self._ack_key: 'ack',
                self._stderr_key: 'stderr',
                self._stdout_key: 'stdout',
                self._exit_code_key: 'retcode'
            }
            res = self.talker.reactor.blpop([
                self._ack_key, self._stderr_key, self._stdout_key,
                self._exit_code_key
            ],
                                            timeout=blpop_timeout)
            if not res:
                if not self.ack:
                    # no point checking these timers timeout if the command hasn't been acknowledged yet
                    line_timer.reset()
                    session_timer.reset()

                self.check_client_timeout()
                continue
            else:
                channel_name, value = res
                channel_name = channels[channel_name.decode('utf-8')]

            line_timer.reset()
            if timeout_reset_timer.expired:
                timeout_reset_timer.reset()
                self.reset_server_timeout()

            if channel_name == 'stderr':
                self.on_output(self.stderr, (value, ))
            elif channel_name == 'stdout':
                self.on_output(self.stdout, (value, ))
            elif channel_name == 'retcode':
                # see if any leftovers after receiving exit code
                stdout, stderr = self.get_output()
                for data_channel_name, values in (('stdout', stdout),
                                                  ('stderr', stderr)):
                    for data_value in values:
                        yield (data_channel_name, data_value)
                value = self.set_retcode(value)
            elif channel_name == 'ack':
                self.on_ack(value)
                continue

            yield (channel_name, value)

            self.raise_if_needed()
Ejemplo n.º 23
0
 def on_sent(self):
     if not self.ack_timer:
         self.ack_timer = Timer(
             expiration=TALKER_CONTEXT.ack_timeout
         )  # this is expiration on the agent ack'ing the command
Ejemplo n.º 24
0
class concurrent(object):

    def __init__(self, func, *args, **kwargs):
        self.func = func
        self.args = args
        self.kwargs = kwargs
        self.throw = kwargs.pop('throw', True)
        self.daemon = kwargs.pop('daemon', True)
        self.threadname = kwargs.pop('threadname', 'anon-%X' % id(self))
        self.stopper = kwargs.pop('stopper', threading.Event())
        self.sleep = kwargs.pop('sleep', 1)
        self.loop = kwargs.pop('loop', False)
        self.timer = None

        real_thread_no_greenlet = kwargs.pop('real_thread_no_greenlet', False)
        if is_module_patched("threading"):
            # in case of using apply_gevent_patch function - use this option in order to defer some jobs to real threads
            self.real_thread_no_greenlet = real_thread_no_greenlet
        else:
            # gevent isn't active, no need to do anything special
            self.real_thread_no_greenlet = False

        rimt = kwargs.pop("raise_in_main_thread", False)
        if rimt:
            exc_type = Exception if rimt is True else rimt
            self.func = raise_in_main_thread(exc_type)(self.func)

    def __repr__(self):
        flags = ""
        if self.daemon:
            flags += 'D'
        if self.loop:
            flags += 'L'
        if self.real_thread_no_greenlet:
            flags += 'T'
        return "<%s[%s] '%s'>" % (self.__class__.__name__, self.threadname, flags)

    def _logged_func(self):
        self.timer = Timer()
        self.exc = None
        try:
            _logger.debug("%s - starting", self)
            while True:
                self.result = self.func(*self.args, **self.kwargs)
                if not self.loop:
                    return
                if self.wait(self.sleep):
                    _logger.debug("%s - stopped", self)
                    return
        except Exception as exc:
            _logger.silent_exception("Exception in thread running %s (traceback can be found in debug-level logs)", self.func)
            self.exc = exc
        finally:
            self.timer.stop()
            self.stop()

    def stop(self):
        _logger.debug("%s - stopping", self)
        self.stopper.set()

    def wait(self, timeout=None):
        if self.real_thread_no_greenlet:
            # we can't '.wait' on this gevent event object, so instead we test it and sleep manually:
            if self.stopper.is_set():
                return True
            non_gevent_sleep(timeout)
            if self.stopper.is_set():
                return True
            return False
        return self.stopper.wait(timeout)

    @contextmanager
    def paused(self):
        self.stop()
        yield
        self.start()

    @contextmanager
    def _running(self):
        if _disabled:
            self._logged_func()
            yield self
            return

        if self.real_thread_no_greenlet:
            _logger.debug('sending job to a real OS thread')
            self._join = defer_to_thread(func=self._logged_func, threadname=self.threadname)
        else:
            # threading.Thread could be a real thread or a gevent-patched thread...
            self.thread = threading.Thread(target=self._logged_func, name=self.threadname, daemon=self.daemon)
            _logger.debug('sending job to %s', self.thread)
            self.stopper.clear()
            self.thread.start()
            self._join = self.thread.join
        try:
            yield self
        finally:
            self.stop()  # if we loop, stop it
        self._join()
        if self.throw and self.exc:
            raise self.exc

    def __enter__(self):
        self._ctx = self._running()
        return self._ctx.__enter__()

    def __exit__(self, *args):
        return self._ctx.__exit__(*args)

    def __iter__(self):
        with self:
            self.iterations = 0
            while not self.wait(self.sleep):
                yield self
                self.iterations += 1

    start = __enter__

    def join(self):
        self.__exit__(None, None, None)

    __del__ = join
Ejemplo n.º 25
0
    def as_completed(self,
                     cmds,
                     sleep=0.5,
                     timeout=DAY,
                     initial_log_interval=12):
        """
        Yields completed commands, until all are completed or the timeout expires (if provided).
        If timeout is 0, this will effectively stop the generator instead of blocking on remaining commands

        :param [List[Cmd]] cmds: The list of command to wait on
        :param [int, float] sleep: How long to sleep between checks
        :param [int, float] timeout: After how many seconds should we throw a timeout error
        :param [int, float] initial_log_interval: Interval between logging

        :returns: Yields commands and their results as a tuple
        :rtype: Tuple[Cmd, result???]
        """
        pending = list(cmds)

        # We prevent exceptions from being raised from this iterator, so that we can keep iterating
        # through all cmds. we'll let the caller deal with raising exceptions mid-iteration if it so desires
        # We also need to deal with the case of re-entering this iterator with incomplete cmds, so we must
        # keep the original 'raise_on_failure' setting across invocations
        for cmd in pending:
            cmd._should_raise_on_failure = getattr(cmd,
                                                   "_should_raise_on_failure",
                                                   cmd.raise_on_failure)
            cmd.raise_on_failure = False

        log_interval = initial_log_interval
        logging_timer = BackoffTimer(expiration=log_interval,
                                     max_interval=HOUR)
        timeout_timer = Timer(expiration=timeout)

        had_results = False

        def _poll():
            nonlocal had_results
            results = self.poll(pending)

            done_idxes = []
            for i, result in enumerate(results):
                cmd = pending[i]
                if result is not None:
                    done_idxes.append(i)
                    cmd.raise_on_failure = cmd._should_raise_on_failure
                    yield cmd, result

            for i in sorted(
                    done_idxes, reverse=True
            ):  # Alternative would be to copy pending list, but this is more efficient
                pending.pop(i)

            if pending:
                if logging_timer.expired:
                    logging_timer.backoff()
                    MultiObject(pending).check_client_timeout()
                    hosts = sorted(set(cmd.hostname for cmd in pending))
                    _logger.info("Waiting on %s command(s) on %s host(s): %s",
                                 len(pending), len(hosts), ", ".join(hosts))
                    for cmd in pending:
                        since_started = (
                            "no-ack" if not cmd.ack_supported else
                            "acked {:ago}".format(cmd.since_started)
                            if cmd.ack else "not-started")
                        _logger.debug("   job-id: %s (%s)", cmd.job_id,
                                      since_started)
            if done_idxes:
                had_results = True

        while True:
            yield from _poll()
            if not pending:
                return
            if timeout == 0:
                if had_results:
                    return
                else:
                    continue
            if timeout_timer.expired:
                raise TimeoutException(
                    "Commands (%s) did not complete within timeout" %
                    len(pending),
                    timeout=timeout,
                    pending=pending)