Example #1
    def _instance_wait_safe(self, instance_method, *args, **kwargs):
        """
        Wrapper around GCE instance methods that is safer to use.

        Try the given method and, if it fails, retry it using an exponential
        backoff algorithm, similar to what Amazon recommends for its own
        services [1].

        :see: [1] http://docs.aws.amazon.com/general/latest/gr/api-retries.html
        """
        threshold = 300
        ok = False
        retries = 0
        max_retries = 9
        while not ok and retries <= max_retries:
            try:
                return instance_method(*args, **kwargs)
            except Exception as details:  # pylint: disable=broad-except
                self.log.error('Call to method %s (retries: %s) failed: %s',
                               instance_method, retries, details)
                time.sleep(min((2**retries) * 2, threshold))
                retries += 1

        if not ok:
            raise cluster.NodeError('GCE instance %s method call error after '
                                    'exponential backoff wait' %
                                    self.ec2_host.id)
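
For reference, the sleep expression used above, min((2**retries) * 2, threshold), produces the following schedule (illustration only, not part of the original code):

    # Delays double from 2 s and are capped at the 300 s threshold; with
    # max_retries = 9 there are 10 attempts before NodeError is raised.
    delays = [min((2 ** retries) * 2, 300) for retries in range(10)]
    print(delays)       # [2, 4, 8, 16, 32, 64, 128, 256, 300, 300]
    print(sum(delays))  # 1110 seconds (~18.5 minutes) of total back-off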
Example #2
    def _instance_wait_safe(self, instance_method: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
        try:
            return exponential_retry(func=lambda: instance_method(*args, **kwargs), logger=self.log)
        except tenacity.RetryError:
            raise cluster.NodeError(
                f"Timeout while running '{instance_method.__name__}' method on GCE instance '{self._instance.id}'"
            ) from None
Example #3
    def _instance_wait_safe(self, instance_method: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
        try:
            return exponential_retry(func=lambda: instance_method(*args, **kwargs), logger=None)
        except tenacity.RetryError:
            try:
                self._instance.reload()
            except Exception as ex:  # pylint: disable=broad-except
                LOGGER.exception("Error while reloading instance metadata: %s", ex)
            finally:
                LOGGER.debug(self._instance.meta.data)
                raise cluster.NodeError(
                    f"Timeout while running '{instance_method.__name__}' method on AWS instance '{self._instance.id}'"
                ) from None
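
Examples #2 and #3 delegate the retry loop to an exponential_retry helper that is not shown here. A minimal sketch of how such a helper could be built on tenacity follows; the name, signature and defaults are assumptions for illustration, not the project's actual implementation:

    import logging
    import tenacity

    def exponential_retry(func, logger=None, attempts=10, max_wait=300):
        """Call ``func`` with exponential back-off between attempts."""
        retrying = tenacity.Retrying(
            stop=tenacity.stop_after_attempt(attempts),
            wait=tenacity.wait_exponential(multiplier=2, max=max_wait),
            retry=tenacity.retry_if_exception_type(Exception),
            before_sleep=tenacity.before_sleep_log(logger, logging.ERROR) if logger else None,
        )
        return retrying(func)

When the attempts are exhausted, tenacity raises tenacity.RetryError, which is exactly what the callers in Examples #2 and #3 catch and translate into cluster.NodeError.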
Example #4
    def _instance_wait_safe(self, instance_method, *args, **kwargs):
        """
        Wrapper around AWS instance waiters that is safer to use.

        Since AWS adopts an eventual consistency model, sometimes the method
        wait_until_running will raise a botocore.exceptions.WaiterError saying
        the instance does not exist. The AWS API guide [1] recommends that the
        procedure be retried using an exponential backoff algorithm [2].

        :see: [1] http://docs.aws.amazon.com/AWSEC2/latest/APIReference/query-api-troubleshooting.html#eventual-consistency
        :see: [2] http://docs.aws.amazon.com/general/latest/gr/api-retries.html
        """
        threshold = 300
        ok = False
        retries = 0
        max_retries = 9
        while not ok and retries <= max_retries:
            try:
                instance_method(*args, **kwargs)
                ok = True
            except WaiterError:
                time.sleep(min((2**retries) * 2, threshold))
                retries += 1

        if not ok:
            try:
                self._instance.reload()
            except Exception as ex:  # pylint: disable=broad-except
                LOGGER.exception("Error while reloading instance metadata: %s",
                                 ex)
            finally:
                method_name = instance_method.__name__
                instance_id = self._instance.id
                LOGGER.debug(self._instance.meta.data)
                msg = "Timeout while running '{method_name}' method on AWS instance '{instance_id}'".format(
                    method_name=method_name, instance_id=instance_id)
                raise cluster.NodeError(msg)
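
For context, a hypothetical call site is shown below; it assumes self._instance is a boto3 ec2.Instance resource (as the reload() and meta.data calls above suggest) and uses the standard boto3 instance-resource waiters:

    # Hypothetical usage of the wrapper above on boto3 resource waiters.
    self._instance_wait_safe(self._instance.wait_until_running)
    self._instance_wait_safe(self._instance.wait_until_terminated)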