Example #1
 def _statistics(self, rule, start, end):
     try:
         # FIXME(sileht): When a Heat autoscaling stack decides to
         # delete an instance, the Gnocchi metrics associated with that
         # instance are no longer updated, and when the alarm asks for
         # the aggregation, Gnocchi raises a 'No overlap' exception.
         # So temporarily set 'needed_overlap' to 0 to disable the
         # Gnocchi checks for missing points. For more detail see:
         #   https://bugs.launchpad.net/gnocchi/+bug/1479429
         return self._gnocchi_client.metric.aggregation(
             metrics=rule['metrics'],
             start=start,
             stop=end,
             aggregation=rule['aggregation_method'],
             needed_overlap=0)
     except exceptions.MetricNotFound:
         raise threshold.InsufficientDataError(
             'At least one of the metrics in %s does not exist' %
             rule['metrics'], [])
     except exceptions.NotFound:
         # TODO(sileht): gnocchiclient should raise an explicit
         # exception for AggregationNotFound. This API endpoint
         # can only raise three different 404s, so it is safe to
         # assume this is an AggregationNotFound for now.
         raise threshold.InsufficientDataError(
             'aggregation %s does not exist for at least one '
             'metric in %s' %
             (rule['aggregation_method'], rule['metrics']), [])
     except Exception as e:
         msg = 'alarm statistics retrieval failed: %s' % e
         LOG.warning(msg)
         raise threshold.InsufficientDataError(msg, [])
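
A minimal usage sketch of the pattern above, assuming a hypothetical evaluator instance (the rule contents, metric IDs, and timestamps are illustrative, not from the source): every failure mode surfaces as threshold.InsufficientDataError, so the caller needs only one except clause.

    # Hypothetical caller; 'evaluator', the metric IDs, and the
    # timestamps are illustrative assumptions, not from the source.
    rule = {
        'metrics': ['41869681-5776-46d6-91ed-cccc43b6e4e3',
                    'a1fb80f4-c242-4f57-87c6-68f47521059e'],
        'aggregation_method': 'mean',
    }
    try:
        stats = evaluator._statistics(
            rule, start='2016-02-02T10:00:00', end='2016-02-02T11:00:00')
    except threshold.InsufficientDataError:
        # Missing metrics, unknown aggregations, and transport errors
        # all collapse into this single exception type.
        stats = []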
Example #2
 def _statistics(self, rule, start, end):
     try:
         return self._gnocchi_client.metric.get_measures(
             metric=rule['metric'],
             start=start,
             stop=end,
             resource_id=rule['resource_id'],
             aggregation=rule['aggregation_method'])
     except exceptions.MetricNotFound:
         raise threshold.InsufficientDataError(
             'metric %s for resource %s does not exist' %
             (rule['metric'], rule['resource_id']), [])
     except exceptions.ResourceNotFound:
         raise threshold.InsufficientDataError(
             'resource %s does not exist' % rule['resource_id'], [])
     except exceptions.NotFound:
         # TODO(sileht): gnocchiclient should raise an explicit
         # exception for AggregationNotFound. This API endpoint
         # can only raise three different 404s, so it is safe to
         # assume this is an AggregationNotFound for now.
         raise threshold.InsufficientDataError(
             'aggregation %s does not exist for '
             'metric %s of resource %s' %
             (rule['aggregation_method'], rule['metric'],
              rule['resource_id']), [])
     except Exception as e:
         msg = 'alarm statistics retrieval failed: %s' % e
         LOG.warning(msg)
         raise threshold.InsufficientDataError(msg, [])
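
For context, get_measures returns each datapoint as a [timestamp, granularity, value] triple, which is the structure Example #4 below filters by granularity. A sketch of that shape with made-up values:

    # Illustrative shape of the get_measures return value; the
    # timestamps and numbers are made up.
    measures = [
        ['2016-02-02T10:00:00+00:00', 300.0, 42.0],
        ['2016-02-02T10:05:00+00:00', 300.0, 43.5],
    ]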
Example #3
    def _get_unhealthy_members(self, pool_id):
        """Get number of unhealthy members in a pool.

        The member (virtual machine) operating_status stays ERROR after
        creation until the application inside it is up and running, so
        recently created members are ignored during the check.
        """
        unhealthy_members = []

        try:
            ret = self.lb_client.member_list(pool_id)
        except Exception as e:
            LOG.warning("Failed to communicate with load balancing service, "
                        "error: %s", six.text_type(e))
            raise threshold.InsufficientDataError(
                'failed to communicate with load balancing service',
                []
            )

        if getattr(ret, 'status_code', None):
            # A status_code attribute on the result indicates an error
            # response from the service.
            raise threshold.InsufficientDataError(ret.content, [])

        for m in ret.get("members", []):
            try:
                created_time = parser.parse(m['created_at'], ignoretz=True)
            except ValueError:
                LOG.warning('Failed to parse the member created time.')
                continue

            now = timeutils.utcnow()
            t = self.conf.member_creation_time
            if now - created_time < datetime.timedelta(seconds=t):
                LOG.debug("Ignore member which was created within %ss", t)
                continue

            if m["admin_state_up"] and m["operating_status"] == "ERROR":
                unhealthy_members.append(m)

        return unhealthy_members
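
The grace-period check in the loop above can be isolated as a self-contained sketch. Here dateutil's parser and a plain utcnow() stand in for the oslo.utils timeutils call, and the 360-second grace period is an assumed member_creation_time value:

    import datetime

    from dateutil import parser

    # Illustrative member record; values are made up.
    member = {'created_at': '2017-05-04T08:12:09',
              'admin_state_up': True,
              'operating_status': 'ERROR'}
    grace = 360  # stands in for conf.member_creation_time (seconds)

    created = parser.parse(member['created_at'], ignoretz=True)
    old_enough = (datetime.datetime.utcnow() - created
                  >= datetime.timedelta(seconds=grace))
    # A member counts as unhealthy only after the grace period expires.
    unhealthy = (old_enough and member['admin_state_up']
                 and member['operating_status'] == 'ERROR')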
Example #4
 def _sanitize(rule, statistics):
     """Return the datapoints that correspond to the alarm granularity"""
     # TODO(sileht): if there's no direct match, but there is an archive
     # policy with a granularity that's an even divisor of the period,
     # we could potentially do a mean-of-means (or max-of-maxes or
     # whatever, but not a stddev-of-stddevs).
     # TODO(sileht): support alarm['exclude_outliers']
     LOG.debug('sanitize stats %s', statistics)
     statistics = [
         stats[VALUE] for stats in statistics
         if stats[GRANULARITY] == rule['granularity']
     ]
     if not statistics:
         raise threshold.InsufficientDataError(
             "No datapoint for granularity %s" % rule['granularity'], [])
     statistics = statistics[-rule['evaluation_periods']:]
     LOG.debug('pruned statistics to %d', len(statistics))
     return statistics
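
A worked example of the filtering above, assuming GRANULARITY and VALUE are module-level indexes into each [timestamp, granularity, value] measure triple (the sample measures and rule are made up):

    GRANULARITY, VALUE = 1, 2  # assumed indexes into a measure triple

    rule = {'granularity': 300, 'evaluation_periods': 2}
    measures = [
        ['2016-02-02T10:00:00+00:00', 3600, 41.0],  # hourly: dropped
        ['2016-02-02T10:00:00+00:00', 300, 42.0],
        ['2016-02-02T10:05:00+00:00', 300, 43.5],
        ['2016-02-02T10:10:00+00:00', 300, 45.0],
    ]

    datapoints = [m[VALUE] for m in measures
                  if m[GRANULARITY] == rule['granularity']]
    # Keep only the most recent evaluation_periods points: [43.5, 45.0]
    datapoints = datapoints[-rule['evaluation_periods']:]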