def testResetAfter(self):
        """Tests that the reset_after flag works to send metrics only once."""
        # By mocking out the "time" module, we make the forked flushing
        # process think it should call Flush() whenever we send a metric.
        self.patchTime()

        with tempfile.NamedTemporaryFile(dir='/var/tmp') as out:
            # * The indirect=True flag is required for reset_after to work.
            # * Using debug_file, we send metrics to the temporary file instead
            #   of sending metrics to production via PubSub.
            with ts_mon_config.SetupTsMonGlobalState('metrics_unittest',
                                                     indirect=True,
                                                     debug_file=out.name):

                def MetricName(i, flushed):
                    return 'test/metric/name/%d/%s' % (i, flushed)

                # Each of these .set() calls will result in a Flush() call.
                for i in range(7):
                    # Any extra streams with different fields and reset_after=False
                    # will be cleared only if the metric below is cleared.
                    metrics.Boolean(MetricName(i, True),
                                    reset_after=False).set(
                                        True, fields={'original': False})

                    metrics.Boolean(MetricName(i, True), reset_after=True).set(
                        True, fields={'original': True})

                for i in range(7):
                    metrics.Boolean(MetricName(i, False),
                                    reset_after=False).set(True)

            # By leaving the context, we .join() the flushing process.
            with open(out.name, 'r') as fh:
                content = fh.read()

            # The flushed metrics should be sent only three times, because:
            # * original=False is sent twice
            # * original=True is sent once.
            for i in range(7):
                self.assertEqual(content.count(MetricName(i, True)), 3)

            # The nonflushed metrics are sent once per flush.
            # There are 7 of these metrics:
            # * The 0th is sent 7 times,
            # * The 1st is sent 6 times,
            # ...
            # * The 6th is sent 1 time.
            # So the "i"th metric is sent (7 - i) times.
            for i in range(7):
                self.assertEqual(content.count(MetricName(i, False)), 7 - i)
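
A minimal standalone sketch (not from the original test) of the counting argument in the comment above: assuming one flush per .set() call, a metric created with reset_after=False before flush i is re-sent on every later flush, so the i-th of the 7 nonflushed metrics appears 7 - i times in the debug file.

def expected_nonflushed_counts(num_metrics=7):
    """Hypothetical helper: how many times each reset_after=False metric from
    the second loop is written out, assuming one flush per .set() call."""
    counts = [0] * num_metrics
    for flush in range(num_metrics):
        # At flush number `flush`, metrics 0..flush already exist and are
        # re-sent because nothing ever clears them.
        for i in range(flush + 1):
            counts[i] += 1
    return counts

# expected_nonflushed_counts() == [7, 6, 5, 4, 3, 2, 1], i.e. 7 - i as asserted.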
Example 2
    def testEnqueue(self):
        """Test that _Indirect enqueues messages correctly."""
        metric = metrics.Boolean('foo')
        # The metric should be pickleable
        pickle.dumps(metric)

        q = Queue.Queue()
        self.PatchObject(metrics, 'MESSAGE_QUEUE', q)

        proxy_metric = metrics.Boolean('foo')
        proxy_metric.example_method('arg1', 'arg2')

        entry = q.get_nowait()
        self.assertEqual(entry,
                         (metric, 'example_method', ('arg1', 'arg2'), {}))
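
A minimal sketch of the proxying idea this test exercises, using a hypothetical _EnqueueingProxy class rather than the real _Indirect implementation: attribute lookups return a closure that records the call on the shared queue instead of executing it.

class _EnqueueingProxy(object):
    """Hypothetical sketch of a metric proxy that enqueues its method calls."""

    def __init__(self, metric, message_queue):
        self._metric = metric
        self._queue = message_queue

    def __getattr__(self, method_name):
        def _enqueue(*args, **kwargs):
            # Match the tuple shape asserted in testEnqueue above.
            self._queue.put((self._metric, method_name, args, kwargs))
        return _enqueue

# Usage sketch:
#   proxy = _EnqueueingProxy(metrics.Boolean('foo'), metrics.MESSAGE_QUEUE)
#   proxy.set(True)  # enqueues (metric, 'set', (True,), {})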
Example 3
    def record_working_state(self, working, timestamp):
        """Report to Monarch whether we are working or broken.

        @param working    Host repair status. `True` means that the DUT
                          is up and expected to pass tests.  `False`
                          means the DUT has failed repair and requires
                          manual intervention.
        @param timestamp  Time that the status was recorded.
        """
        fields = self.get_metric_fields()
        metrics.Boolean(self._HOST_WORKING_METRIC,
                        reset_after=True).set(working, fields=fields)
        metrics.Boolean(self._BOARD_SHARD_METRIC, reset_after=True).set(
            True, fields={'board': self.board or ''})
        self.record_pool(fields)
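
A usage sketch with hypothetical names (host_state and the clock are placeholders); because both metrics use reset_after=True, each call is flushed once and then cleared, so a stale working/broken report is never re-sent on later flushes.

#   import time
#   host_state.record_working_state(working=False, timestamp=time.time())
#   # -> host-working metric set to False with this host's fields, plus a
#   #    board-shard point set to True with fields={'board': self.board or ''}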
Example 4
  def _ignored_metric(self):
    return metrics.Boolean(
        self._metric_root_path + 'ignored',
        description=(
            'A boolean for servers ignored for test infra prod alerts.'),
        field_spec=[ts_mon.StringField('target_data_center'),
                    ts_mon.StringField('target_hostname'),])
Example 5
  def _presence_metric(self):
    return metrics.Boolean(
        self._metric_root_path + 'presence',
        description=(
            'A boolean indicating whether a server is in the machines db.'),
        field_spec=[ts_mon.StringField('target_data_center'),
                    ts_mon.StringField('target_hostname'),])
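
Both helpers declare the same two string fields; a short usage sketch with hypothetical field values (real callers fill these in from the server inventory):

#   self._presence_metric().set(
#       True, fields={'target_data_center': 'mtv',
#                     'target_hostname': 'server1.example.com'})
#   self._ignored_metric().set(
#       False, fields={'target_data_center': 'mtv',
#                      'target_hostname': 'server1.example.com'})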
Example 6
    def calculate_spares_needed(self, target_total):
        """Calculate and log the spares needed to achieve a target.

        Return how many working spares are needed to achieve the
        given `target_total` with all DUTs working.

        The spares count may be positive or negative.  Positive
        values indicate spares are needed to replace broken DUTs in
        order to reach the target; negative numbers indicate that
        no spares are needed, and that a corresponding number of
        working devices can be returned.

        If the new target total would require returning ineligible
        DUTs, an error is logged, and the target total is adjusted
        so that those DUTs are not exchanged.

        @param target_total  The new target pool size.

        @return The number of spares needed.

        """
        num_ineligible = len(self.ineligible_hosts)
        # Note: despite its name, spares_needed is a boolean; it is True when
        # the target can be reached without returning ineligible DUTs.
        spares_needed = target_total >= num_ineligible
        metrics.Boolean(
            'chromeos/autotest/balance_pools/exhausted_pools',
            'True for each pool/model which requests more DUTs than supplied',
            # TODO(jrbarnette) The 'board' field is a legacy.  We need
            # to leave it here until we do the extra work Monarch
            # requires to delete a field.
            field_spec=[
                ts_mon.StringField('pool'),
                ts_mon.StringField('board'),
                ts_mon.StringField('model'),
            ]).set(
                not spares_needed,
                fields={
                    'pool': self.pool,
                    'board': self.labels.get('model', ''),
                    'model': self.labels.get('model', ''),
                },
            )
        if not spares_needed:
            _log_error(
                '%s pool (%s): Target of %d is below minimum of %d DUTs.',
                self.pool,
                self.labels,
                target_total,
                num_ineligible,
            )
            _log_error('Adjusting target to %d DUTs.', num_ineligible)
            target_total = num_ineligible
        else:
            _log_message('%s %s pool: Target of %d is above minimum.',
                         self.labels.get('model', ''), self.pool, target_total)
        adjustment = target_total - self.total_hosts
        return len(self.broken_hosts) + adjustment
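
A standalone sketch (hypothetical numbers, not part of the class above) of the arithmetic: the result is the broken-DUT count plus the adjustment from the current total to the possibly clamped target.

def spares_for_target(total_hosts, num_broken, num_ineligible, target_total):
    """Hypothetical mirror of calculate_spares_needed's arithmetic."""
    if target_total < num_ineligible:
        # The clamping branch above: never plan to return ineligible DUTs.
        target_total = num_ineligible
    adjustment = target_total - total_hosts
    return num_broken + adjustment

# With 10 hosts (3 broken, 2 ineligible) and a target of 12: no clamping
# (12 >= 2), adjustment = 12 - 10 = 2, so 3 + 2 = 5 spares are needed.
assert spares_for_target(10, 3, 2, 12) == 5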
Example 7
def infer_balancer_targets(afe, arguments, pools):
    """Take some arguments and translate them to a list of models to balance

    Args:
    @param afe           AFE object to be used for taking inventory.
    @param arguments     Parsed command line arguments.
    @param pools         The list of pools to balance.

    @returns    a list of (model, labels) tuples to be balanced

    """
    balancer_targets = []

    for pool in pools:
        if arguments.all_models:
            inventory = lab_inventory.get_inventory(afe)
            quarantine = _too_many_broken(inventory, pool, arguments)
            if quarantine:
                _log_error('Refusing to balance all models for %s pool, '
                           'too many models with at least 1 broken DUT '
                           'detected.', pool)
            else:
                for model in inventory.get_pool_models(pool):
                    labels = labellib.LabelsMapping()
                    labels['model'] = model
                    if arguments.phase:
                        labels['phase'] = arguments.phase
                    balancer_targets.append((pool, labels.getlabels()))
            metrics.Boolean(
                'chromeos/autotest/balance_pools/unchanged_pools').set(
                    quarantine, fields={'pool': pool})
            _log_message('Pool %s quarantine status: %s', pool, quarantine)
        else:
            for model in arguments.models:
                labels = labellib.LabelsMapping()
                labels['model'] = model
                if arguments.sku:
                    labels['sku'] = arguments.sku
                if arguments.phase:
                    labels['phase'] = arguments.phase
                balancer_targets.append((pool, labels.getlabels()))
    return balancer_targets
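
An illustrative return value with hypothetical pool and label names, assuming LabelsMapping.getlabels() renders 'key:value' strings:

#   [('suites', ['model:eve', 'phase:PVT']),
#    ('suites', ['model:atlas', 'phase:PVT'])]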
Example 8
def main(args):
    """Entry point.

    Args:
      args: Sequence of command arguments.
    """
    opts = ParseArguments(args)
    ts_mon_config.SetupTsMonGlobalState('prodmon')

    presence_metric = metrics.Boolean(METRIC_PREFIX + 'presence')
    roles_metric = metrics.String(METRIC_PREFIX + 'roles')
    reporter = ProdHostReporter(
        source=AtestSource(atest_program=ATEST_PROGRAM),
        sinks=[
            TsMonSink(presence_metric=presence_metric,
                      roles_metric=roles_metric),
            LoggingSink()
        ])
    mainloop = MainLoop(interval=opts.interval, func=reporter)
    mainloop.LoopForever()
Example 9
    def set_status(self, status):
        """Proxy for setting the status of a host via the rdb.

        @param status: The new status.
        """
        # Update elasticsearch db.
        self._update({'status': status})
        self.record_state('host_history', 'status', status)

        # Update Monarch.
        fields = self.get_metric_fields()
        self.record_pool(fields)
        # As each device switches state, indicate that it is not in any
        # other state.  This allows Monarch queries to avoid double counting
        # when additional points are added by the Window Align operation.
        host_status_metric = metrics.Boolean(self._HOST_STATUS_METRIC,
                                             reset_after=True)
        for s in rdb_models.AbstractHostModel.Status.names:
            fields['status'] = s
            host_status_metric.set(s == status, fields=fields)
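
A minimal sketch of the one-hot pattern this loop implements, with hypothetical status names: every status gets exactly one fresh point, so a stale True from the previous state cannot be double counted.

def one_hot_status_points(current_status, all_statuses):
    """Hypothetical helper mirroring the loop above."""
    return {s: s == current_status for s in all_statuses}

# one_hot_status_points('Ready', ['Ready', 'Running', 'Repairing'])
#   == {'Ready': True, 'Running': False, 'Repairing': False}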
Example 10
# Copyright 2015 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging

from chromite.lib import metrics

DRONE_ACCESSIBILITY_METRIC = metrics.Boolean(
    'chromeos/autotest/scheduler/drone_accessibility')


class DroneTaskQueueException(Exception):
    """Generic task queue exception."""
    pass


class DroneTaskQueue(object):
    """A manager to run queued tasks in drones and gather results from them."""
    def __init__(self):
        self.results = dict()

    def get_results(self):
        """Get a results dictionary keyed on drones.

        @return: A dictionary of return values from drones.
        """
        results_copy = self.results.copy()
        self.results.clear()
        return results_copy
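
A usage sketch of the copy-then-clear contract of get_results(), with a hypothetical drone key:

#   task_queue = DroneTaskQueue()
#   task_queue.results['drone1.example.com'] = 'ping ok'
#   task_queue.get_results()  # {'drone1.example.com': 'ping ok'}
#   task_queue.get_results()  # {} -- results are cleared after the first read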