def testResetAfter(self): """Tests that the reset_after flag works to send metrics only once.""" # By mocking out its "time" module, the forked flushing process will think # it should call Flush() whenever we send a metric. self.patchTime() with tempfile.NamedTemporaryFile(dir='/var/tmp') as out: # * The indirect=True flag is required for reset_after to work. # * Using debug_file, we send metrics to the temporary file instead of # sending metrics to production via PubSub. with ts_mon_config.SetupTsMonGlobalState('metrics_unittest', indirect=True, debug_file=out.name): def MetricName(i, flushed): return 'test/metric/name/%d/%s' % (i, flushed) # Each of these .set() calls will result in a Flush() call. for i in range(7): # any extra streams with different fields and reset_after=False # will be cleared only if the below metric is cleared. metrics.Boolean(MetricName(i, True), reset_after=False).set( True, fields={'original': False}) metrics.Boolean(MetricName(i, True), reset_after=True).set( True, fields={'original': True}) for i in range(7): metrics.Boolean(MetricName(i, False), reset_after=False).set(True) # By leaving the context, we .join() the flushing process. with open(out.name, 'r') as fh: content = fh.read() # The flushed metrics should be sent only three times, because: # * original=False is sent twice # * original=True is sent once. for i in range(7): self.assertEqual(content.count(MetricName(i, True)), 3) # The nonflushed metrics are sent once-per-flush. # There are 7 of these metrics, # * The 0th is sent 7 times, # * The 1st is sent 6 times, # ... # * The 6th is sent 1 time. # So the "i"th metric is sent (7-i) times. for i in range(7): self.assertEqual(content.count(MetricName(i, False)), 7 - i)
def testEnqueue(self):
    """Test that _Indirect enqueues messages correctly."""
    expected_metric = metrics.Boolean('foo')
    # The metric should be pickleable (it must cross a process boundary).
    pickle.dumps(expected_metric)

    # Swap in a local queue so we can inspect what gets enqueued.
    message_queue = Queue.Queue()
    self.PatchObject(metrics, 'MESSAGE_QUEUE', message_queue)

    # Invoking a method on a proxy metric should enqueue a message
    # describing the metric, the method, and its arguments.
    metrics.Boolean('foo').example_method('arg1', 'arg2')

    queued_entry = message_queue.get_nowait()
    self.assertEqual(
        queued_entry,
        (expected_metric, 'example_method', ('arg1', 'arg2'), {}))
def record_working_state(self, working, timestamp):
    """Report to Monarch whether we are working or broken.

    @param working    Host repair status. `True` means that the DUT
                      is up and expected to pass tests.  `False` means
                      the DUT has failed repair and requires manual
                      intervention.
    @param timestamp  Time that the status was recorded.
    """
    # NOTE(review): timestamp is currently unused by this implementation.
    metric_fields = self.get_metric_fields()

    working_metric = metrics.Boolean(self._HOST_WORKING_METRIC,
                                     reset_after=True)
    working_metric.set(working, fields=metric_fields)

    shard_metric = metrics.Boolean(self._BOARD_SHARD_METRIC,
                                   reset_after=True)
    shard_metric.set(True, fields={'board': self.board or ''})

    self.record_pool(metric_fields)
def _ignored_metric(self):
    """Build the metric tracking ignored servers."""
    ignored_fields = [
        ts_mon.StringField('target_data_center'),
        ts_mon.StringField('target_hostname'),
    ]
    return metrics.Boolean(
        self._metric_root_path + 'ignored',
        description=(
            'A boolean, for servers ignored for test infra prod alerts.'),
        field_spec=ignored_fields)
def _presence_metric(self):
    """Build the metric tracking server presence in the machines db."""
    presence_fields = [
        ts_mon.StringField('target_data_center'),
        ts_mon.StringField('target_hostname'),
    ]
    return metrics.Boolean(
        self._metric_root_path + 'presence',
        description=(
            'A boolean indicating whether a server is in the machines db.'),
        field_spec=presence_fields)
def calculate_spares_needed(self, target_total):
    """Calculate and log the spares needed to achieve a target.

    Return how many working spares are needed to achieve the
    given `target_total` with all DUTs working.

    The spares count may be positive or negative.  Positive values
    indicate spares are needed to replace broken DUTs in order to
    reach the target; negative numbers indicate that no spares are
    needed, and that a corresponding number of working devices can
    be returned.

    If the new target total would require returning ineligible
    DUTs, an error is logged, and the target total is adjusted so
    that those DUTs are not exchanged.

    @param target_total  The new target pool size.

    @return The number of spares needed.
    """
    min_allowed = len(self.ineligible_hosts)
    # We can only meet the target if it doesn't force ineligible
    # DUTs out of the pool.
    target_is_feasible = target_total >= min_allowed

    exhausted_metric = metrics.Boolean(
        'chromeos/autotest/balance_pools/exhausted_pools',
        'True for each pool/model which requests more DUTs than supplied',
        # TODO(jrbarnette) The 'board' field is a legacy.  We need
        # to leave it here until we do the extra work Monarch
        # requires to delete a field.
        field_spec=[
            ts_mon.StringField('pool'),
            ts_mon.StringField('board'),
            ts_mon.StringField('model'),
        ])
    exhausted_metric.set(
        not target_is_feasible,
        fields={
            'pool': self.pool,
            'board': self.labels.get('model', ''),
            'model': self.labels.get('model', ''),
        },
    )

    if target_is_feasible:
        _log_message('%s %s pool: Target of %d is above minimum.',
                     self.labels.get('model', ''), self.pool,
                     target_total)
    else:
        _log_error(
            '%s pool (%s): Target of %d is below minimum of %d DUTs.',
            self.pool,
            self.labels,
            target_total,
            min_allowed,
        )
        _log_error('Adjusting target to %d DUTs.', min_allowed)
        target_total = min_allowed

    adjustment = target_total - self.total_hosts
    return len(self.broken_hosts) + adjustment
def infer_balancer_targets(afe, arguments, pools):
    """Take some arguments and translate them to a list of models to balance

    Args:
    @param afe           AFE object to be used for taking inventory.
    @param arguments     Parsed command line arguments.
    @param pools         The list of pools to balance.

    @returns a list of (model, labels) tuples to be balanced
    """
    balancer_targets = []
    for pool in pools:
        if not arguments.all_models:
            # Explicit model list on the command line: balance exactly
            # those models, tagging on sku/phase labels when given.
            for model in arguments.models:
                labels = labellib.LabelsMapping()
                labels['model'] = model
                if arguments.sku:
                    labels['sku'] = arguments.sku
                if arguments.phase:
                    labels['phase'] = arguments.phase
                balancer_targets.append((pool, labels.getlabels()))
            continue

        # --all-models: derive the model list from a lab inventory,
        # unless the pool looks too unhealthy to balance safely.
        inventory = lab_inventory.get_inventory(afe)
        quarantine = _too_many_broken(inventory, pool, arguments)
        if quarantine:
            _log_error('Refusing to balance all models for %s pool, '
                       'too many models with at least 1 broken DUT '
                       'detected.', pool)
        else:
            for model in inventory.get_pool_models(pool):
                labels = labellib.LabelsMapping()
                labels['model'] = model
                if arguments.phase:
                    labels['phase'] = arguments.phase
                balancer_targets.append((pool, labels.getlabels()))
        metrics.Boolean(
            'chromeos/autotest/balance_pools/unchanged_pools').set(
                quarantine, fields={'pool': pool})
        _log_message('Pool %s quarantine status: %s', pool, quarantine)
    return balancer_targets
def main(args):
    """Entry point.

    Args:
        args: Sequence of command arguments.
    """
    opts = ParseArguments(args)
    ts_mon_config.SetupTsMonGlobalState('prodmon')
    # Report each host both to Monarch (ts_mon) and to the local log.
    reporter = ProdHostReporter(
        source=AtestSource(atest_program=ATEST_PROGRAM),
        sinks=[
            TsMonSink(
                presence_metric=metrics.Boolean(METRIC_PREFIX + 'presence'),
                roles_metric=metrics.String(METRIC_PREFIX + 'roles')),
            LoggingSink(),
        ])
    MainLoop(interval=opts.interval, func=reporter).LoopForever()
def set_status(self, status):
    """Proxy for setting the status of a host via the rdb.

    @param status: The new status.
    """
    # Update elasticsearch db.
    self._update({'status': status})
    self.record_state('host_history', 'status', status)

    # Update Monarch.
    metric_fields = self.get_metric_fields()
    self.record_pool(metric_fields)

    # As each device switches state, indicate that it is not in any
    # other state.  This allows Monarch queries to avoid double counting
    # when additional points are added by the Window Align operation.
    status_metric = metrics.Boolean(self._HOST_STATUS_METRIC,
                                    reset_after=True)
    for candidate_status in rdb_models.AbstractHostModel.Status.names:
        metric_fields['status'] = candidate_status
        status_metric.set(candidate_status == status, fields=metric_fields)
# Copyright 2015 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging

from chromite.lib import metrics


# Tracks whether each drone can currently be reached by the scheduler.
DRONE_ACCESSIBILITY_METRIC = metrics.Boolean(
    'chromeos/autotest/scheduler/drone_accessibility')


class DroneTaskQueueException(Exception):
    """Generic task queue exception."""
    pass


class DroneTaskQueue(object):
    """A manager to run queued tasks in drones and gather results from them."""

    def __init__(self):
        # Maps each drone to the return value collected from it.
        self.results = {}

    def get_results(self):
        """Get a results dictionary keyed on drones.

        @return: A dictionary of return values from drones.
        """
        # Hand back a snapshot and reset our own accumulator, so each
        # call drains the results gathered since the previous call.
        gathered = self.results.copy()
        self.results.clear()
        return gathered