def __init__(self, afe, pool, labels, start_time, end_time):
    """Take inventory of one pool's hosts over a time range.

    @param afe         AFE object used for taking inventory.
    @param pool        Name of the pool being inventoried.
    @param labels      Labels selecting the hosts of interest.
    @param start_time  Start of the history time range.
    @param end_time    End of the history time range.
    """
    self.pool = pool
    # Fold the pool name into the caller-supplied labels as a keyval.
    mapping = labellib.LabelsMapping(labels)
    mapping['pool'] = pool
    self.labels = mapping
    self._pool_labels = [_POOL_PREFIX + pool]
    # Classification buckets for this pool's hosts (presumably populated
    # by _get_hosts() below — verify against that helper).
    self.working_hosts, self.broken_hosts, self.ineligible_hosts = (
        [], [], [])
    self.total_hosts = self._get_hosts(afe, start_time, end_time)
def infer_balancer_targets(afe, arguments, pools):
    """Take some arguments and translate them to a list of pools to balance.

    @param afe        AFE object to be used for taking inventory.
    @param arguments  Parsed command line arguments.
    @param pools      The list of pools to balance.

    @returns a list of (pool, labels) tuples to be balanced.
    """
    balancer_targets = []
    for pool in pools:
        if arguments.all_models:
            # Balancing every model in the pool: take a lab inventory and
            # refuse to proceed if too many models already have broken DUTs.
            inventory = lab_inventory.get_inventory(afe)
            quarantine = _too_many_broken(inventory, pool, arguments)
            if quarantine:
                _log_error('Refusing to balance all models for %s pool, '
                           'too many models with at least 1 broken DUT '
                           'detected.', pool)
            else:
                for model in inventory.get_pool_models(pool):
                    labels = labellib.LabelsMapping()
                    labels['model'] = model
                    if arguments.phase:
                        labels['phase'] = arguments.phase
                    balancer_targets.append((pool, labels.getlabels()))
            # Report the quarantine decision regardless of which branch ran.
            metrics.Boolean(
                'chromeos/autotest/balance_pools/unchanged_pools').set(
                    quarantine, fields={'pool': pool})
            _log_message('Pool %s quarantine status: %s', pool, quarantine)
        else:
            # Explicit model list from the command line; no inventory needed.
            for model in arguments.models:
                labels = labellib.LabelsMapping()
                labels['model'] = model
                if arguments.sku:
                    labels['sku'] = arguments.sku
                if arguments.phase:
                    labels['phase'] = arguments.phase
                balancer_targets.append((pool, labels.getlabels()))
    return balancer_targets
def _actions_and_values_iter(cls, labels):
    """Yield (action, value) pairs for actionable labels in priority order.

    @param labels: An iterable of label strings.
    @returns: A generator of Actionable and value pairs.
    """
    mapping = labellib.LabelsMapping(cls._filter_actionable_labels(labels))
    # Iterate the keyval keys ordered by their configured action priority.
    for key in sorted(mapping, key=cls._get_action_priority):
        yield cls._actions[key], mapping[key]
def diagnose_pool(self, board, pool, time_delta_hours, limit=10):
    """Log diagnostic information about a timeout for a board/pool.

    @param board: The board for which the current suite was run.
    @param pool: The pool against which the current suite was run.
    @param time_delta_hours: The time from which we should log information.
        This is a datetime.timedelta object, as stored by the JobTimer.
    @param limit: The maximum number of jobs per host, to log.

    @raises proxy.JSONRPCException: For exceptions thrown across the wire.
    """
    end_time = datetime.now()
    start_time = end_time - time_delta_hours
    labels = labellib.LabelsMapping()
    labels['board'] = board
    labels['pool'] = pool
    host_histories = status_history.HostJobHistory.get_multiple_histories(
        self.rpc_interface,
        time_utils.to_epoch_time(start_time),
        time_utils.to_epoch_time(end_time),
        labels.getlabels(),
    )
    if not host_histories:
        logging.error('No hosts found for board:%s in pool:%s',
                      board, pool)
        return
    # Human-readable names for the last-diagnosis status codes.
    status_map = {
        status_history.UNUSED: 'Unused',
        status_history.UNKNOWN: 'No job history',
        status_history.WORKING: 'Working',
        status_history.BROKEN: 'Failed repair'
    }
    for history in host_histories:
        count = 0
        job_info = ''
        # Accumulate a summary line for up to `limit` jobs on this host.
        # NOTE: this rebinds `start_time` (the range start above); it is
        # not needed again after this point, so the shadowing is harmless.
        for job in history:
            start_time = (time_utils.epoch_time_to_date_string(
                job.start_time))
            job_info += ('%s %s started on: %s status %s\n' %
                         (job.id, job.name, start_time, job.job_status))
            count += 1
            if count >= limit:
                break
        host = history.host
        logging.error(
            'host: %s, status: %s, locked: %s '
            'diagnosis: %s\n'
            'labels: %s\nLast %s jobs within %s:\n'
            '%s',
            history.hostname, host.status, host.locked,
            status_map[history.last_diagnosis()[0]],
            host.labels, limit, time_delta_hours, job_info)
def _validate_host_list(afe, arguments):
    """Validate the user-specified list of hosts.

    Hosts may be specified implicitly with --board or --pool, or
    explictly as command line arguments.  This enforces these rules:
      * If --board or --pool, or both are specified, individual hosts
        may not be specified.
      * However specified, there must be at least one host.

    The return value is a list of HostJobHistory objects for the
    requested hosts, using the time range supplied on the command
    line.

    @param afe        Autotest frontend
    @param arguments  Parsed arguments object as returned by
                      ArgumentParser.parse_args().
    @return List of HostJobHistory objects for the hosts requested
            on the command line.
    """
    # NOTE: Python 2 module (`print >>` chevron syntax below).
    if arguments.board or arguments.pool or arguments.model:
        if arguments.hostnames:
            print >> sys.stderr, ('FATAL: Hostname arguments provided '
                                  'with --board or --pool')
            sys.exit(1)
        # Unset arguments are None; assigning None to a LabelsMapping key
        # leaves the key out, so only the supplied criteria are used.
        labels = labellib.LabelsMapping()
        labels['board'] = arguments.board
        labels['pool'] = arguments.pool
        labels['model'] = arguments.model
        histories = HostJobHistory.get_multiple_histories(
            afe, arguments.since, arguments.until, labels.getlabels())
    else:
        histories = _get_host_histories(afe, arguments)
    if not histories:
        print >> sys.stderr, 'FATAL: no valid hosts found'
        sys.exit(1)
    return histories
def test_setitem(self):
    """A value assigned through the mapping should be readable back."""
    m = labellib.LabelsMapping(['webcam'])
    m['pool'] = 'suites'
    self.assertEqual(m['pool'], 'suites')
def test_getitem(self):
    """A keyval label should be readable by its key."""
    m = labellib.LabelsMapping(['webcam', 'pool:suites'])
    self.assertEqual(m['pool'], 'suites')
def run_once(self, host, suite_name, moblab_suite_max_retries,
             target_build='', clear_devserver_cache=True,
             test_timeout_hint_m=None):
    """Runs a suite on a Moblab Host against its test DUTS.

    @param host: Moblab Host that will run the suite.
    @param suite_name: Name of the suite to run.
    @param moblab_suite_max_retries: The maximum number of test retries
            allowed within the suite launched on moblab.
    @param target_build: Optional build to be use in the run_suite
            call on moblab. This argument is passed as is to run_suite. It
            must be a sensible build target for the board of the sub-DUTs
            attached to the moblab.
    @param clear_devserver_cache: If True, image cache of the devserver
            running on moblab is cleared before running the test to
            validate devserver imaging staging flow.
    @param test_timeout_hint_m: (int) Optional overall timeout for the
            test. For this test, it is very important to collect post
            failure data from the moblab device. If the overall timeout
            is provided, the test will try to fail early to save some
            time for log collection from the DUT.

    @raises AutoservRunError if the suite does not complete successfully.
    """
    self._host = host
    self._maybe_clear_devserver_cache(clear_devserver_cache)
    # Fetch the board of the DUT's assigned to this Moblab. There should
    # only be one type.
    try:
        dut = host.afe.get_hosts()[0]
    except IndexError:
        raise error.TestFail('All hosts for this MobLab are down. Please '
                             'request the lab admins to take a look.')
    labels = labellib.LabelsMapping(dut.labels)
    board = labels['board']
    if not target_build:
        # Fall back to the stable image for the board when no explicit
        # build was requested.
        stable_version_map = host.afe.get_stable_version_map(
            host.afe.CROS_IMAGE_TYPE)
        target_build = stable_version_map.get_image_name(board)
    logging.info('Running suite: %s.', suite_name)
    cmd = ("%s/site_utils/run_suite.py --pool='' --board=%s --build=%s "
           "--suite_name=%s --retry=True "
           "--max_retries=%d" %
           (moblab_host.AUTOTEST_INSTALL_DIR, board, target_build,
            suite_name, moblab_suite_max_retries))
    cmd, run_suite_timeout_s = self._append_run_suite_timeout(
        cmd,
        test_timeout_hint_m,
    )
    logging.debug('Run suite command: %s', cmd)
    try:
        result = host.run_as_moblab(cmd, timeout=run_suite_timeout_s)
    except error.AutoservRunError as e:
        # Non-critical run_suite exit codes are tolerated; anything
        # critical is re-raised to fail the test.
        if _is_run_suite_error_critical(e.result_obj.exit_status):
            raise
    else:
        # try/else: only reached when run_as_moblab succeeded, so
        # `result` is bound here.
        logging.debug('Suite Run Output:\n%s', result.stdout)
    # Cache directory can contain large binaries like CTS/CTS zip files
    # no need to offload those in the results.
    # The cache is owned by root user
    host.run('rm -fR /mnt/moblab/results/shared/cache', timeout=600)
def test_init_mutated_arg_should_not_affect_mapping(self):
    """Mutating the input list after init must not change the mapping."""
    source = ['class:protecta', 'exec:chronicle_key', 'method:metafalica']
    m = labellib.LabelsMapping(source)
    snapshot = copy.deepcopy(m)
    source.pop()
    self.assertEqual(m, snapshot)
def test_duplicate_keys_should_take_first(self):
    """When the same key appears twice, the first value wins."""
    m = labellib.LabelsMapping(['webcam', 'pool:party', 'pool:suites'])
    self.assertEqual(m['pool'], 'party')
def test_setitem_to_none_with_missing_key_should_noop(self):
    """Assigning None to an absent key should not create the key."""
    m = labellib.LabelsMapping(['webcam', 'pool:suites'])
    m['foo'] = None
    self.assertNotIn('foo', m)
def test_init_should_not_mutate_labels(self):
    """Mutating the mapping must not write back to the input list."""
    original = ['class:protecta', 'exec:chronicle_key', 'method:metafalica']
    working = copy.deepcopy(original)
    m = labellib.LabelsMapping(working)
    m['class'] = 'distllista'
    self.assertEqual(working, original)
def test_init_and_getlabels_should_move_plain_labels_first(self):
    """getlabels() should emit plain labels before keyval labels."""
    m = labellib.LabelsMapping(['ohse:tsubame', 'webcam'])
    self.assertEqual(m.getlabels(), ['webcam', 'ohse:tsubame'])
def test_init_and_getlabels_should_preserve_plain_label_order(self):
    """getlabels() should keep plain labels in their input order."""
    m = labellib.LabelsMapping(['webcam', 'exec', 'method'])
    self.assertEqual(m.getlabels(), ['webcam', 'exec', 'method'])
def test_getlabels(self):
    """getlabels() should round-trip the constructor's input."""
    initial = ['webcam', 'pool:suites']
    m = labellib.LabelsMapping(initial)
    self.assertEqual(m.getlabels(), initial)
def run_once(self, host, moblab_suite_max_retries, target_build='',
             clear_devserver_cache=True, test_timeout_hint_m=None):
    """Runs a suite on a Moblab Host against its test DUTS.

    @param host: Moblab Host that will run the suite.
    @param moblab_suite_max_retries: The maximum number of test retries
            allowed within the suite launched on moblab.
    @param target_build: Optional build to be use in the run_suite
            call on moblab. This argument is passed as is to run_suite. It
            must be a sensible build target for the board of the sub-DUTs
            attached to the moblab.
    @param clear_devserver_cache: If True, image cache of the devserver
            running on moblab is cleared before running the test to
            validate devserver imaging staging flow.
    @param test_timeout_hint_m: (int) Optional overall timeout for the
            test. For this test, it is very important to collect post
            failure data from the moblab device. If the overall timeout
            is provided, the test will try to fail early to save some
            time for log collection from the DUT.

    @raises AutoservRunError if the suite does not complete successfully.
    """
    self._host = host
    self._maybe_clear_devserver_cache(clear_devserver_cache)
    duts = host.afe.get_hosts()
    if len(duts) == 0:
        raise error.TestFail('All hosts for this MobLab are down. Please '
                             'request the lab admins to take a look.')
    board = None
    dut_to_label = {}
    for dut in duts:
        # Fetch the board of the DUT's assigned to this Moblab. There
        # should only be one type.
        board = labellib.LabelsMapping(dut.labels)['board']
        # Map each host to the required label it carries (if any).
        for label in dut.labels:
            if label in self.REQUIRED_LABELS:
                dut_to_label[dut.hostname] = label
    if not set(dut_to_label.values()) == self.REQUIRED_LABELS:
        # BUG FIX: the set difference must be parenthesized.  `%` binds
        # tighter than binary `-`, so the old code evaluated
        # ('...' % X) - set(...) and raised TypeError instead of
        # reporting the missing labels.
        raise error.TestFail(
            'Missing required labels on hosts %s, are some hosts down?'
            % (self.REQUIRED_LABELS - set(dut_to_label.values())))
    if not board:
        raise error.TestFail('Could not determine board from hosts.')
    if not target_build:
        # Fall back to the stable image for the board when no explicit
        # build was requested.
        stable_version_map = host.afe.get_stable_version_map(
            host.afe.CROS_IMAGE_TYPE)
        target_build = stable_version_map.get_image_name(board)
    logging.info('Running suite: hardware_storagequal_cq')
    cmd = ("%s/site_utils/run_suite.py --pool='' --board=%s --build=%s "
           "--suite_name=hardware_storagequal_cq --retry=True "
           "--max_retries=%d" %
           (moblab_host.AUTOTEST_INSTALL_DIR, board, target_build,
            moblab_suite_max_retries))
    cmd, run_suite_timeout_s = self._append_run_suite_timeout(
        cmd,
        test_timeout_hint_m,
    )
    logging.debug('Run suite command: %s', cmd)
    try:
        result = host.run_as_moblab(cmd, timeout=run_suite_timeout_s)
    except error.AutoservRunError as e:
        if _is_run_suite_error_critical(e.result_obj.exit_status):
            raise
        # BUG FIX: on a tolerated (non-critical) failure `result` was
        # never bound, so the uses below raised NameError.  The command
        # output lives on the exception's result object; use that.
        result = e.result_obj
    logging.debug('Suite Run Output:\n%s', result.stderr)
    job_ids = self._get_job_ids_from_suite_output(result.stderr)
    logging.debug('Suite job ids %s', job_ids)
    keyvals_per_host = self._get_keyval_files_per_host(host, job_ids)
    logging.debug('Keyvals grouped by host %s', keyvals_per_host)
    failed_test = False
    for hostname in keyvals_per_host:
        label = dut_to_label[hostname]
        expected = self.EXPECTED_RESULTS[label]
        actual = self._get_test_execution_order(
            host, keyvals_per_host[hostname])
        logging.info('Comparing test order for %s from host %s',
                     label, hostname)
        logging.info('%-37s %s', 'Expected', 'Actual')
        # Walk both lists in lockstep, padding the shorter with None,
        # and flag any position where they disagree.
        for i in range(max(len(expected), len(actual))):
            expected_i = expected[i] if i < len(expected) else None
            actual_i = actual[i] if i < len(actual) else None
            check_fail = expected_i != actual_i
            check_text = 'X' if check_fail else ' '
            logging.info('%s %-35s %s', check_text, expected_i, actual_i)
            failed_test = failed_test or check_fail
    # Cache directory can contain large binaries like CTS/CTS zip files
    # no need to offload those in the results.
    # The cache is owned by root user
    host.run('rm -fR /mnt/moblab/results/shared/cache', timeout=600)
    if failed_test:
        raise error.TestFail(
            'Actual test execution order did not match expected')
def test_len(self):
    """len() should count keyval labels only, not plain labels."""
    m = labellib.LabelsMapping(['webcam', 'pool:suites'])
    self.assertEqual(len(m), 1)
def test_iter(self):
    """Iteration should yield the keyval keys only."""
    m = labellib.LabelsMapping(['webcam', 'pool:suites'])
    self.assertEqual(list(iter(m)), ['pool'])
def test_delitem(self):
    """del should remove a keyval entry from the mapping."""
    m = labellib.LabelsMapping(['webcam', 'pool:suites'])
    del m['pool']
    self.assertNotIn('pool', m)
def test_in(self):
    """Membership should report keyval keys present in the mapping."""
    m = labellib.LabelsMapping(['webcam', 'pool:suites'])
    self.assertIn('pool', m)
def test_init_and_getlabels_should_preserve_keyval_label_order(self):
    """getlabels() should keep keyval labels in their input order."""
    keyvals = ['class:protecta', 'method:metafalica', 'exec:chronicle_key']
    m = labellib.LabelsMapping(keyvals)
    self.assertEqual(m.getlabels(), keyvals)
def test_init_and_getlabels_should_remove_duplicates(self):
    """Only the first occurrence of a duplicated key is kept."""
    m = labellib.LabelsMapping(['webcam', 'pool:suites', 'pool:party'])
    self.assertEqual(m.getlabels(), ['webcam', 'pool:suites'])
def test_setitem_to_none_should_delete(self):
    """Assigning None to an existing key should delete it."""
    m = labellib.LabelsMapping(['webcam', 'pool:suites'])
    m['pool'] = None
    self.assertNotIn('pool', m)