Esempio n. 1
0
    def __init__(self, afe, pool, labels, start_time, end_time):
        """Record the pool and its labels, then take the host inventory.

        @param afe         AFE object passed through to `_get_hosts()`.
        @param pool        Name of the pool; stored on the instance and
                           also written under the 'pool' key of the
                           label mapping.
        @param labels      Labels used to seed the label mapping.
        @param start_time  Start of the time range passed to
                           `_get_hosts()`.
        @param end_time    End of the time range passed to
                           `_get_hosts()`.
        """
        self.pool = pool
        label_mapping = labellib.LabelsMapping(labels)
        label_mapping['pool'] = pool
        self.labels = label_mapping
        self._pool_labels = [_POOL_PREFIX + pool]

        # The per-category lists must exist before `_get_hosts()` is
        # called (presumably it fills them in -- confirm against
        # `_get_hosts()`).
        (self.working_hosts,
         self.broken_hosts,
         self.ineligible_hosts) = [], [], []
        self.total_hosts = self._get_hosts(afe, start_time, end_time)
Esempio n. 2
0
def infer_balancer_targets(afe, arguments, pools):
    """Take some arguments and translate them to a list of targets to balance

    For each pool, the targets are chosen in one of two ways:
      * With `arguments.all_models`, every model the lab inventory
        reports for the pool is targeted, unless `_too_many_broken()`
        flags the pool; a flagged pool is skipped with an error
        message.  The outcome is also recorded in the `unchanged_pools`
        metric and logged.
      * Otherwise, each model in `arguments.models` is targeted,
        optionally narrowed by `arguments.sku`.
    In both cases `arguments.phase`, when set, is added to the labels.

    @param afe           AFE object to be used for taking inventory.
    @param arguments     Parsed command line arguments.
    @param pools         The list of pools to balance.

    @returns    a list of (pool, labels) tuples to be balanced, where
                labels is the list returned by
                `LabelsMapping.getlabels()`.

    """
    balancer_targets = []
    # The inventory does not depend on the pool, so it is fetched at most
    # once (on first use) instead of once per pool.
    inventory = None

    for pool in pools:
        if arguments.all_models:
            if inventory is None:
                inventory = lab_inventory.get_inventory(afe)
            quarantine = _too_many_broken(inventory, pool, arguments)
            if quarantine:
                _log_error('Refusing to balance all models for %s pool, '
                           'too many models with at least 1 broken DUT '
                           'detected.', pool)
            else:
                for model in inventory.get_pool_models(pool):
                    labels = labellib.LabelsMapping()
                    labels['model'] = model
                    if arguments.phase:
                        labels['phase'] = arguments.phase
                    balancer_targets.append((pool, labels.getlabels()))
            metrics.Boolean(
                'chromeos/autotest/balance_pools/unchanged_pools').set(
                    quarantine, fields={'pool': pool})
            _log_message('Pool %s quarantine status: %s', pool, quarantine)
        else:
            for model in arguments.models:
                labels = labellib.LabelsMapping()
                labels['model'] = model
                if arguments.sku:
                    labels['sku'] = arguments.sku
                if arguments.phase:
                    labels['phase'] = arguments.phase
                balancer_targets.append((pool, labels.getlabels()))
    return balancer_targets
Esempio n. 3
0
    def _actions_and_values_iter(cls, labels):
        """Yield (action, value) pairs for the actionable labels, in order.

        The labels are first narrowed with `_filter_actionable_labels()`,
        parsed into a key/value mapping, and the keys are then visited in
        the order given by `_get_action_priority()`.

        @param labels: An iterable of label strings.
        @returns: A generator of Actionable and value pairs.
        """
        mapping = labellib.LabelsMapping(
                cls._filter_actionable_labels(labels))
        for key in sorted(mapping, key=cls._get_action_priority):
            yield cls._actions[key], mapping[key]
Esempio n. 4
0
    def diagnose_pool(self, board, pool, time_delta_hours, limit=10):
        """Log per-host diagnostics about a timeout for a board/pool.

        Looks up the job histories of all hosts carrying the board and
        pool labels over the window ending now, and logs one error
        record per host with its status, diagnosis, labels and recent
        jobs.  If no host histories are returned, a single error is
        logged instead.

        @param board: The board for which the current suite was run.
        @param pool: The pool against which the current suite was run.
        @param time_delta_hours: The time from which we should log information.
            This is a datetime.timedelta object, as stored by the JobTimer.
        @param limit: The maximum number of jobs per host, to log.  The
            limit is checked after a job has been recorded.

        @raises proxy.JSONRPCException: For exceptions thrown across the wire.
        """
        now = datetime.now()
        window_start = now - time_delta_hours
        query_labels = labellib.LabelsMapping()
        query_labels['board'] = board
        query_labels['pool'] = pool
        host_histories = status_history.HostJobHistory.get_multiple_histories(
            self.rpc_interface,
            time_utils.to_epoch_time(window_start),
            time_utils.to_epoch_time(now),
            query_labels.getlabels(),
        )
        if not host_histories:
            logging.error('No hosts found for board:%s in pool:%s', board,
                          pool)
            return
        # Human readable text for each diagnosis code.
        diagnosis_names = {
            status_history.UNUSED: 'Unused',
            status_history.UNKNOWN: 'No job history',
            status_history.WORKING: 'Working',
            status_history.BROKEN: 'Failed repair'
        }
        for history in host_histories:
            job_lines = []
            for seen, job in enumerate(history, 1):
                started = time_utils.epoch_time_to_date_string(
                    job.start_time)
                job_lines.append(
                    '%s %s started on: %s status %s\n' %
                    (job.id, job.name, started, job.job_status))
                if seen >= limit:
                    break
            host = history.host
            logging.error(
                'host: %s, status: %s, locked: %s '
                'diagnosis: %s\n'
                'labels: %s\nLast %s jobs within %s:\n'
                '%s', history.hostname, host.status, host.locked,
                diagnosis_names[history.last_diagnosis()[0]], host.labels,
                limit, time_delta_hours, ''.join(job_lines))
def _validate_host_list(afe, arguments):
    """Validate the user-specified list of hosts.

    Hosts may be specified implicitly with --board, --pool or --model,
    or explicitly as command line arguments.  This enforces these
    rules:
      * If any of --board, --pool or --model is specified, individual
        hosts may not be specified.
      * However specified, there must be at least one host.

    On a rule violation a FATAL message is written to stderr and the
    process exits with status 1.

    The return value is a list of HostJobHistory objects for the
    requested hosts, using the time range supplied on the command
    line.

    @param afe       Autotest frontend
    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().
    @return List of HostJobHistory objects for the hosts requested
            on the command line.

    """
    if arguments.board or arguments.pool or arguments.model:
        if arguments.hostnames:
            # sys.stderr.write() behaves the same on Python 2 and 3,
            # unlike the `print >>` statement.
            sys.stderr.write('FATAL: Hostname arguments provided '
                             'with --board, --pool or --model\n')
            sys.exit(1)

        # Each selector is assigned as-is, including unset ones.
        # NOTE(review): relies on LabelsMapping ignoring None values for
        # keys that are not present -- confirm.
        labels = labellib.LabelsMapping()
        labels['board'] = arguments.board
        labels['pool'] = arguments.pool
        labels['model'] = arguments.model
        histories = HostJobHistory.get_multiple_histories(
            afe, arguments.since, arguments.until, labels.getlabels())
    else:
        histories = _get_host_histories(afe, arguments)
    if not histories:
        sys.stderr.write('FATAL: no valid hosts found\n')
        sys.exit(1)
    return histories
Esempio n. 6
0
 def test_setitem(self):
     # A value assigned to a new key must be readable back by that key.
     label_map = labellib.LabelsMapping(['webcam'])
     label_map['pool'] = 'suites'
     stored = label_map['pool']
     self.assertEqual(stored, 'suites')
Esempio n. 7
0
 def test_getitem(self):
     # A 'key:value' label must be readable by its key.
     label_map = labellib.LabelsMapping(['webcam', 'pool:suites'])
     pool_value = label_map['pool']
     self.assertEqual(pool_value, 'suites')
    def run_once(self, host, suite_name, moblab_suite_max_retries,
                 target_build='', clear_devserver_cache=True,
                 test_timeout_hint_m=None):
        """Runs a suite on a Moblab Host against its test DUTS.

        The board is read from the labels of the first DUT the moblab's
        AFE reports.  When no build is given, the stable version map is
        consulted for that board.  A run_suite failure is re-raised only
        when `_is_run_suite_error_critical()` says so; otherwise it is
        dropped and the method returns without further work.

        @param host: Moblab Host that will run the suite.
        @param suite_name: Name of the suite to run.
        @param moblab_suite_max_retries: The maximum number of test retries
                allowed within the suite launched on moblab.
        @param target_build: Optional build to be use in the run_suite
                call on moblab. This argument is passed as is to run_suite. It
                must be a sensible build target for the board of the sub-DUTs
                attached to the moblab.
        @param clear_devserver_cache: If True, image cache of the devserver
                running on moblab is cleared before running the test to validate
                devserver imaging staging flow.
        @param test_timeout_hint_m: (int) Optional overall timeout for the test.
                For this test, it is very important to collect post failure data
                from the moblab device. If the overall timeout is provided, the
                test will try to fail early to save some time for log collection
                from the DUT.

        @raises AutoservRunError if the suite does not complete successfully.
        @raises TestFail if the moblab reports no hosts.
        """
        self._host = host

        self._maybe_clear_devserver_cache(clear_devserver_cache)
        # Only the first DUT is inspected: all DUTs assigned to this Moblab
        # are expected to be of a single board type.
        try:
            first_dut = host.afe.get_hosts()[0]
        except IndexError:
            raise error.TestFail('All hosts for this MobLab are down. Please '
                                 'request the lab admins to take a look.')

        board = labellib.LabelsMapping(first_dut.labels)['board']

        if not target_build:
            target_build = host.afe.get_stable_version_map(
                    host.afe.CROS_IMAGE_TYPE).get_image_name(board)

        logging.info('Running suite: %s.', suite_name)
        command_template = (
                "%s/site_utils/run_suite.py --pool='' --board=%s --build=%s "
                "--suite_name=%s --retry=True --max_retries=%d")
        command = command_template % (
                moblab_host.AUTOTEST_INSTALL_DIR, board, target_build,
                suite_name, moblab_suite_max_retries)
        command, run_suite_timeout_s = self._append_run_suite_timeout(
                command, test_timeout_hint_m)

        logging.debug('Run suite command: %s', command)
        try:
            result = host.run_as_moblab(command, timeout=run_suite_timeout_s)
        except error.AutoservRunError as e:
            if _is_run_suite_error_critical(e.result_obj.exit_status):
                raise
            # Non-critical failure: nothing more to log or clean up.
            return

        logging.debug('Suite Run Output:\n%s', result.stdout)
        # Cache directory can contain large binaries like CTS/CTS zip files
        # no need to offload those in the results.
        # The cache is owned by root user
        host.run('rm -fR /mnt/moblab/results/shared/cache', timeout=600)
Esempio n. 9
0
 def test_init_mutated_arg_should_not_affect_mapping(self):
     # Changing the caller's list after construction must not leak into
     # the mapping.
     source_labels = [
         'class:protecta',
         'exec:chronicle_key',
         'method:metafalica',
     ]
     label_map = labellib.LabelsMapping(source_labels)
     snapshot = copy.deepcopy(label_map)
     del source_labels[-1]
     self.assertEqual(label_map, snapshot)
Esempio n. 10
0
 def test_duplicate_keys_should_take_first(self):
     # When a key appears twice, the earliest value is the one kept.
     label_map = labellib.LabelsMapping(
         ['webcam', 'pool:party', 'pool:suites'])
     kept_value = label_map['pool']
     self.assertEqual(kept_value, 'party')
Esempio n. 11
0
 def test_setitem_to_none_with_missing_key_should_noop(self):
     # Assigning None to an absent key must not create that key.
     label_map = labellib.LabelsMapping(['webcam', 'pool:suites'])
     absent_key = 'foo'
     label_map[absent_key] = None
     self.assertNotIn(absent_key, label_map)
Esempio n. 12
0
 def test_init_should_not_mutate_labels(self):
     # Editing the mapping must leave the list it was built from intact.
     reference = [
         'class:protecta',
         'exec:chronicle_key',
         'method:metafalica',
     ]
     passed_in = copy.deepcopy(reference)
     label_map = labellib.LabelsMapping(passed_in)
     label_map['class'] = 'distllista'
     self.assertEqual(passed_in, reference)
Esempio n. 13
0
 def test_init_and_getlabels_should_move_plain_labels_first(self):
     # Plain labels are emitted ahead of 'key:value' labels.
     label_map = labellib.LabelsMapping(['ohse:tsubame', 'webcam'])
     emitted = label_map.getlabels()
     self.assertEqual(emitted, ['webcam', 'ohse:tsubame'])
Esempio n. 14
0
 def test_init_and_getlabels_should_preserve_plain_label_order(self):
     # Plain labels come back in the order they were supplied.
     label_map = labellib.LabelsMapping(['webcam', 'exec', 'method'])
     emitted = label_map.getlabels()
     self.assertEqual(emitted, ['webcam', 'exec', 'method'])
Esempio n. 15
0
 def test_getlabels(self):
     # getlabels() must reproduce the labels the mapping was built from.
     given_labels = ['webcam', 'pool:suites']
     label_map = labellib.LabelsMapping(given_labels)
     emitted = label_map.getlabels()
     self.assertEqual(emitted, given_labels)
    def run_once(self,
                 host,
                 moblab_suite_max_retries,
                 target_build='',
                 clear_devserver_cache=True,
                 test_timeout_hint_m=None):
        """Runs a suite on a Moblab Host against its test DUTS.

        Runs the hardware_storagequal_cq suite, then compares, per host,
        the order in which tests actually ran against the order listed in
        `EXPECTED_RESULTS` for the required label carried by that host.

        @param host: Moblab Host that will run the suite.
        @param moblab_suite_max_retries: The maximum number of test retries
                allowed within the suite launched on moblab.
        @param target_build: Optional build to be use in the run_suite
                call on moblab. This argument is passed as is to run_suite. It
                must be a sensible build target for the board of the sub-DUTs
                attached to the moblab.
        @param clear_devserver_cache: If True, image cache of the devserver
                running on moblab is cleared before running the test to validate
                devserver imaging staging flow.
        @param test_timeout_hint_m: (int) Optional overall timeout for the test.
                For this test, it is very important to collect post failure data
                from the moblab device. If the overall timeout is provided, the
                test will try to fail early to save some time for log collection
                from the DUT.

        @raises AutoservRunError if the suite does not complete successfully.
        @raises TestFail if no hosts are available, a required label is
                missing from the hosts, the board cannot be determined, or
                the observed test order differs from the expected one.
        """
        self._host = host
        self._maybe_clear_devserver_cache(clear_devserver_cache)

        duts = host.afe.get_hosts()
        if not duts:
            raise error.TestFail('All hosts for this MobLab are down. Please '
                                 'request the lab admins to take a look.')

        board = None
        dut_to_label = {}
        for dut in duts:
            # Fetch the board of the DUT's assigned to this Moblab. There should
            # only be one type.
            board = labellib.LabelsMapping(dut.labels)['board']
            for label in dut.labels:
                if label in self.REQUIRED_LABELS:
                    dut_to_label[dut.hostname] = label

        found_labels = set(dut_to_label.values())
        if found_labels != self.REQUIRED_LABELS:
            # The set difference must be computed before formatting: `%`
            # binds tighter than `-`, so without the parentheses the
            # formatted string would be the left operand of the subtraction.
            missing_labels = sorted(self.REQUIRED_LABELS - found_labels)
            raise error.TestFail(
                'Missing required labels on hosts %s, are some hosts down?' %
                (missing_labels,))

        if not board:
            raise error.TestFail('Could not determine board from hosts.')

        if not target_build:
            stable_version_map = host.afe.get_stable_version_map(
                host.afe.CROS_IMAGE_TYPE)
            target_build = stable_version_map.get_image_name(board)

        logging.info('Running suite: hardware_storagequal_cq')
        cmd = ("%s/site_utils/run_suite.py --pool='' --board=%s --build=%s "
               "--suite_name=hardware_storagequal_cq --retry=True "
               "--max_retries=%d" % (moblab_host.AUTOTEST_INSTALL_DIR, board,
                                     target_build, moblab_suite_max_retries))
        cmd, run_suite_timeout_s = self._append_run_suite_timeout(
            cmd,
            test_timeout_hint_m,
        )

        logging.debug('Run suite command: %s', cmd)
        try:
            result = host.run_as_moblab(cmd, timeout=run_suite_timeout_s)
        except error.AutoservRunError as e:
            if _is_run_suite_error_critical(e.result_obj.exit_status):
                raise
            # Non-critical failure: keep going with the output attached to
            # the exception, so `result` is always bound below.
            # NOTE(review): assumes e.result_obj exposes `stderr` like the
            # object returned by a successful run -- confirm.
            result = e.result_obj

        logging.debug('Suite Run Output:\n%s', result.stderr)

        job_ids = self._get_job_ids_from_suite_output(result.stderr)

        logging.debug('Suite job ids %s', job_ids)

        keyvals_per_host = self._get_keyval_files_per_host(host, job_ids)

        logging.debug('Keyvals grouped by host %s', keyvals_per_host)

        failed_test = False
        for hostname in keyvals_per_host:
            label = dut_to_label[hostname]
            expected = self.EXPECTED_RESULTS[label]
            actual = self._get_test_execution_order(host,
                                                    keyvals_per_host[hostname])

            logging.info('Comparing test order for %s from host %s', label,
                         hostname)
            logging.info('%-37s %s', 'Expected', 'Actual')
            # Walk both sequences side by side; the shorter one is padded
            # with None so that extra or missing entries count as mismatches.
            for i in range(max(len(expected), len(actual))):
                expected_i = expected[i] if i < len(expected) else None
                actual_i = actual[i] if i < len(actual) else None
                check_fail = expected_i != actual_i
                check_text = 'X' if check_fail else ' '
                logging.info('%s %-35s %s', check_text, expected_i, actual_i)
                failed_test = failed_test or check_fail

        # Cache directory can contain large binaries like CTS/CTS zip files
        # no need to offload those in the results.
        # The cache is owned by root user
        host.run('rm -fR /mnt/moblab/results/shared/cache', timeout=600)

        if failed_test:
            raise error.TestFail(
                'Actual test execution order did not match expected')
Esempio n. 17
0
 def test_len(self):
     # Only 'key:value' labels count towards the mapping's length.
     label_map = labellib.LabelsMapping(['webcam', 'pool:suites'])
     size = len(label_map)
     self.assertEqual(size, 1)
Esempio n. 18
0
 def test_iter(self):
     # Iterating the mapping yields keys only; plain labels are skipped.
     label_map = labellib.LabelsMapping(['webcam', 'pool:suites'])
     keys = list(iter(label_map))
     self.assertEqual(keys, ['pool'])
Esempio n. 19
0
 def test_delitem(self):
     # Deleting a key removes it from the mapping.
     label_map = labellib.LabelsMapping(['webcam', 'pool:suites'])
     removed_key = 'pool'
     del label_map[removed_key]
     self.assertNotIn(removed_key, label_map)
Esempio n. 20
0
 def test_in(self):
     # Membership tests succeed for the key of a 'key:value' label.
     label_map = labellib.LabelsMapping(['webcam', 'pool:suites'])
     self.assertIn('pool', label_map)
Esempio n. 21
0
 def test_init_and_getlabels_should_preserve_keyval_label_order(self):
     # 'key:value' labels come back in the order they were supplied.
     given_labels = [
         'class:protecta',
         'method:metafalica',
         'exec:chronicle_key',
     ]
     label_map = labellib.LabelsMapping(given_labels)
     emitted = label_map.getlabels()
     self.assertEqual(emitted, given_labels)
Esempio n. 22
0
 def test_init_and_getlabels_should_remove_duplicates(self):
     # A repeated key is emitted once, carrying its first value.
     label_map = labellib.LabelsMapping(
         ['webcam', 'pool:suites', 'pool:party'])
     emitted = label_map.getlabels()
     self.assertEqual(emitted, ['webcam', 'pool:suites'])
Esempio n. 23
0
 def test_setitem_to_none_should_delete(self):
     # Assigning None to an existing key removes that key.
     label_map = labellib.LabelsMapping(['webcam', 'pool:suites'])
     cleared_key = 'pool'
     label_map[cleared_key] = None
     self.assertNotIn(cleared_key, label_map)