Ejemplo n.º 1
0
    def _sanitize_all_duts(dut_objects):
        """Clean up logs and reboot all the DUTs.

        @param dut_objects: A list of DUTObjects for all DUTs allocated for the
                            test.
        """
        for dut in dut_objects:
            utils.sanitize_client(dut.host)
Ejemplo n.º 2
0
    def run(self, job, batch_size=10, tries=10, capturer_hostname=None,
            conn_worker=None, work_client_hostname=None,
            disabled_sysinfo=False):
        """Executes Chaos test.

        @param job: an Autotest job object.
        @param batch_size: an integer, max number of APs to lock in one batch.
        @param tries: an integer, number of iterations to run per AP.
        @param capturer_hostname: a string or None, hostname or IP of capturer.
        @param conn_worker: ConnectionWorkerAbstract or None, to run extra
                            work after successful connection.
        @param work_client_hostname: a string or None, hostname of work client
        @param disabled_sysinfo: a bool, disable collection of logs from DUT.


        @raises TestError: Issues locking VM webdriver instance
        """

        lock_manager = host_lock_manager.HostLockManager()
        webdriver_master = hosts.SSHHost(MASTERNAME, user='******')
        host_prefix = self._host.hostname.split('-')[0]
        with host_lock_manager.HostsLockedBy(lock_manager):
            capture_host = utils.allocate_packet_capturer(
                    lock_manager, hostname=capturer_hostname,
                    prefix=host_prefix)
            # Cleanup and reboot packet capturer before the test.
            utils.sanitize_client(capture_host)
            capturer = site_linux_system.LinuxSystem(capture_host, {},
                                                     'packet_capturer')

            # Run iw scan and abort if more than allowed number of APs are up.
            iw_command = iw_runner.IwRunner(capture_host)
            start_time = time.time()
            logging.info('Performing a scan with a max timeout of 30 seconds.')
            capture_interface = 'wlan0'
            capturer_info = capture_host.run('cat /etc/lsb-release',
                                             ignore_status=True, timeout=5).stdout
            if 'whirlwind' in capturer_info:
                # Use the dual band aux radio for scanning networks.
                capture_interface = 'wlan2'
            while time.time() - start_time <= ap_constants.MAX_SCAN_TIMEOUT:
                networks = iw_command.scan(capture_interface)
                if networks is None:
                    if (time.time() - start_time ==
                            ap_constants.MAX_SCAN_TIMEOUT):
                        raise error.TestError(
                            'Packet capturer is not responding to scans. Check'
                            'device and re-run test')
                    continue
                elif len(networks) < ap_constants.MAX_SSID_COUNT:
                    break
                elif len(networks) >= ap_constants.MAX_SSID_COUNT:
                    raise error.TestError(
                        'Probably someone is already running a '
                        'chaos test?!')

            if conn_worker is not None:
                work_client_machine = utils.allocate_packet_capturer(
                        lock_manager, hostname=work_client_hostname)
                conn_worker.prepare_work_client(work_client_machine)

            # Lock VM. If on, power off; always power on. Then create a tunnel.
            webdriver_instance = utils.allocate_webdriver_instance(lock_manager)

            if utils.is_VM_running(webdriver_master, webdriver_instance):
                logging.info('VM %s was on; powering off for a clean instance',
                             webdriver_instance)
                utils.power_off_VM(webdriver_master, webdriver_instance)
                logging.info('Allow VM time to gracefully shut down')
                time.sleep(5)

            logging.info('Starting up VM %s', webdriver_instance)
            utils.power_on_VM(webdriver_master, webdriver_instance)
            logging.info('Allow VM time to power on before creating a tunnel.')
            time.sleep(30)

            if not client_utils.host_is_in_lab_zone(webdriver_instance.hostname):
                self._ap_spec._webdriver_hostname = webdriver_instance.hostname
            else:
                # If in the lab then port forwarding must be done so webdriver
                # connection will be over localhost.
                self._ap_spec._webdriver_hostname = 'localhost'
                webdriver_tunnel = webdriver_instance.create_ssh_tunnel(
                                                WEBDRIVER_PORT, WEBDRIVER_PORT)
                logging.info('Wait for tunnel to be created.')
                for i in range(3):
                    time.sleep(10)
                    results = client_utils.run('lsof -i:%s' % WEBDRIVER_PORT,
                                             ignore_status=True)
                    if results:
                        break
                if not results:
                    raise error.TestError(
                            'Unable to listen to WEBDRIVER_PORT: %s', results)

            batch_locker = ap_batch_locker.ApBatchLocker(
                    lock_manager, self._ap_spec,
                    ap_test_type=ap_constants.AP_TEST_TYPE_CHAOS)

            while batch_locker.has_more_aps():
                # Work around for CrOS devices only:crbug.com/358716
                utils.sanitize_client(self._host)
                healthy_dut = True

                with contextlib.closing(wifi_client.WiFiClient(
                    hosts.create_host(
                            {
                                    'hostname' : self._host.hostname,
                                    'afe_host' : self._host._afe_host,
                                    'host_info_store':
                                            self._host.host_info_store,
                            },
                            host_class=self._host.__class__,
                    ),
                    './debug',
                    False,
                )) as client:

                    aps = batch_locker.get_ap_batch(batch_size=batch_size)
                    if not aps:
                        logging.info('No more APs to test.')
                        break

                    # Power down all of the APs because some can get grumpy
                    # if they are configured several times and remain on.
                    # User the cartridge to down group power downs and
                    # configurations.
                    utils.power_down_aps(aps, self._broken_pdus)
                    utils.configure_aps(aps, self._ap_spec, self._broken_pdus)

                    aps = utils.filter_quarantined_and_config_failed_aps(aps,
                            batch_locker, job, self._broken_pdus)

                    for ap in aps:
                        # http://crbug.com/306687
                        if ap.ssid == None:
                            logging.error('The SSID was not set for the AP:%s',
                                          ap)

                        healthy_dut = utils.is_dut_healthy(client, ap)

                        if not healthy_dut:
                            logging.error('DUT is not healthy, rebooting.')
                            batch_locker.unlock_and_reclaim_aps()
                            break

                        networks = utils.return_available_networks(
                                ap, capturer, job, self._ap_spec)

                        if networks is None:
                            # If scan returned no networks, iw scan failed.
                            # Reboot the packet capturer device and
                            # reconfigure the capturer.
                            batch_locker.unlock_and_reclaim_ap(ap.host_name)
                            logging.error('Packet capture is not healthy, '
                                          'rebooting.')
                            capturer.host.reboot()
                            capturer = site_linux_system.LinuxSystem(
                                           capture_host, {},'packet_capturer')
                            continue
                        if networks == list():
                           # Packet capturer did not find the SSID in scan or
                           # there was a security mismatch.
                           utils.release_ap(ap, batch_locker, self._broken_pdus)
                           continue

                        assoc_params = ap.get_association_parameters()

                        if not utils.is_conn_worker_healthy(
                                conn_worker, ap, assoc_params, job):
                            utils.release_ap(
                                    ap, batch_locker, self._broken_pdus)
                            continue

                        name = ap.name
                        kernel_ver = self._host.get_kernel_ver()
                        firmware_ver = utils.get_firmware_ver(self._host)
                        if not firmware_ver:
                            firmware_ver = "Unknown"

                        debug_dict = {'+++PARSE DATA+++': '+++PARSE DATA+++',
                                      'SSID': ap._ssid,
                                      'DUT': client.wifi_mac,
                                      'AP Info': ap.name,
                                      'kernel_version': kernel_ver,
                                      'wifi_firmware_version': firmware_ver}
                        debug_string = pprint.pformat(debug_dict)

                        logging.info('Waiting %d seconds for the AP dhcp '
                                     'server', ap.dhcp_delay)
                        time.sleep(ap.dhcp_delay)

                        result = job.run_test(self._test,
                                     capturer=capturer,
                                     capturer_frequency=networks[0].frequency,
                                     capturer_ht_type=networks[0].ht,
                                     host=self._host,
                                     assoc_params=assoc_params,
                                     client=client,
                                     tries=tries,
                                     debug_info=debug_string,
                                     # Copy all logs from the system
                                     disabled_sysinfo=disabled_sysinfo,
                                     conn_worker=conn_worker,
                                     tag=ap.ssid if conn_worker is None else
                                         '%s.%s' % (conn_worker.name, ap.ssid))

                        utils.release_ap(ap, batch_locker, self._broken_pdus)

                        if conn_worker is not None:
                            conn_worker.cleanup()

                    if not healthy_dut:
                        continue

                batch_locker.unlock_aps()

            if webdriver_tunnel:
                webdriver_instance.disconnect_ssh_tunnel(webdriver_tunnel,
                                                         WEBDRIVER_PORT)
                webdriver_instance.close()
            capturer.close()
            logging.info('Powering off VM %s', webdriver_instance)
            utils.power_off_VM(webdriver_master, webdriver_instance)
            lock_manager.unlock(webdriver_instance.hostname)

            if self._broken_pdus:
                logging.info('PDU is down!!!\nThe following PDUs are down:\n')
                pprint.pprint(self._broken_pdus)

            factory = ap_configurator_factory.APConfiguratorFactory(
                    ap_constants.AP_TEST_TYPE_CHAOS)
            factory.turn_off_all_routers(self._broken_pdus)
    def run(self,
            job,
            batch_size=10,
            tries=10,
            capturer_hostname=None,
            conn_worker=None,
            work_client_hostname=None,
            disabled_sysinfo=False):
        """Executes Chaos test.

        @param job: an Autotest job object.
        @param batch_size: an integer, max number of APs to lock in one batch.
        @param tries: an integer, number of iterations to run per AP.
        @param capturer_hostname: a string or None, hostname or IP of capturer.
        @param conn_worker: ConnectionWorkerAbstract or None, to run extra
                            work after successful connection.
        @param work_client_hostname: a string or None, hostname of work client
        @param disabled_sysinfo: a bool, disable collection of logs from DUT.


        @raises TestError: Packet capture DUT may be down or another test may be
                           running in the chamber.
        """

        lock_manager = host_lock_manager.HostLockManager()
        host_prefix = self._host.hostname.split('-')[0]
        if ap_constants.CASEY5 in host_prefix:
            test_type = ap_constants.AP_TEST_TYPE_CASEY5
        elif ap_constants.CASEY7 in host_prefix:
            test_type = ap_constants.AP_TEST_TYPE_CASEY7
        else:
            test_type = None

        with host_lock_manager.HostsLockedBy(lock_manager):
            capture_host = utils.allocate_packet_capturer(
                lock_manager, hostname=capturer_hostname, prefix=host_prefix)
            # Cleanup and reboot packet capturer before the test.
            utils.sanitize_client(capture_host)
            capturer = site_linux_system.LinuxSystem(capture_host, {},
                                                     'packet_capturer')

            # Run iw scan and abort if more than allowed number of APs are up.
            iw_command = iw_runner.IwRunner(capture_host)
            start_time = time.time()
            logging.info('Performing a scan with a max timeout of 30 seconds.')
            capture_interface = 'wlan0'
            capturer_info = capture_host.run('cat /etc/lsb-release',
                                             ignore_status=True,
                                             timeout=5).stdout
            if 'whirlwind' in capturer_info:
                # Use the dual band aux radio for scanning networks.
                capture_interface = 'wlan2'
            while time.time() - start_time <= ap_constants.MAX_SCAN_TIMEOUT:
                networks = iw_command.scan(capture_interface)
                if networks is None:
                    if (time.time() -
                            start_time == ap_constants.MAX_SCAN_TIMEOUT):
                        raise error.TestError(
                            'Packet capturer is not responding to scans. Check'
                            'device and re-run test')
                    continue
                elif len(networks) < ap_constants.MAX_SSID_COUNT:
                    break
                elif len(networks) >= ap_constants.MAX_SSID_COUNT:
                    raise error.TestError(
                        'Probably someone is already running a '
                        'chaos test?!')

            if conn_worker is not None:
                work_client_machine = utils.allocate_packet_capturer(
                    lock_manager, hostname=work_client_hostname)
                conn_worker.prepare_work_client(work_client_machine)

            batch_locker = ap_batch_locker.ApBatchLocker(
                lock_manager, self._ap_spec, ap_test_type=test_type)

            while batch_locker.has_more_aps():
                # Work around for CrOS devices only:crbug.com/358716
                # Do not reboot Android devices:b/27977927
                if self._host.get_os_type() != adb_host.OS_TYPE_ANDROID:
                    utils.sanitize_client(self._host)
                healthy_dut = True

                with contextlib.closing(
                        wifi_client.WiFiClient(
                            hosts.create_host(
                                {
                                    'hostname': self._host.hostname,
                                    'afe_host': self._host._afe_host
                                },
                                host_class=self._host.__class__), './debug',
                            False)) as client:

                    aps = batch_locker.get_ap_batch(batch_size=batch_size)
                    if not aps:
                        logging.info('No more APs to test.')
                        break

                    utils.configure_aps(aps, self._ap_spec)

                    aps = utils.filter_quarantined_and_config_failed_aps(
                        aps, batch_locker, job)

                    for ap in aps:
                        # http://crbug.com/306687
                        if ap.ssid == None:
                            logging.error('The SSID was not set for the AP:%s',
                                          ap)

                        healthy_dut = utils.is_dut_healthy(client, ap)

                        if not healthy_dut:
                            logging.error('DUT is not healthy, rebooting.')
                            batch_locker.unlock_and_reclaim_aps()
                            break

                        networks = utils.return_available_networks(
                            ap, capturer, job, self._ap_spec)

                        if networks is None:
                            # If scan returned no networks, iw scan failed.
                            # Reboot the packet capturer device and
                            # reconfigure the capturer.
                            batch_locker.unlock_and_reclaim_ap(ap.host_name)
                            logging.error('Packet capture is not healthy, '
                                          'rebooting.')
                            capturer.host.reboot()
                            capturer = site_linux_system.LinuxSystem(
                                capture_host, {}, 'packet_capturer')
                            continue
                        if networks == list():
                            # Packet capturer did not find the SSID in scan or
                            # there was a security mismatch.
                            utils.release_ap(ap, batch_locker)
                            continue

                        assoc_params = ap.get_association_parameters()

                        if not utils.is_conn_worker_healthy(
                                conn_worker, ap, assoc_params, job):
                            utils.release_ap(ap, batch_locker)
                            continue

                        name = ap.name
                        kernel_ver = self._host.get_kernel_ver()
                        firmware_ver = utils.get_firmware_ver(self._host)
                        if not firmware_ver:
                            firmware_ver = "Unknown"

                        debug_dict = {
                            '+++PARSE DATA+++': '+++PARSE DATA+++',
                            'SSID': ap._ssid,
                            'DUT': client.wifi_mac,
                            'AP Info': ap.name,
                            'kernel_version': kernel_ver,
                            'wifi_firmware_version': firmware_ver
                        }
                        debug_string = pprint.pformat(debug_dict)

                        logging.info(
                            'Waiting %d seconds for the AP dhcp '
                            'server', ap.dhcp_delay)
                        time.sleep(ap.dhcp_delay)

                        result = job.run_test(
                            self._test,
                            capturer=capturer,
                            capturer_frequency=networks[0].frequency,
                            capturer_ht_type=networks[0].ht,
                            host=self._host,
                            assoc_params=assoc_params,
                            client=client,
                            tries=tries,
                            debug_info=debug_string,
                            # Copy all logs from the system
                            disabled_sysinfo=disabled_sysinfo,
                            conn_worker=conn_worker,
                            tag=ap.ssid if conn_worker is None else '%s.%s' %
                            (conn_worker.name, ap.ssid))

                        utils.release_ap(ap, batch_locker)

                        if conn_worker is not None:
                            conn_worker.cleanup()

                    if not healthy_dut:
                        continue

                batch_locker.unlock_aps()

            capturer.close()

            factory = ap_configurator_factory.APConfiguratorFactory(test_type)
            factory.turn_off_all_routers([])
    def run(self,
            job,
            batch_size=7,
            tries=10,
            capturer_hostname=None,
            conn_worker=None,
            work_client_hostname=None,
            disabled_sysinfo=False):
        """Executes Chaos test.

        @param job: an Autotest job object.
        @param batch_size: an integer, max number of APs to lock in one batch.
        @param tries: an integer, number of iterations to run per AP.
        @param capturer_hostname: a string or None, hostname or IP of capturer.
        @param conn_worker: ConnectionWorkerAbstract or None, to run extra
                            work after successful connection.
        @param work_client_hostname: a string or None, hostname of work client
        @param disabled_sysinfo: a bool, disable collection of logs from DUT.


        @raises TestError: Issues locking VM webdriver instance
        """

        lock_manager = host_lock_manager.HostLockManager()
        webdriver_master = hosts.SSHHost(MASTERNAME, user='******')
        with host_lock_manager.HostsLockedBy(lock_manager):
            capture_host = utils.allocate_packet_capturer(
                lock_manager, hostname=capturer_hostname)
            # Cleanup and reboot packet capturer before the test.
            utils.sanitize_client(capture_host)
            capturer = site_linux_system.LinuxSystem(capture_host, {},
                                                     'packet_capturer')

            # Run iw scan and abort if more than allowed number of APs are up.
            iw_command = iw_runner.IwRunner(capture_host)
            start_time = time.time()
            logging.info('Performing a scan with a max timeout of 30 seconds.')
            while time.time() - start_time <= ap_constants.MAX_SCAN_TIMEOUT:
                networks = iw_command.scan('wlan0')
                if networks is None:
                    if (time.time() -
                            start_time == ap_constants.MAX_SCAN_TIMEOUT):
                        raise error.TestError(
                            'Packet capturer is not responding to scans. Check'
                            'device and re-run test')
                    continue
                elif len(networks) < ap_constants.MAX_SSID_COUNT:
                    break
                elif len(networks) >= ap_constants.MAX_SSID_COUNT:
                    raise error.TestError(
                        'Probably someone is already running a'
                        'chaos test?!')

            if conn_worker is not None:
                work_client_machine = utils.allocate_packet_capturer(
                    lock_manager, hostname=work_client_hostname)
                conn_worker.prepare_work_client(work_client_machine)

            webdriver_instance = utils.allocate_webdriver_instance(
                lock_manager)
            self._ap_spec._webdriver_hostname = webdriver_instance

            # If a test is cancelled or aborted the VM may be left on.  Always
            # turn of the VM to return it to a clean state.
            try:
                logging.info('Always power off VM %s', webdriver_instance)
                utils.power_off_VM(webdriver_master, webdriver_instance)
            except:
                logging.debug('VM was already off, ignoring.')

            logging.info('Starting up VM %s', webdriver_instance)
            utils.power_on_VM(webdriver_master, webdriver_instance)

            batch_locker = ap_batch_locker.ApBatchLocker(
                lock_manager,
                self._ap_spec,
                ap_test_type=ap_constants.AP_TEST_TYPE_CHAOS)

            while batch_locker.has_more_aps():
                # Work around crbug.com/358716
                utils.sanitize_client(self._host)
                healthy_dut = True

                with contextlib.closing(
                        wifi_client.WiFiClient(
                            hosts.create_host(self._host.hostname), './debug',
                            False)) as client:

                    aps = batch_locker.get_ap_batch(batch_size=batch_size)
                    if not aps:
                        logging.info('No more APs to test.')
                        break

                    # Power down all of the APs because some can get grumpy
                    # if they are configured several times and remain on.
                    # User the cartridge to down group power downs and
                    # configurations.
                    utils.power_down_aps(aps, self._broken_pdus)
                    utils.configure_aps(aps, self._ap_spec, self._broken_pdus)

                    aps = utils.filter_quarantined_and_config_failed_aps(
                        aps, batch_locker, job, self._broken_pdus)

                    for ap in aps:
                        # http://crbug.com/306687
                        if ap.ssid == None:
                            logging.error('The SSID was not set for the AP:%s',
                                          ap)

                        healthy_dut = utils.is_dut_healthy(client, ap)

                        if not healthy_dut:
                            logging.error('DUT is not healthy, rebooting.')
                            batch_locker.unlock_and_reclaim_aps()
                            break

                        networks = utils.return_available_networks(
                            ap, capturer, job, self._ap_spec)

                        if networks is None:
                            # If scan returned no networks, iw scan failed.
                            # Reboot the packet capturer device and
                            # reconfigure the capturer.
                            batch_locker.unlock_and_reclaim_ap(ap.host_name)
                            logging.error('Packet capture is not healthy, '
                                          'rebooting.')
                            capturer.host.reboot()
                            capturer = site_linux_system.LinuxSystem(
                                capture_host, {}, 'packet_capturer')
                            continue
                        if networks == list():
                            # Packet capturer did not find the SSID in scan or
                            # there was a security mismatch.
                            utils.release_ap(ap, batch_locker,
                                             self._broken_pdus)
                            continue

                        assoc_params = ap.get_association_parameters()

                        if not utils.is_conn_worker_healthy(
                                conn_worker, ap, assoc_params, job):
                            utils.release_ap(ap, batch_locker,
                                             self._broken_pdus)
                            continue

                        name = ap.name
                        kernel_ver = self._host.get_kernel_ver()
                        firmware_ver = utils.get_firmware_ver(self._host)
                        if not firmware_ver:
                            firmware_ver = "Unknown"

                        debug_dict = {
                            '+++PARSE DATA+++': '+++PARSE DATA+++',
                            'SSID': ap._ssid,
                            'DUT': client.wifi_mac,
                            'AP Info': ap.name,
                            'kernel_version': kernel_ver,
                            'wifi_firmware_version': firmware_ver
                        }
                        debug_string = pprint.pformat(debug_dict)

                        logging.info(
                            'Waiting %d seconds for the AP dhcp '
                            'server', ap.dhcp_delay)
                        time.sleep(ap.dhcp_delay)

                        result = job.run_test(
                            self._test,
                            capturer=capturer,
                            capturer_frequency=networks[0].frequency,
                            capturer_ht_type=networks[0].ht,
                            host=self._host,
                            assoc_params=assoc_params,
                            client=client,
                            tries=tries,
                            debug_info=debug_string,
                            # Copy all logs from the system
                            disabled_sysinfo=disabled_sysinfo,
                            conn_worker=conn_worker,
                            tag=ap.ssid if conn_worker is None else '%s.%s' %
                            (conn_worker.name, ap.ssid))

                        utils.release_ap(ap, batch_locker, self._broken_pdus)

                        if conn_worker is not None:
                            conn_worker.cleanup()

                    if not healthy_dut:
                        continue

                batch_locker.unlock_aps()
            capturer.close()
            logging.info('Powering off VM %s', webdriver_instance)
            utils.power_off_VM(webdriver_master, webdriver_instance)
            lock_manager.unlock(webdriver_instance)

            if self._broken_pdus:
                logging.info('PDU is down!!!\nThe following PDUs are down:\n')
                pprint.pprint(self._broken_pdus)