def run(self, job, batch_size=10, tries=10, capturer_hostname=None,
        conn_worker=None, work_client_hostname=None,
        disabled_sysinfo=False):
    """Executes Chaos test.

    Locks a packet capturer and a webdriver VM, verifies the chamber is
    quiet, then configures APs in batches and runs self._test against each
    AP that is visible to the packet capturer.

    @param job: an Autotest job object.
    @param batch_size: an integer, max number of APs to lock in one batch.
    @param tries: an integer, number of iterations to run per AP.
    @param capturer_hostname: a string or None, hostname or IP of capturer.
    @param conn_worker: ConnectionWorkerAbstract or None, to run extra
                        work after successful connection.
    @param work_client_hostname: a string or None, hostname of work client
    @param disabled_sysinfo: a bool, disable collection of logs from DUT.

    @raises TestError: the packet capturer returned no scan result within
                       ap_constants.MAX_SCAN_TIMEOUT, the scan found
                       ap_constants.MAX_SSID_COUNT or more networks, or
                       the webdriver tunnel port check failed.

    """
    lock_manager = host_lock_manager.HostLockManager()
    webdriver_master = hosts.SSHHost(MASTERNAME, user='******')
    host_prefix = self._host.hostname.split('-')[0]
    # Only assigned a real tunnel in the lab-zone branch below; it must
    # exist unconditionally because teardown tests it.
    webdriver_tunnel = None

    with host_lock_manager.HostsLockedBy(lock_manager):
        capture_host = utils.allocate_packet_capturer(
                lock_manager, hostname=capturer_hostname,
                prefix=host_prefix)
        # Cleanup and reboot packet capturer before the test.
        utils.sanitize_client(capture_host)
        capturer = site_linux_system.LinuxSystem(capture_host, {},
                                                 'packet_capturer')

        # Run iw scan and abort if more than allowed number of APs are up.
        iw_command = iw_runner.IwRunner(capture_host)
        start_time = time.time()
        logging.info('Performing a scan with a max timeout of 30 seconds.')
        capture_interface = 'wlan0'
        capturer_info = capture_host.run('cat /etc/lsb-release',
                                         ignore_status=True,
                                         timeout=5).stdout
        if 'whirlwind' in capturer_info:
            # Use the dual band aux radio for scanning networks.
            capture_interface = 'wlan2'

        while time.time() - start_time <= ap_constants.MAX_SCAN_TIMEOUT:
            networks = iw_command.scan(capture_interface)
            if networks is None:
                # Scan failed; keep retrying until the timeout expires.
                continue
            if len(networks) >= ap_constants.MAX_SSID_COUNT:
                raise error.TestError(
                        'Probably someone is already running a '
                        'chaos test?!')
            break
        else:
            # The loop ran out of time without a single successful scan.
            # (Comparing elapsed float seconds with == never matched, so
            # the timeout is detected via the loop's else clause.)
            raise error.TestError(
                    'Packet capturer is not responding to scans. Check '
                    'device and re-run test')

        if conn_worker is not None:
            work_client_machine = utils.allocate_packet_capturer(
                    lock_manager, hostname=work_client_hostname)
            conn_worker.prepare_work_client(work_client_machine)

        # Lock VM. If on, power off; always power on. Then create a tunnel.
        webdriver_instance = utils.allocate_webdriver_instance(lock_manager)

        if utils.is_VM_running(webdriver_master, webdriver_instance):
            logging.info('VM %s was on; powering off for a clean instance',
                         webdriver_instance)
            utils.power_off_VM(webdriver_master, webdriver_instance)
            logging.info('Allow VM time to gracefully shut down')
            time.sleep(5)

        logging.info('Starting up VM %s', webdriver_instance)
        utils.power_on_VM(webdriver_master, webdriver_instance)
        logging.info('Allow VM time to power on before creating a tunnel.')
        time.sleep(30)

        if not client_utils.host_is_in_lab_zone(webdriver_instance.hostname):
            self._ap_spec._webdriver_hostname = webdriver_instance.hostname
        else:
            # If in the lab then port forwarding must be done so webdriver
            # connection will be over localhost.
            self._ap_spec._webdriver_hostname = 'localhost'
            webdriver_tunnel = webdriver_instance.create_ssh_tunnel(
                    WEBDRIVER_PORT, WEBDRIVER_PORT)
            logging.info('Wait for tunnel to be created.')
            for _ in range(3):
                time.sleep(10)
                results = client_utils.run('lsof -i:%s' % WEBDRIVER_PORT,
                                           ignore_status=True)
                if results:
                    break
            if not results:
                raise error.TestError(
                        'Unable to listen to WEBDRIVER_PORT: %s' %
                        (results,))

        batch_locker = ap_batch_locker.ApBatchLocker(
                lock_manager, self._ap_spec,
                ap_test_type=ap_constants.AP_TEST_TYPE_CHAOS)

        while batch_locker.has_more_aps():
            # Work around for CrOS devices only:crbug.com/358716
            utils.sanitize_client(self._host)
            healthy_dut = True

            with contextlib.closing(wifi_client.WiFiClient(
                    hosts.create_host(
                            {
                                'hostname' : self._host.hostname,
                                'afe_host' : self._host._afe_host,
                                'host_info_store':
                                        self._host.host_info_store,
                            },
                            host_class=self._host.__class__,
                    ),
                    './debug',
                    False,
            )) as client:

                aps = batch_locker.get_ap_batch(batch_size=batch_size)
                if not aps:
                    logging.info('No more APs to test.')
                    break

                # Power down all of the APs because some can get grumpy
                # if they are configured several times and remain on.
                # Use the cartridge to group power downs and
                # configurations.
                utils.power_down_aps(aps, self._broken_pdus)
                utils.configure_aps(aps, self._ap_spec, self._broken_pdus)

                aps = utils.filter_quarantined_and_config_failed_aps(
                        aps, batch_locker, job, self._broken_pdus)

                for ap in aps:
                    # http://crbug.com/306687
                    if ap.ssid is None:
                        logging.error('The SSID was not set for the AP:%s',
                                      ap)

                    healthy_dut = utils.is_dut_healthy(client, ap)

                    if not healthy_dut:
                        logging.error('DUT is not healthy, rebooting.')
                        batch_locker.unlock_and_reclaim_aps()
                        break

                    networks = utils.return_available_networks(
                            ap, capturer, job, self._ap_spec)

                    if networks is None:
                        # If scan returned no networks, iw scan failed.
                        # Reboot the packet capturer device and
                        # reconfigure the capturer.
                        batch_locker.unlock_and_reclaim_ap(ap.host_name)
                        logging.error('Packet capture is not healthy, '
                                      'rebooting.')
                        capturer.host.reboot()
                        capturer = site_linux_system.LinuxSystem(
                                capture_host, {}, 'packet_capturer')
                        continue
                    if networks == []:
                        # Packet capturer did not find the SSID in scan or
                        # there was a security mismatch.
                        utils.release_ap(ap, batch_locker,
                                         self._broken_pdus)
                        continue

                    assoc_params = ap.get_association_parameters()

                    if not utils.is_conn_worker_healthy(
                            conn_worker, ap, assoc_params, job):
                        utils.release_ap(
                                ap, batch_locker, self._broken_pdus)
                        continue

                    kernel_ver = self._host.get_kernel_ver()
                    firmware_ver = utils.get_firmware_ver(self._host)
                    if not firmware_ver:
                        firmware_ver = "Unknown"

                    debug_dict = {'+++PARSE DATA+++': '+++PARSE DATA+++',
                                  'SSID': ap._ssid,
                                  'DUT': client.wifi_mac,
                                  'AP Info': ap.name,
                                  'kernel_version': kernel_ver,
                                  'wifi_firmware_version': firmware_ver}
                    debug_string = pprint.pformat(debug_dict)

                    logging.info('Waiting %d seconds for the AP dhcp '
                                 'server', ap.dhcp_delay)
                    time.sleep(ap.dhcp_delay)

                    job.run_test(self._test,
                                 capturer=capturer,
                                 capturer_frequency=networks[0].frequency,
                                 capturer_ht_type=networks[0].ht,
                                 host=self._host,
                                 assoc_params=assoc_params,
                                 client=client,
                                 tries=tries,
                                 debug_info=debug_string,
                                 # Copy all logs from the system
                                 disabled_sysinfo=disabled_sysinfo,
                                 conn_worker=conn_worker,
                                 tag=ap.ssid if conn_worker is None else
                                     '%s.%s' % (conn_worker.name, ap.ssid))

                    utils.release_ap(ap, batch_locker, self._broken_pdus)

                    if conn_worker is not None:
                        conn_worker.cleanup()

                if not healthy_dut:
                    # The batch was already unlocked and reclaimed above.
                    continue

            batch_locker.unlock_aps()

        if webdriver_tunnel:
            webdriver_instance.disconnect_ssh_tunnel(webdriver_tunnel,
                                                     WEBDRIVER_PORT)
            webdriver_instance.close()
        capturer.close()
        logging.info('Powering off VM %s', webdriver_instance)
        utils.power_off_VM(webdriver_master, webdriver_instance)
        lock_manager.unlock(webdriver_instance.hostname)

        if self._broken_pdus:
            logging.info('PDU is down!!!\nThe following PDUs are down:\n')
            pprint.pprint(self._broken_pdus)

        factory = ap_configurator_factory.APConfiguratorFactory(
                ap_constants.AP_TEST_TYPE_CHAOS)
        factory.turn_off_all_routers(self._broken_pdus)
def run(self, job, batch_size=7, tries=10, capturer_hostname=None,
        conn_worker=None, work_client_hostname=None,
        disabled_sysinfo=False):
    """Executes Chaos test.

    Locks a packet capturer and a webdriver VM, verifies the chamber is
    quiet, then configures APs in batches and runs self._test against each
    AP that is visible to the packet capturer.

    @param job: an Autotest job object.
    @param batch_size: an integer, max number of APs to lock in one batch.
    @param tries: an integer, number of iterations to run per AP.
    @param capturer_hostname: a string or None, hostname or IP of capturer.
    @param conn_worker: ConnectionWorkerAbstract or None, to run extra
                        work after successful connection.
    @param work_client_hostname: a string or None, hostname of work client
    @param disabled_sysinfo: a bool, disable collection of logs from DUT.

    @raises TestError: the packet capturer returned no scan result within
                       ap_constants.MAX_SCAN_TIMEOUT, or the scan found
                       ap_constants.MAX_SSID_COUNT or more networks.

    """
    lock_manager = host_lock_manager.HostLockManager()
    webdriver_master = hosts.SSHHost(MASTERNAME, user='******')

    with host_lock_manager.HostsLockedBy(lock_manager):
        capture_host = utils.allocate_packet_capturer(
                lock_manager, hostname=capturer_hostname)
        # Cleanup and reboot packet capturer before the test.
        utils.sanitize_client(capture_host)
        capturer = site_linux_system.LinuxSystem(capture_host, {},
                                                 'packet_capturer')

        # Run iw scan and abort if more than allowed number of APs are up.
        iw_command = iw_runner.IwRunner(capture_host)
        start_time = time.time()
        logging.info('Performing a scan with a max timeout of 30 seconds.')
        while time.time() - start_time <= ap_constants.MAX_SCAN_TIMEOUT:
            networks = iw_command.scan('wlan0')
            if networks is None:
                # Scan failed; keep retrying until the timeout expires.
                continue
            if len(networks) >= ap_constants.MAX_SSID_COUNT:
                raise error.TestError(
                        'Probably someone is already running a '
                        'chaos test?!')
            break
        else:
            # The loop ran out of time without a single successful scan.
            # (Comparing elapsed float seconds with == never matched, so
            # the timeout is detected via the loop's else clause.)
            raise error.TestError(
                    'Packet capturer is not responding to scans. Check '
                    'device and re-run test')

        if conn_worker is not None:
            work_client_machine = utils.allocate_packet_capturer(
                    lock_manager, hostname=work_client_hostname)
            conn_worker.prepare_work_client(work_client_machine)

        webdriver_instance = utils.allocate_webdriver_instance(
                lock_manager)
        self._ap_spec._webdriver_hostname = webdriver_instance

        # If a test is cancelled or aborted the VM may be left on. Always
        # turn off the VM to return it to a clean state.
        logging.info('Always power off VM %s', webdriver_instance)
        try:
            utils.power_off_VM(webdriver_master, webdriver_instance)
        except Exception:
            # Best effort: a failure here is treated as "already off".
            # Exception (not a bare except) so Ctrl-C still aborts.
            logging.debug('VM was already off, ignoring.')

        logging.info('Starting up VM %s', webdriver_instance)
        utils.power_on_VM(webdriver_master, webdriver_instance)

        batch_locker = ap_batch_locker.ApBatchLocker(
                lock_manager, self._ap_spec,
                ap_test_type=ap_constants.AP_TEST_TYPE_CHAOS)

        while batch_locker.has_more_aps():
            # Work around crbug.com/358716
            utils.sanitize_client(self._host)
            healthy_dut = True

            with contextlib.closing(
                    wifi_client.WiFiClient(
                            hosts.create_host(self._host.hostname),
                            './debug', False)) as client:

                aps = batch_locker.get_ap_batch(batch_size=batch_size)
                if not aps:
                    logging.info('No more APs to test.')
                    break

                # Power down all of the APs because some can get grumpy
                # if they are configured several times and remain on.
                # Use the cartridge to group power downs and
                # configurations.
                utils.power_down_aps(aps, self._broken_pdus)
                utils.configure_aps(aps, self._ap_spec, self._broken_pdus)

                aps = utils.filter_quarantined_and_config_failed_aps(
                        aps, batch_locker, job, self._broken_pdus)

                for ap in aps:
                    # http://crbug.com/306687
                    if ap.ssid is None:
                        logging.error('The SSID was not set for the AP:%s',
                                      ap)

                    healthy_dut = utils.is_dut_healthy(client, ap)

                    if not healthy_dut:
                        logging.error('DUT is not healthy, rebooting.')
                        batch_locker.unlock_and_reclaim_aps()
                        break

                    networks = utils.return_available_networks(
                            ap, capturer, job, self._ap_spec)

                    if networks is None:
                        # If scan returned no networks, iw scan failed.
                        # Reboot the packet capturer device and
                        # reconfigure the capturer.
                        batch_locker.unlock_and_reclaim_ap(ap.host_name)
                        logging.error('Packet capture is not healthy, '
                                      'rebooting.')
                        capturer.host.reboot()
                        capturer = site_linux_system.LinuxSystem(
                                capture_host, {}, 'packet_capturer')
                        continue
                    if networks == []:
                        # Packet capturer did not find the SSID in scan or
                        # there was a security mismatch.
                        utils.release_ap(ap, batch_locker,
                                         self._broken_pdus)
                        continue

                    assoc_params = ap.get_association_parameters()

                    if not utils.is_conn_worker_healthy(
                            conn_worker, ap, assoc_params, job):
                        utils.release_ap(ap, batch_locker,
                                         self._broken_pdus)
                        continue

                    kernel_ver = self._host.get_kernel_ver()
                    firmware_ver = utils.get_firmware_ver(self._host)
                    if not firmware_ver:
                        firmware_ver = "Unknown"

                    debug_dict = {
                        '+++PARSE DATA+++': '+++PARSE DATA+++',
                        'SSID': ap._ssid,
                        'DUT': client.wifi_mac,
                        'AP Info': ap.name,
                        'kernel_version': kernel_ver,
                        'wifi_firmware_version': firmware_ver
                    }
                    debug_string = pprint.pformat(debug_dict)

                    logging.info(
                            'Waiting %d seconds for the AP dhcp '
                            'server', ap.dhcp_delay)
                    time.sleep(ap.dhcp_delay)

                    job.run_test(
                            self._test,
                            capturer=capturer,
                            capturer_frequency=networks[0].frequency,
                            capturer_ht_type=networks[0].ht,
                            host=self._host,
                            assoc_params=assoc_params,
                            client=client,
                            tries=tries,
                            debug_info=debug_string,
                            # Copy all logs from the system
                            disabled_sysinfo=disabled_sysinfo,
                            conn_worker=conn_worker,
                            tag=ap.ssid if conn_worker is None else
                                '%s.%s' % (conn_worker.name, ap.ssid))

                    utils.release_ap(ap, batch_locker, self._broken_pdus)

                    if conn_worker is not None:
                        conn_worker.cleanup()

                if not healthy_dut:
                    # The batch was already unlocked and reclaimed above.
                    continue

            batch_locker.unlock_aps()

        capturer.close()
        logging.info('Powering off VM %s', webdriver_instance)
        utils.power_off_VM(webdriver_master, webdriver_instance)
        lock_manager.unlock(webdriver_instance)

        if self._broken_pdus:
            logging.info('PDU is down!!!\nThe following PDUs are down:\n')
            pprint.pprint(self._broken_pdus)
def run(self, job, batch_size=10, tries=10, capturer_hostname=None,
        conn_worker=None, work_client_hostname=None,
        disabled_sysinfo=False):
    """Executes Chaos test.

    Locks a packet capturer, verifies the chamber is quiet, then configures
    APs in batches and runs self._test against each AP that is visible to
    the packet capturer. The AP test type is chosen from the DUT hostname
    prefix (CASEY5 / CASEY7, otherwise None).

    @param job: an Autotest job object.
    @param batch_size: an integer, max number of APs to lock in one batch.
    @param tries: an integer, number of iterations to run per AP.
    @param capturer_hostname: a string or None, hostname or IP of capturer.
    @param conn_worker: ConnectionWorkerAbstract or None, to run extra
                        work after successful connection.
    @param work_client_hostname: a string or None, hostname of work client
    @param disabled_sysinfo: a bool, disable collection of logs from DUT.

    @raises TestError: Packet capture DUT may be down or another test may
                       be running in the chamber.

    """
    lock_manager = host_lock_manager.HostLockManager()
    host_prefix = self._host.hostname.split('-')[0]

    if ap_constants.CASEY5 in host_prefix:
        test_type = ap_constants.AP_TEST_TYPE_CASEY5
    elif ap_constants.CASEY7 in host_prefix:
        test_type = ap_constants.AP_TEST_TYPE_CASEY7
    else:
        test_type = None

    with host_lock_manager.HostsLockedBy(lock_manager):
        capture_host = utils.allocate_packet_capturer(
                lock_manager, hostname=capturer_hostname,
                prefix=host_prefix)
        # Cleanup and reboot packet capturer before the test.
        utils.sanitize_client(capture_host)
        capturer = site_linux_system.LinuxSystem(capture_host, {},
                                                 'packet_capturer')

        # Run iw scan and abort if more than allowed number of APs are up.
        iw_command = iw_runner.IwRunner(capture_host)
        start_time = time.time()
        logging.info('Performing a scan with a max timeout of 30 seconds.')
        capture_interface = 'wlan0'
        capturer_info = capture_host.run('cat /etc/lsb-release',
                                         ignore_status=True,
                                         timeout=5).stdout
        if 'whirlwind' in capturer_info:
            # Use the dual band aux radio for scanning networks.
            capture_interface = 'wlan2'

        while time.time() - start_time <= ap_constants.MAX_SCAN_TIMEOUT:
            networks = iw_command.scan(capture_interface)
            if networks is None:
                # Scan failed; keep retrying until the timeout expires.
                continue
            if len(networks) >= ap_constants.MAX_SSID_COUNT:
                raise error.TestError(
                        'Probably someone is already running a '
                        'chaos test?!')
            break
        else:
            # The loop ran out of time without a single successful scan.
            # (Comparing elapsed float seconds with == never matched, so
            # the timeout is detected via the loop's else clause.)
            raise error.TestError(
                    'Packet capturer is not responding to scans. Check '
                    'device and re-run test')

        if conn_worker is not None:
            work_client_machine = utils.allocate_packet_capturer(
                    lock_manager, hostname=work_client_hostname)
            conn_worker.prepare_work_client(work_client_machine)

        batch_locker = ap_batch_locker.ApBatchLocker(
                lock_manager, self._ap_spec,
                ap_test_type=test_type)

        while batch_locker.has_more_aps():
            # Work around for CrOS devices only:crbug.com/358716
            # Do not reboot Android devices:b/27977927
            if self._host.get_os_type() != adb_host.OS_TYPE_ANDROID:
                utils.sanitize_client(self._host)
            healthy_dut = True

            with contextlib.closing(
                    wifi_client.WiFiClient(
                            hosts.create_host(
                                    {
                                        'hostname': self._host.hostname,
                                        'afe_host': self._host._afe_host
                                    },
                                    host_class=self._host.__class__),
                            './debug', False)) as client:

                aps = batch_locker.get_ap_batch(batch_size=batch_size)
                if not aps:
                    logging.info('No more APs to test.')
                    break

                utils.configure_aps(aps, self._ap_spec)

                aps = utils.filter_quarantined_and_config_failed_aps(
                        aps, batch_locker, job)

                for ap in aps:
                    # http://crbug.com/306687
                    if ap.ssid is None:
                        logging.error('The SSID was not set for the AP:%s',
                                      ap)

                    healthy_dut = utils.is_dut_healthy(client, ap)

                    if not healthy_dut:
                        logging.error('DUT is not healthy, rebooting.')
                        batch_locker.unlock_and_reclaim_aps()
                        break

                    networks = utils.return_available_networks(
                            ap, capturer, job, self._ap_spec)

                    if networks is None:
                        # If scan returned no networks, iw scan failed.
                        # Reboot the packet capturer device and
                        # reconfigure the capturer.
                        batch_locker.unlock_and_reclaim_ap(ap.host_name)
                        logging.error('Packet capture is not healthy, '
                                      'rebooting.')
                        capturer.host.reboot()
                        capturer = site_linux_system.LinuxSystem(
                                capture_host, {}, 'packet_capturer')
                        continue
                    if networks == []:
                        # Packet capturer did not find the SSID in scan or
                        # there was a security mismatch.
                        utils.release_ap(ap, batch_locker)
                        continue

                    assoc_params = ap.get_association_parameters()

                    if not utils.is_conn_worker_healthy(
                            conn_worker, ap, assoc_params, job):
                        utils.release_ap(ap, batch_locker)
                        continue

                    kernel_ver = self._host.get_kernel_ver()
                    firmware_ver = utils.get_firmware_ver(self._host)
                    if not firmware_ver:
                        firmware_ver = "Unknown"

                    debug_dict = {
                        '+++PARSE DATA+++': '+++PARSE DATA+++',
                        'SSID': ap._ssid,
                        'DUT': client.wifi_mac,
                        'AP Info': ap.name,
                        'kernel_version': kernel_ver,
                        'wifi_firmware_version': firmware_ver
                    }
                    debug_string = pprint.pformat(debug_dict)

                    logging.info(
                            'Waiting %d seconds for the AP dhcp '
                            'server', ap.dhcp_delay)
                    time.sleep(ap.dhcp_delay)

                    job.run_test(
                            self._test,
                            capturer=capturer,
                            capturer_frequency=networks[0].frequency,
                            capturer_ht_type=networks[0].ht,
                            host=self._host,
                            assoc_params=assoc_params,
                            client=client,
                            tries=tries,
                            debug_info=debug_string,
                            # Copy all logs from the system
                            disabled_sysinfo=disabled_sysinfo,
                            conn_worker=conn_worker,
                            tag=ap.ssid if conn_worker is None else
                                '%s.%s' % (conn_worker.name, ap.ssid))

                    utils.release_ap(ap, batch_locker)

                    if conn_worker is not None:
                        conn_worker.cleanup()

                if not healthy_dut:
                    # The batch was already unlocked and reclaimed above.
                    continue

            batch_locker.unlock_aps()

        capturer.close()

        factory = ap_configurator_factory.APConfiguratorFactory(test_type)
        factory.turn_off_all_routers([])
def run(self, job, tries=10, capturer_hostname=None,
        conn_worker_hostnames=[], release_version="",
        disabled_sysinfo=False):
    """Executes Clique test.

    Allocates and updates the DUT pool, locks a packet capturer, the
    connection workers and one AP batch per AP spec, checks that everything
    is healthy, then runs self._test once and releases all resources via
    self._cleanup.

    @param job: an Autotest job object.
    @param tries: an integer, number of iterations to run per AP.
    @param capturer_hostname: a string or None, hostname or IP of capturer.
    @param conn_worker_hostnames: a list of string, hostname of connection
                                  workers. Only iterated, never mutated.
    @param release_version: the DUT cros image version to use for testing.
    @param disabled_sysinfo: a bool, disable collection of logs from DUT.

    @raises TestError: no DUTs could be allocated, the DUT pool update
                       failed, no AP matched a spec, an AP has no SSID,
                       scanning for an AP failed, or a DUT / connection
                       worker health check failed.

    """
    lock_manager = host_lock_manager.HostLockManager()
    with host_lock_manager.HostsLockedBy(lock_manager):
        dut_locker = clique_dut_locker.CliqueDUTBatchLocker(
                lock_manager, self._dut_pool_spec)
        dut_objects = self._allocate_dut_pool(dut_locker)
        if not dut_objects:
            raise error.TestError('No DUTs allocated for test.')
        update_status = self._update_dut_pool(dut_objects, release_version)
        if not update_status:
            raise error.TestError('DUT pool update failed. Bailing!')

        capture_host = utils.allocate_packet_capturer(
                lock_manager, hostname=capturer_hostname)
        capturer = site_linux_system.LinuxSystem(
                capture_host, {}, 'packet_capturer')

        conn_workers = []
        for hostname in conn_worker_hostnames:
            conn_worker_host = utils.allocate_packet_capturer(
                    lock_manager, hostname=hostname)
            # Let's create generic connection workers and make them connect
            # to the corresponding AP. The DUT role will recast each of
            # these connection workers based on the role we want them to
            # perform.
            conn_worker = connection_worker.ConnectionWorker()
            conn_worker.prepare_work_client(conn_worker_host)
            conn_workers.append(conn_worker)

        aps = []
        for ap_spec in self._ap_specs:
            # NOTE(review): only the locker from the last iteration is
            # passed to self._cleanup below, and ap_locker is unbound if
            # self._ap_specs is empty -- confirm this is intended.
            ap_locker = ap_batch_locker.ApBatchLocker(
                    lock_manager, ap_spec,
                    ap_test_type=ap_constants.AP_TEST_TYPE_CLIQUE)
            # NOTE(review): get_ap_batch presumably returns a batch
            # (collection) of APs, yet the result is used as a single AP
            # below -- verify against ApBatchLocker.
            ap = ap_locker.get_ap_batch(batch_size=1)
            if not ap:
                raise error.TestError('AP matching spec not found.')
            aps.append(ap)

        # Reset all the DUTs before the test starts and configure all the
        # APs.
        self._sanitize_all_duts(dut_objects)
        utils.configure_aps(aps, self._ap_specs)

        # This is a list of association parameters for the test for all the
        # APs in the test.
        assoc_params_list = []
        # Check if all our APs, DUTs and connection workers are in good
        # state before we proceed.
        for ap, ap_spec in zip(aps, self._ap_specs):
            if ap.ssid is None:
                self._cleanup(dut_objects, dut_locker, ap_locker,
                              capturer, conn_workers)
                raise error.TestError('SSID not set for the AP: %s.' %
                                      ap.configurator.host_name)
            # NOTE(review): argument order here is (ap, ap_spec, capturer,
            # job); confirm it matches utils.return_available_networks.
            networks = utils.return_available_networks(
                    ap, ap_spec, capturer, job)
            # None means the scan failed; an empty list means the SSID was
            # not found. Both are fatal here.
            if networks is None or not networks:
                self._cleanup(dut_objects, dut_locker, ap_locker,
                              capturer, conn_workers)
                raise error.TestError('Scanning error on the AP %s.' %
                                      ap.configurator.host_name)

            assoc_params = ap.get_association_parameters()
            assoc_params_list.append(assoc_params)

        # NOTE(review): `ap` and `networks` below are whatever the last
        # loop iteration left behind; the DUT health check is assumed to
        # sit after the loop -- TODO confirm against the original layout.
        if not self._are_all_duts_healthy(dut_objects, ap):
            self._cleanup(dut_objects, dut_locker, ap_locker,
                          capturer, conn_workers)
            raise error.TestError('Not all DUTs healthy.')

        if not self._are_all_conn_workers_healthy(
                conn_workers, aps, assoc_params_list, job):
            self._cleanup(dut_objects, dut_locker, ap_locker,
                          capturer, conn_workers)
            raise error.TestError('Not all connection workers healthy.')

        debug_string = self._get_debug_string(dut_objects, aps)
        self._sync_time_on_all_duts(dut_objects)

        job.run_test(
                self._test,
                capturer=capturer,
                capturer_frequency=networks[0].frequency,
                capturer_ht_type=networks[0].ht,
                dut_pool=self._dut_pool,
                assoc_params_list=assoc_params_list,
                tries=tries,
                debug_info=debug_string,
                conn_workers=conn_workers,
                # Copy all logs from the system
                disabled_sysinfo=disabled_sysinfo)

        # Reclaim all the APs, DUTs and capturers used in the test and
        # collect the required logs.
        self._cleanup(dut_objects, dut_locker, ap_locker,
                      capturer, conn_workers)