def check_pool_state(pool_name, **kwargs):
    """Verify a pool's servers reach the expected down/disabled state.

    Polls via logger_utils.aretry until the state matches or the retry
    window expires; on timeout, dumps the pool stats and reports the
    failure through logger_utils.error.

    :param pool_name: name of the pool to inspect
    :param kwargs: down_servers, disabled_servers, error_string,
        retry_timeout (seconds, default 10), retry_interval (seconds,
        default 1)
    :return: result of is_pool_servers_in_state on success
    """
    down_servers = kwargs.get('down_servers')
    disabled_servers = kwargs.get('disabled_servers')
    error_string = kwargs.get('error_string')
    timeout = int(kwargs.get('retry_timeout', 10))
    interval = float(kwargs.get('retry_interval', 1))
    try:
        @logger_utils.aretry(delay=interval, period=timeout)
        def _poll_pool_state():
            return is_pool_servers_in_state(
                pool_name, down_servers, disabled_servers, error_string)

        return _poll_pool_state()
    except Exception as e:
        # Capture the full stats snapshot to aid debugging the mismatch.
        logger.debug(
            "Failure case : All pool stats %s" % get_all_pool_stats(pool_name))
        logger_utils.error(
            'Did not find pool in expected state after retry '
            'timeout of %s, down servers: %s, failed with '
            'error: %s' % (timeout, down_servers, e))
def udp_server(servers, port, **kwargs):
    """Start a UDP listener on each of the given servers.

    Args:
        :param servers: server handles to listen on; all servers when
            empty/None, a single handle string is also accepted
        :type servers: list
        :param port: listener port
        :type port: int/str
    Raises:
        KeyError
    """
    if not servers:
        servers = get_all_server_handle()
    elif isinstance(servers, basestring):
        servers = [servers]
    for handle in servers:
        srv = get_server_by_handle(handle)
        srv_vm = srv.vm()
        srv_ip = srv.ip()
        cmd = ('python /root/common/scripts/udp_server.py '
               '--ip %s --p %s &> /tmp/udp_server_out_%s &'
               % (srv_ip, port, srv_ip))
        logger.info("udp_server command: %s" % cmd)
        srv_vm.execute_command(cmd, log_error=False)
        # Give the background process time to come up before checking.
        sleep(10)
        ps_out = srv_vm.execute_command(
            "ps aux | grep 'udp_server.py' | grep -v grep ")
        if not ps_out:
            fail("UDP Server Process not started .. %s " % ps_out)
        startup_log = srv_vm.execute_command(
            'cat /tmp/udp_server_out_%s' % srv_ip)
        if 'starting' not in ''.join(startup_log):
            error("UDP Server not started .. %s " % startup_log)
def check_cloud_state(expected_status='CLOUD_STATE_PLACEMENT_READY', **kwargs):
    """Check that configured cloud(s) have reached the expected state.

    :param expected_status: cloud state to expect
    :param kwargs: cloud_name - restrict the check to one cloud (optional)
    :return: True when every (matching) cloud is in the expected state
    """
    cloud_name = kwargs.get('cloud_name', None)
    asleep(msg='waiting for cloud state', delay=10)
    status_code, resp_json = get('cloud-inventory')
    for cloud_obj in resp_json['results']:
        if cloud_name and cloud_name != cloud_obj['config']['name']:
            continue
        if 'error' in cloud_obj['status']:
            error('Received Error in cloud status %s'
                  % cloud_obj['status']['error'])
        cloud_status = cloud_obj['status']['state']
        last_reason = cloud_obj['status'].get('reason', '')
        # Handles the special case where the cloud is up but about to be
        # reconfigured ('Pending re-config' reason).
        if cloud_status != expected_status or 'Pending re-config' in last_reason:
            if cloud_obj['config']['vtype'] == 'CLOUD_AWS':
                asleep("additional delay for AWS cloud", delay=30)
            # BUG FIX: error() was invoked logging-style with separate
            # format arguments; every other call site passes a single
            # pre-formatted message, so format explicitly here.
            error('Cloud Status is not as expected or reason not null. '
                  'Expected=%s Received=%s, reason = %s'
                  % (expected_status, cloud_status, last_reason))
    return True
def change_role_privileges_and_expect_failure(role_name, resource_name, access, **kwargs):
    """API to change the user role privileges, expecting the change to fail.

    Args:
        :param role_name: User access role name
        :type role_name: str
        :param resource_name: Privilege/Resource name
        :type resource_name: str
        :param access: User Access Mode
        :type access: str

    Returns:
        Success - logs the expected-failure message
        Failure - raises an error with the relevant message
    """
    status_code, resp = change_role_privileges(
        role_name, resource_name, access, check_status_code=False, **kwargs)
    if status_code == 400 and 'Cannot modify Role' in str(resp):
        logger.info(
            "Expected Failure with status code 400 and Cannot modify Role in error message"
        )
    else:
        # BUG FIX: the message had a typo ('Exepcted') and an in-string
        # backslash continuation that embedded stray whitespace in the
        # reported text.
        error('Status code Expected=400 Received=%s and error msg '
              'Expected=Cannot modify Role Received=%s'
              % (status_code, str(resp)))
def get_uuid_from_ref(url_ref=None):
    """Extract the trailing UUID component from an API object ref URL.

    :param url_ref: object ref such as '.../api/pool/<uuid>#<name>'
    :return: the '<uuid>' portion (text after the last '/', before any '#')
    """
    if not url_ref:
        error("URL ref is None")
    # Last path segment holds the uuid; a '#name' suffix may follow it.
    last_segment = str(url_ref).split('/')[-1]
    return last_segment.split('#')[0]
def test_error(self):
    ''' error() is continuable '''
    # error() records the failure but lets the test keep running; the
    # class-level counter tracks how many errors have been raised so far.
    logger.info('error once')
    error('error 1')
    Test_Failures.error_count = 1
    logger.info('error twice')
    error('error 2')
    Test_Failures.error_count = 2
    # Clear the accumulated errors so this test does not actually fail.
    logger.error_list = []  # reset to not actually fail
def upload_file(file_path, directory, should_pass=True, expected_error=None):
    """Upload a file to the controller and validate the expected outcome.

    :param file_path: local path of the file to upload
    :param directory: target fileservice uri/directory
    :param should_pass: when True, the upload is expected to succeed
    :param expected_error: substring expected in the failure message when
        should_pass is False; any other failure is re-raised
    """
    try:
        _upload_multipart_file_form(file_path, directory)
    except Exception as e:
        if should_pass:
            error("File upload should have passed but got: " + str(e))
        elif expected_error and expected_error not in str(e):
            # Failed, but not with the error we were told to expect.
            raise e
    else:
        # BUG FIX: an upload that succeeded while should_pass was False
        # previously went unreported.
        if not should_pass:
            error("File upload should have failed but passed")
def wait_for_vcenter_state(cloud_name, exp_states):
    """Check the vCenter inventory state against the expected states.

    :param cloud_name: cloud name (unused in the lookup; kept for callers)
    :param exp_states: collection of acceptable inventory states
    :return: True when the current state is one of exp_states
    """
    status_code, data = get('vimgrvcenterruntime')
    logger.debug('wait_for_vcenter_state data: %s' % data)
    if data['count'] == 0:
        error('Inventory is not complete')
    state = data['results'][0]['inventory_state']
    if state in exp_states:
        return True
    # error() is continuable, so fall through (implicit None) like the
    # original flow when the state does not match.
    error('Check for state %s one more time got %s' % (exp_states, state))
def get_vm_cloud_sdkconn(vm_name):
    """Build a cloud SDK connection for the cloud hosting the given VM.

    :param vm_name: name of the VM in the testbed
    :return: non-None SDK connection object
    """
    config = get_config()
    site = config.get_mode(key='site_name')
    testbed_json = config.testbed[site].tb_json
    vm_json, cloud_json = get_vm_and_cloud_json(vm_name, testbed_json)
    sdk_conn = get_vm_cloud_sdk(cloud_json=cloud_json, vm_json=vm_json)
    if not sdk_conn:
        error('Expected non-None vm_cloud_sdk_conn for vm %s' % vm_name)
    return sdk_conn
def log_count_should_increase_by(vs_name, initial_log_count, expected_increase_log_count, **kwargs):
    '''
    Blocks until log count has increased by expected_log_count or
    retry_timeout time is reached
    '''
    logger.info('initial_log_count: %s' % initial_log_count)
    logger.info('expected_increase_log_count: %s' % expected_increase_log_count)
    resp = has_log_count_increased_by(
        vs_name, initial_log_count, expected_increase_log_count, **kwargs)
    if resp:
        return resp
    # No increase observed within the retry window; report it.
    logger_utils.error(
        'Log count did not increase by expected value vs %s' % vs_name)
def poweroff(self, vm_name=None):
    """Power off an openstack vm.

    :param vm_name: vm to power off; defaults to this object's vm_json name
    :return: True if already off, otherwise the result of the SHUTOFF
        status check after stopping the vm
    """
    if not vm_name:
        vm_name = self.vm_json.get('name')
    vm = None
    try:
        vm = self.nova.servers.find(name='%s' % vm_name)
        logger.info('vm state %s' % vm.status)
        if vm.status == 'SHUTOFF':
            logger.info('vm is already powered off, return')
            return True
    except Exception as e:
        # BUG FIX: the handler referenced 'e' but the except clause did not
        # bind it ('except Exception:'), which raised NameError instead of
        # reporting the lookup failure.
        error("can't find vm %s in openstack, exp: %s" % (vm_name, e))
    if vm:
        logger.debug('Found vm: %s to poweroff' % vm_name)
        vm.stop()
        return (self.check_vm_status(vm_name, 'SHUTOFF'))
def is_there_IO_error(client_range, log_file='httptest_io_error*', raise_exception=False):
    """
    While traffic generation, IO errors are generally logged at
    /tmp/httptest_<timestamp>.log. The function checks if the log file is
    present or not.

    :param client_range: range of client handles; the first client is checked
    :param log_file: log file name/glob on the client VM (string only)
    :param raise_exception: when True, raise via logger_utils.error if
        errors are found
    :return: the string 'True' if errors were found, 'False' otherwise
    """
    if isinstance(log_file, basestring):
        logs = [log_file]
    else:
        logger_utils.fail(
            'HttpTest failed. Error - Log file should be of type string, but got : %s'
            % log_file)
    for _log_file in logs:
        # BUG FIX: the loop body referenced the outer 'log_file' instead of
        # the loop variable; harmless with a single entry but wrong if
        # 'logs' ever held more than one file.
        logger.info('is_there_IO_error: %s\n' % _log_file)
        clients = get_clients_from_range(client_range)
        vm, ip = traffic_manager.get_client_by_handle(clients[0])
        logger.debug('VM IP, NAME, CLIENT: %s, %s, %s' % (vm.ip, vm.name, ip))
        cmd = 'tail -5 %s' % _log_file
        resp = vm.execute_command(cmd)
        if len(resp) > 0 and raise_exception:
            error_msg = 'Get request failed\n'
            # BUG FIX: loop variable renamed from 'error', which shadowed
            # the module-level error() helper.
            for err_line in resp:
                try:
                    msg = json.loads(err_line)
                except Exception:
                    # When httptest fails, it doesn't write error log in
                    # json format.
                    logger_utils.error('HttpTest failed. Error - %s' % err_line)
                error_msg += 'Client: %s\nValidation: %s\nExpected: %s\nActual: ' \
                             '%s\n\n' % (msg['client'], msg['error_code'],
                                         msg['expected'], msg['actual'])
            # Cleaning up before raising exception
            vm.execute_command('rm %s &> /tmp/httptest' % _log_file)
            logger_utils.error(error_msg)
        else:
            if len(resp) == 0:
                return 'False'
            else:
                logger.info('Failures: %s' % resp)
                return 'True'
def clean_secondary_ips_on_client_server(tb_json):
    """Unassign secondary private IPs from client/server VM interfaces.

    Only AWS clouds are handled; no-access and other cloud types are
    skipped.

    :param tb_json: testbed json with 'Vm', 'Cloud' and 'VmCloud' sections
    """
    client_server_vms = [vm_json for vm_json in tb_json.get('Vm')
                         if vm_json.get('type') in ['client', 'server']]
    for vms in client_server_vms:
        cloud_name = vms.get('cloud_name', 'Default-Cloud')
        # BUG FIX: initialize before the lookup; previously cloud_json was
        # unbound (NameError) when the first lookup raised.
        cloud_json = None
        try:
            cloud_json = [cj for cj in tb_json.get('Cloud')
                          if cj.get('name') == cloud_name][0]
        except TypeError:
            logger.info('Must be no-access cloud?')
        except IndexError:
            logger.info("Can't find vm cloud under Clouds for %s" % vms['name'])
        if not cloud_json:
            # Check in vm clouds
            try:
                cloud_json = [cj for cj in tb_json.get('VmCloud')
                              if cj.get('name') == cloud_name][0]
            except TypeError:
                logger.info('no VmCloud defined in the testbed')
            except IndexError:
                # BUG FIX: an unmatched name previously raised IndexError out
                # of the function; treat it like the 'Cloud' lookup above.
                logger.info("Can't find vm cloud under VmClouds for %s" % vms['name'])
        if not cloud_json:
            logger.info('cloud_json None, no access cloud?')
            continue
        sdk_conn = None
        try:
            if cloud_json.get('vtype') == 'CLOUD_AWS':
                sdk_conn = Aws(cloud_configuration_json=
                               cloud_json.get('aws_configuration'))
                logger.info("AWS SDK connection successfull for cloud %s"
                            % cloud_json.get('name'))
                logger.info("Finding instance for %s " % vms['name'])
                instance = sdk_conn._Aws__get_instance(vms['name'])
                for interface in instance.interfaces:
                    sec_ips = [ip.private_ip_address
                               for ip in interface.private_ip_addresses
                               if ip.primary == False]
                    logger.info('Unassigning secondary IPs : %s from interface %s'
                                % (sec_ips, interface.id))
                    if sec_ips:
                        interface.connection.unassign_private_ip_addresses(
                            network_interface_id=interface.id,
                            private_ip_addresses=sec_ips)
        except Exception as e:
            error("clean_secondary_ips_on_client_server failed due to %s" % e)
        # BUG FIX: sdk_conn stays None for non-AWS clouds; the previous
        # unconditional disconnect raised AttributeError.
        if sdk_conn:
            sdk_conn.disconnect()
def _upload_multipart_file_form(file_path, file_uri):
    """POST a local file to the controller fileservice as multipart form data.

    :param file_path: local path of the file to upload
    :param file_uri: destination uri on the controller
    """
    if not os.path.exists(file_path):
        error('File not found: ' + file_path)
    session = get_session()
    port = 443
    path = 'https://%s:%s/api/fileservice/uploads' % (session.controller_ip, port)
    file_name = os.path.basename(file_path)
    with open(file_path, "rb") as f:
        encoder = MultipartEncoder({
            "file": (file_name, f, "application/octet-stream"),
            "uri": file_uri,
        })
        r = session.post(path, data=encoder)
    if r.status_code > 300:
        error('Fail to upload: ' + r.content)
def sg_del_retry():
    # Retry wrapper: delete the AWS security group, reporting any failure
    # via error().
    # NOTE(review): relies on sdk_conn and sg_group from an enclosing scope
    # not visible in this chunk — confirm it is defined as a nested closure.
    try:
        sdk_conn.vpc.delete_security_group(group_id=sg_group.id)
    except Exception as e:
        error("Delete security group failed with exception %s " %e)
def ins_ter_retry():
    # Retry wrapper: terminate the instance, reporting any failure via
    # error().
    # NOTE(review): relies on 'instance' from an enclosing scope not visible
    # in this chunk — confirm it is defined as a nested closure.
    try:
        instance.terminate()
    except Exception as e:
        error("Delete instance failed with exception %s " %e)
def get_controller_processes(controller, retry=5, role=None, **kwargs):
    """
    Returns a dict mapping controller process name -> pid.

    Builds a batch of 'sudo status <proc>' commands from the per-role
    process list, runs it on the controller (directly or inside the docker
    container for baremetal/gcp clouds), then parses a pid out of each
    response line. Retries recursively (up to 'retry' times) while the
    controller still has a cached process list.

    :param controller: controller VM object (execute_command etc.)
    :param retry: recursive retries left when parsing fails
    :param role: controller node role; looked up from the node when None
    :param kwargs: passed through to get_controller_process_names
    :return: dict {process name: pid}
    """
    if role is None:
        role = get_node_role(controller.ip)
    # Processes we never track; leader nodes additionally run vi-mgr and a
    # dedicated redis instance.
    _ignore = ['redis-server', 'setup_system', 'nginx', 'snmpd', 'aviportal']
    if role == 'CLUSTER_LEADER':
        _add = ['zookeeper', 'log_core_manager', 'vi-mgr',
                'redis-server INSTANCE=5001']
    else:
        _add = ['zookeeper', 'log_core_manager', 'redis-server INSTANCE=5001']
    proc_names = get_controller_process_names(role, controller_vm=controller,
                                              **kwargs)
    proc_names = [proc for proc in proc_names if proc not in _ignore]
    proc_names.extend(_add)
    # NOTE(review): the followers' _add list includes the redis instance and
    # it is immediately removed again here — presumably kept for clarity;
    # confirm this is intentional.
    if role != 'CLUSTER_LEADER':
        proc_names.remove('redis-server INSTANCE=5001')
    procs = {}
    command_str = []
    # Batch one 'sudo status' per process into a single remote command.
    for proc in proc_names:
        try:
            if ':' in proc:
                service, instance = proc.split(':')
                command_str.append('sudo status %s INSTANCE=%s;'
                                   % (service, instance))
            else:
                command_str.append('sudo status %s;' % proc)
        except Exception as e:
            # NOTE(review): fail() is given two positional args here, unlike
            # the single-message calls elsewhere — confirm the signature.
            logger_utils.fail("Unexpected error:", e.message)
    retries = 5
    timeout = 300
    elapsed_time = 0
    resp = ''
    while retries:
        try:
            # Baremetal/gcp controllers run inside a docker container and
            # key the response dict by (public) IP.
            if rest.get_cloud_type() == 'baremetal':
                resp = controller.execute_on_docker_container(''.join(command_str))
                resp = resp[controller.ip].splitlines()
            elif rest.get_cloud_type() == 'gcp':
                resp = controller.execute_on_docker_container(''.join(command_str))
                resp = resp[controller.vm_public_ip].splitlines()
            else:
                resp = controller.execute_command(''.join(command_str))
        except Exception as e:
            # Controller might have rebooted and we are reporting a reboot
            # failure, so just reset all the processes and core links for VM.
            logger_utils.asleep(delay=60)
            controller.processes = []
            controller.latest_core = None
            logger_utils.error(
                'Failed to connect to Controller: %s, %s' % (controller.ip, e))
        except Exception as ie:
            # NOTE(review): UNREACHABLE — a second 'except Exception' after
            # the one above can never match; likely meant a narrower
            # exception type (invalid job name case).
            logger_utils.fail('Process not running on %s. '
                              'Error: %s' % (controller.ip, ie.message))
        except Exception as e:
            # NOTE(review): UNREACHABLE for the same reason; this was the
            # intended generic retry path.
            logger.info('other ex, retry: %s' % e)
            retries -= 1
            logger_utils.asleep(delay=10)
            continue
        # If anything is mid-restart ('stop/waiting'), wait and re-poll
        # until the 300s budget is spent.
        crashed_processes = [
            proc for proc in resp if 'stop/waiting' in proc
        ]
        if len(crashed_processes) > 0 and elapsed_time < timeout:
            logger_utils.asleep(delay=10)
            elapsed_time += 10
            continue
        break
    # Response array contains blank items which need to be removed.
    resp = [value for value in resp if value != '']
    # Remove "\tpost-start process <pid>" lines from the response: pid
    # values are captured by index, and these extra lines shift the index.
    # NOTE(review): mutating resp while iterating it — works for isolated
    # occurrences but can skip adjacent ones; confirm acceptable.
    for process_resp in resp:
        if 'post-start process' in process_resp:
            resp.remove(process_resp)
    current_proc = None
    try:
        for index, proc in enumerate(proc_names):
            current_proc = proc
            proc_pid = ''
            # postgres pids come from pid files instead of 'status' output.
            if proc in ['postgresql', 'postgresql_metrics']:
                if rest.get_cloud_type() == 'gcp':
                    out = controller.execute_on_docker_container(
                        "cat '/var/run/%s.pid'" % proc)
                    out = out[controller.vm_public_ip].splitlines()
                else:
                    out = (controller.execute_command_fab(
                        "cat '/var/run/%s.pid'" % proc))
                pid = str(out[0]) if len(out) > 0 else None
                if not pid or 'no such file' in pid.lower():
                    pass
                else:
                    proc_pid = pid
            procname = proc.split()[0]
            procresp = ''
            if 'instance' in proc.lower():
                # Instanced services are matched by name + '(<instance>)'.
                instance_id = proc.split('=')[1]
                logger.info('Procname: %s InstanceId: %s'
                            % (procname, instance_id))
                for value in resp:
                    if procname in value and '(%s)' % instance_id in value:
                        procresp = value
                        break
            else:
                for value in resp:
                    resp_procname = value.split()[0]
                    if proc in ['postgresql', 'postgresql_metrics']:
                        procresp = proc_pid
                    elif procname == resp_procname:
                        procresp = value
                        break
            logger.info('Proc: %s RespProc: %s' % (proc, procresp))
            # The pid is the trailing run of digits; no match means the
            # process is down / unparsable.
            match = re.search('(\d+$)', procresp)
            if match:
                procs[proc] = (int(match.group(1)))
            else:
                raise IndexError
    except Exception as e:
        # If parsing fails (match.group raises, or the IndexError above):
        # in case of crash, sleep for 5 secs to allow the process to
        # restart, avoiding cascading failures, then retry recursively.
        if controller.processes:
            logger_utils.asleep(delay=5)
            retry -= 1
            if retry < 1:
                # Out of retries: clear the cache so the next call takes
                # the 'not running at all' branch below.
                del controller.processes[:]
            return get_controller_processes(controller, retry=retry,
                                            role=role, **kwargs)
        else:
            # The case where controller process not running at all!
            logger_utils.fail(
                'process: %s is not running on controller %s!' % (
                    current_proc, controller.ip))
    except Exception as ie:
        # NOTE(review): UNREACHABLE — shadowed by the 'except Exception'
        # above; likely meant a narrower exception type.
        logger_utils.fail('Process %s not running on %s.'
                          % (current_proc, controller.ip))
    logger.info('get_controller_processes: %s' % procs)
    return procs
def img_del_retry():
    # Retry wrapper: deregister the AMI (and delete its snapshot),
    # reporting any failure via error().
    # NOTE(review): relies on 'image' from an enclosing scope not visible
    # in this chunk — confirm it is defined as a nested closure.
    try:
        image.deregister(delete_snapshot=True)
    except Exception as e:
        error("Delete image failed with exception %s " %e)
def pool_wellness_check(pool_name, t_state, t_num, t_up, t_enabled,
                        skip_detail_check=0):
    """Validate a pool's cached summary/detail state and server counts.

    :param pool_name: pool to check
    :param t_state: expected oper state
    :param t_num: expected number of servers
    :param t_up: expected number of servers up
    :param t_enabled: expected number of servers enabled
    :param skip_detail_check: when truthy, skip the summary-vs-detail
        cache consistency check
    :return: True when everything matches
    """
    logger_utils.asleep(msg='wait', delay=10)
    t_num = int(t_num)
    t_up = int(t_up)
    t_enabled = int(t_enabled)
    logger.info(
        '## start pool wellness check pool=%s t_state=%s t_num=%d t_up=%d t_enabled=%d'
        % (pool_name, t_state, t_num, t_up, t_enabled))
    pool, pool_summary, pool_detail = cache_pool(pool_name)
    try:
        summary_oper_state = pool_summary['oper_status']['state']
        if isinstance(pool_detail, list):
            detail_oper_state = pool_detail[0]['oper_status']['state']
            pool_det = pool_detail[0]
        else:
            detail_oper_state = pool_detail['oper_status']['state']
            pool_det = pool_detail
    except KeyError as err_msg:
        logger_utils.fail('## oper status not available: %s' % err_msg)
    if not skip_detail_check:
        if summary_oper_state != detail_oper_state:
            logger_utils.error(
                "Cache issue - summary state %s != detail state %s"
                % (summary_oper_state, detail_oper_state))
            logger_utils.fail(
                "Cache issue - summary state %s != detail state %s"
                % (summary_oper_state, detail_oper_state))
    if summary_oper_state != t_state:
        logger.debug("summary state %s != expected state %s"
                     % (summary_oper_state, t_state))
        logger_utils.fail("summary state %s != expected state %s"
                          % (summary_oper_state, t_state))
    if int(pool_summary['num_servers']) != t_num:
        logger.trace(
            "num servers mismatch (e-%d, s-%d, d-%d)"
            % (t_num, pool_summary['num_servers'], pool_det['num_servers']))
        # BUG FIX: this branch called logger.fail(); every sibling branch
        # uses logger_utils.fail, and logger has no fail attribute.
        logger_utils.fail(
            "num servers mismatch (e-%d, s-%d, d-%d)"
            % (t_num, pool_summary['num_servers'], pool_det['num_servers']))
    if int(pool_summary['num_servers_enabled']) != t_enabled:
        logger.trace("num servers enabled mismatch (e-%d, s-%d, d-%d)"
                     % (t_enabled, pool_summary['num_servers_enabled'],
                        pool_det['num_servers_enabled']))
        logger_utils.fail("num servers enabled mismatch (e-%d, s-%d, d-%d)"
                          % (t_enabled, pool_summary['num_servers_enabled'],
                             pool_det['num_servers_enabled']))
    if int(pool_summary['num_servers_up']) != t_up:
        logger.trace(
            "num servers up mismatch (e-%d, s-%d, d-%d)"
            % (t_up, pool_summary['num_servers_up'], pool_det['num_servers_up']))
        logger_utils.fail(
            "num servers up mismatch (e-%d, s-%d, d-%d)"
            % (t_up, pool_summary['num_servers_up'], pool_det['num_servers_up']))
    return True
def ni_del_retry():
    # Retry wrapper: delete the AWS network interface, reporting any
    # failure via error().
    # NOTE(review): relies on sdk_conn and ni from an enclosing scope not
    # visible in this chunk — confirm it is defined as a nested closure.
    try:
        sdk_conn.vpc.delete_network_interface(ni.id)
    except Exception as e:
        error("Delete Network Interface failed with exception %s " %e)