def setup_dut(node):
    """Set up a DUT node by running the remote dut_setup.sh script over SSH.

    Both output streams of the script are written to the trace log.

    :param node: DUT node to set up.
    :type node: dict
    :raises Exception: If the setup script returns a non-zero exit code.
    """
    connection = SSH()
    connection.connect(node)

    setup_cmd = 'sudo -Sn bash {0}/{1}/dut_setup.sh'.format(
        Constants.REMOTE_FW_DIR, Constants.RESOURCES_LIB_SH)
    ret_code, stdout, stderr = connection.exec_command(setup_cmd, timeout=120)

    for stream in (stdout, stderr):
        logger.trace(stream)

    if int(ret_code) == 0:
        return

    # Keep the full script output in the debug log; the exception itself
    # only names the failing node.
    logger.debug('DUT {0} setup script failed: "{1}"'.format(
        node['host'], stdout + stderr))
    raise Exception('DUT test setup script failed at node {}'.format(
        node['host']))
def exec_the_udpfwd_test(dut_node, dut_if, file_prefix,
                         dest_ip, is_ipv4=True):
    """Run the udpfwd test on the DUT node via the run_tldk.sh script.

    :param dut_node: Node the udpfwd test is executed on.
    :param dut_if: DUT interface name.
    :param file_prefix: The test case config file prefix.
    :param dest_ip: The UDP packet dest IP.
    :param is_ipv4: Execute the IPv4 (True) or IPv6 (False) test.
    :type dut_node: dict
    :type dut_if: str
    :type file_prefix: str
    :type dest_ip: str
    :type is_ipv4: bool
    :returns: none.
    :raises RuntimeError: If failed to execute udpfwd test on the dut node.
    """
    pci_address = Topology.get_interface_pci_addr(dut_node, dut_if)
    connection = SSH()
    connection.connect(dut_node)

    # The IPv4 and IPv6 invocations differ only in their last two
    # arguments: the destination IP goes first for IPv4 and second for
    # IPv6, with the literal NONE filling the other slot.
    if is_ipv4:
        trailing_args = '{3} NONE'
    else:
        trailing_args = 'NONE {3}'

    template = (
        'cd {0}/{4} && ./run_tldk.sh {0}/{5}/{2}_rx.pcap '
        '{0}/{5}/{2}_tx.pcap {1} {0}/{5}/{2}_fe.cfg '
        '{0}/{5}/{2}_be.cfg ' + trailing_args)
    run_cmd = template.format(
        con.REMOTE_FW_DIR, pci_address, file_prefix,
        dest_ip, con.TLDK_SCRIPTS, con.TLDK_TESTCONFIG)

    ret_code, _, _ = connection.exec_command(run_cmd, timeout=600)
    if ret_code == 0:
        return

    raise RuntimeError(
        'Failed to execute udpfwd test at node {0}'.format(
            dut_node['host']))
def get_vpp_pid(node):
    """Get PID of the running VPP process (looked up as ``lt-vpp``).

    The lookup is attempted up to three times while no PID is reported.

    :param node: DUT node.
    :type node: dict
    :returns: The PID when exactly one process is found, a list of PIDs
        when several are found, or None when no PID was found in any
        attempt.
    :rtype: int or list or None
    :raises RuntimeError: If it is not possible to get the PID.
    """
    ssh = SSH()
    ssh.connect(node)

    for i in range(3):
        logger.trace('Try {}: Get VPP PID'.format(i))
        ret_code, stdout, stderr = ssh.exec_command('pidof lt-vpp')
        if int(ret_code) != 0:
            raise RuntimeError('Not possible to get PID of VPP process '
                               'on node: {0}\n {1}'.format(
                                   node['host'], stdout + stderr))

        # pidof prints all matching PIDs on a single line separated by
        # spaces, so split on any whitespace rather than on line breaks;
        # splitting on lines would feed "123 456" to int() and fail.
        pids = [int(pid) for pid in stdout.split()]

        if len(pids) == 1:
            return pids[0]
        if pids:
            logger.debug("More than one VPP PID found on node {0}".format(
                node['host']))
            return pids

        logger.debug("No VPP PID found on node {0}".format(node['host']))

    return None
def pci_driver_bind(node, pci_addr, driver):
    """Bind a PCI device to a driver on the node.

    The PCI address is written to the driver's ``bind`` file under
    /sys/bus/pci/drivers using a sudo shell on the node.

    :param node: DUT node.
    :param pci_addr: PCI device address.
    :param driver: Driver to bind.
    :type node: dict
    :type pci_addr: str
    :type driver: str
    :returns: nothing
    :raises RuntimeError: If PCI device bind failed.
    """
    connection = SSH()
    connection.connect(node)

    bind_cmd = (
        "sudo sh -c 'echo {} | tee /sys/bus/pci/drivers/{}/bind'"
    ).format(pci_addr, driver)
    ret_code, _, _ = connection.exec_command(bind_cmd)

    if int(ret_code) == 0:
        return

    raise RuntimeError('Failed to bind PCI device to {} driver on '
                       'host: {}'.format(driver, node['host']))
def install_dpdk_test(node):
    """Prepare the DPDK test environment by running install_dpdk.sh.

    :param node: Dictionary created from topology
    :type node: dict
    :returns: nothing
    :raises Exception: If the install script returns a non-zero exit code.
    """
    logger.console('Install the DPDK on {0}'.format(node['host']))

    connection = SSH()
    connection.connect(node)

    install_cmd = (
        'cd {0}/dpdk-tests/dpdk_scripts/ && ./install_dpdk.sh'
    ).format(con.REMOTE_FW_DIR)
    ret_code, _, stderr = connection.exec_command(install_cmd, timeout=600)

    if ret_code != 0:
        logger.error('Install the DPDK error: {0}'.format(stderr))
        raise Exception('Install the DPDK failed')

    logger.console('Install the DPDK on {0} success!'.format(node['host']))
def patch_l3fwd(node, patch):
    """Patch the l3fwd application and recompile it.

    Runs patch_l3fwd.sh on the node with the node architecture and the
    path of the patch file inside the remote dpdk_scripts directory.

    :param node: Dictionary created from topology.
    :param patch: Patch to apply.
    :type node: dict
    :type patch: str
    :raises RuntimeError: Patching of l3fwd failed.
    """
    arch = Topology.get_node_arch(node)

    connection = SSH()
    connection.connect(node)

    patch_cmd = (
        '{fwdir}/tests/dpdk/dpdk_scripts/patch_l3fwd.sh {arch} '
        '{fwdir}/tests/dpdk/dpdk_scripts/{patch}'
    ).format(fwdir=Constants.REMOTE_FW_DIR, arch=arch, patch=patch)
    ret_code, _, _ = connection.exec_command(patch_cmd, timeout=600)

    if ret_code == 0:
        return

    raise RuntimeError('Patch of l3fwd failed.')
def extract_tarball_at_node(tarball, node):
    """Extract tarball at given node.

    Extracts tarball using tar on given node to specific CSIT location.
    The target directory is removed and recreated first, and the tarball
    is deleted afterwards regardless of the outcome.

    :param tarball: Path to tarball to upload.
    :param node: Dictionary created from topology.
    :type tarball: str
    :type node: dict
    :returns: nothing
    :raises Exception: If creating the directory or unpacking fails.
    """
    logger.console('Extracting tarball to {0} on {1}'.format(
        con.REMOTE_FW_DIR, node['host']))
    ssh = SSH()
    ssh.connect(node)

    # The exit status of a ';' separated command list is the status of
    # its last command. Remember the status of mkdir/tar before the final
    # cleanup and return it explicitly; otherwise the always-successful
    # 'rm -f' would hide a failed unpack.
    cmd = 'sudo rm -rf {1}; mkdir {1} && tar -zxf {0} -C {1}; rc=$?; ' \
          'rm -f {0}; exit $rc'.format(tarball, con.REMOTE_FW_DIR)
    (ret_code, _, stderr) = ssh.exec_command(cmd, timeout=30)
    if ret_code != 0:
        logger.error('Unpack error: {0}'.format(stderr))
        raise Exception('Failed to unpack {0} at node {1}'.format(
            tarball, node['host']))
def create_env_directory_at_node(node):
    """Create a fresh virtualenv on the node and install pip requirements.

    Any existing ``env`` directory in the remote framework directory is
    removed before the virtualenv is created.

    :param node: Dictionary created from topology, will only install in
        the TG.
    :type node: dict
    :returns: nothing
    :raises RuntimeError: If the setup of virtualenv failed.
    """
    logger.console('Extracting virtualenv, installing requirements.txt '
                   'on {0}'.format(node['host']))

    connection = SSH()
    connection.connect(node)

    # Every step must succeed for the next one to run.
    steps = [
        'cd {0}',
        'rm -rf env',
        'virtualenv --system-site-packages --never-download env',
        '. env/bin/activate',
        'pip install -r requirements.txt',
    ]
    setup_cmd = ' && '.join(steps).format(con.REMOTE_FW_DIR)
    ret_code, stdout, stderr = connection.exec_command(setup_cmd, timeout=100)

    if ret_code != 0:
        logger.error('Virtualenv creation error: {0}'.format(stdout + stderr))
        raise RuntimeError('Virtualenv setup failed')

    logger.console('Virtualenv created on {0}'.format(node['host']))
def install_tldk_test(node):
    """Prepare the TLDK test environment by running install_tldk.sh.

    :param node: Dictionary created from topology.
    :type node: dict
    :returns: nothing.
    :raises RuntimeError: If install tldk failed.
    """
    logger.console('Install the TLDK on {0}'.format(node['host']))

    connection = SSH()
    connection.connect(node)

    install_cmd = 'cd {0}/{1} && ./install_tldk.sh'.format(
        con.REMOTE_FW_DIR, con.TLDK_SCRIPTS)
    ret_code, _, stderr = connection.exec_command(install_cmd, timeout=600)

    if ret_code != 0:
        logger.error('Install the TLDK error: {0}'.format(stderr))
        raise RuntimeError('Install the TLDK failed')

    logger.console('Install the TLDK on {0} success!'.format(node['host']))
def create_env_directory_at_node(node):
    """Create a fresh virtualenv on the node and install pip requirements.

    Any existing ``env`` directory in the remote framework directory is
    removed before the virtualenv is created.

    :param node: Node to create virtualenv on.
    :type node: dict
    :returns: nothing
    :raises RuntimeError: When failed to setup virtualenv.
    """
    logger.console('Virtualenv setup including requirements.txt on {0}'.format(
        node['host']))

    connection = SSH()
    connection.connect(node)

    # Every step must succeed for the next one to run.
    steps = [
        'cd {0}',
        'rm -rf env',
        'virtualenv --system-site-packages --never-download env',
        '. env/bin/activate',
        'pip install -r requirements.txt',
    ]
    setup_cmd = ' && '.join(steps).format(con.REMOTE_FW_DIR)
    ret_code, _, _ = connection.exec_command(setup_cmd, timeout=100)

    if ret_code == 0:
        logger.console('Virtualenv on {0} created'.format(node['host']))
        return

    raise RuntimeError(
        'Virtualenv setup including requirements.txt on {0}'.format(
            node['host']))
def build_qemu(node, force_install=False, apply_patch=False):
    """Build QEMU from sources by running qemu_build.sh on the node.

    :param node: Node to build QEMU on.
    :param force_install: If True, then remove previous build.
    :param apply_patch: If True, then apply patches from qemu_patches dir.
    :type node: dict
    :type force_install: bool
    :type apply_patch: bool
    :raises RuntimeError: If building QEMU failed.
    """
    connection = SSH()
    connection.connect(node)

    # Each option fragment carries its own leading space so that the
    # fragments can be appended directly after the script path.
    if apply_patch:
        dir_suffix, patch_opt = '-patch', ' --patch'
    else:
        dir_suffix, patch_opt = '-base', ''
    directory_opt = ' --directory={install_dir}{patch}'.format(
        install_dir=Constants.QEMU_INSTALL_DIR, patch=dir_suffix)
    version_opt = ' --version={install_version}'.format(
        install_version=Constants.QEMU_INSTALL_VERSION)
    if force_install:
        force_opt = ' --force'
    else:
        force_opt = ''
    target_opt = ' --target-list={arch}-softmmu'.format(
        arch=Topology.get_node_arch(node))

    build_cmd = (
        "sudo -E sh -c '{fw_dir}/{lib_sh}/qemu_build.sh{version}{directory}"
        "{force}{patch}{target_list}'"
    ).format(
        fw_dir=Constants.REMOTE_FW_DIR,
        lib_sh=Constants.RESOURCES_LIB_SH,
        version=version_opt,
        directory=directory_opt,
        force=force_opt,
        patch=patch_opt,
        target_list=target_opt)

    ret_code, _, _ = connection.exec_command(build_cmd, 1000)
    if int(ret_code) == 0:
        return

    raise RuntimeError('QEMU build failed on {host}'.format(
        host=node['host']))
def get_pid(node, process):
    """Get PID(s) of a running process using ``pidof``.

    The lookup is attempted up to three times. It returns immediately when
    exactly one PID is reported; otherwise all three attempts are made and
    the PIDs from the last attempt that reported several are returned.

    :param node: DUT node.
    :param process: process name.
    :type node: dict
    :type process: str
    :returns: List of PIDs, or None if no attempt reported more than zero
        PIDs.
    :rtype: list or None
    :raises RuntimeError: If it is not possible to get the PID.
    """
    connection = SSH()
    connection.connect(node)

    found_pids = None
    for attempt in range(3):
        logger.trace(f"Try {attempt}: Get {process} PID")
        ret_code, stdout, stderr = connection.exec_command(f"pidof {process}")
        if int(ret_code) != 0:
            raise RuntimeError(
                f"Not possible to get PID of {process} process on node: "
                f"{node[u'host']}\n {stdout + stderr}"
            )

        tokens = stdout.split()
        if not tokens:
            logger.debug(f"No {process} PID found on node {node[u'host']}")
        elif len(tokens) == 1:
            return [int(stdout)]
        else:
            logger.debug(f"More than one {process} PID found "
                         f"on node {node[u'host']}")
            found_pids = [int(token) for token in tokens]

    return found_pids
def append_honeycomb_log(node, suite_name):
    """Append the Honeycomb log of the current suite to /tmp/honeycomb.log.

    A separator line and a header naming the suite are written before the
    log content. Results of the remote commands are not checked.

    :param node: Honeycomb node.
    :param suite_name: Name of the current test suite. ${SUITE_NAME}
        variable in robotframework.
    :type node: dict
    :type suite_name: str
    """
    connection = SSH()
    connection.connect(node)

    separator_cmd = "echo '{separator}' >> /tmp/honeycomb.log".format(
        separator="=" * 80)
    connection.exec_command(separator_cmd)

    header_cmd = "echo 'Log for suite: {suite}' >> /tmp/honeycomb.log".format(
        suite=suite_name)
    connection.exec_command(header_cmd)

    copy_cmd = "cat {hc_log} >> /tmp/honeycomb.log".format(
        hc_log=Const.REMOTE_HC_LOG)
    connection.exec_command(copy_cmd)
def append_odl_log(node, odl_name, suite_name):
    """Append the ODL karaf log of the current suite to /tmp/karaf.log.

    A separator line and a header naming the suite are written before the
    log content. Results of the remote commands are not checked.

    :param node: Honeycomb node.
    :param odl_name: Name of ODL client version to use.
    :param suite_name: Name of the current test suite. ${SUITE_NAME}
        variable in robotframework.
    :type node: dict
    :type odl_name: str
    :type suite_name: str
    """
    connection = SSH()
    connection.connect(node)

    separator_cmd = "echo '{separator}' >> /tmp/karaf.log".format(
        separator="=" * 80)
    connection.exec_command(separator_cmd)

    header_cmd = "echo 'Log for suite: {suite}' >> /tmp/karaf.log".format(
        suite=suite_name)
    connection.exec_command(header_cmd)

    copy_cmd = (
        "cat /tmp/karaf_{odl_name}/data/log/karaf.log >> /tmp/karaf.log"
    ).format(odl_name=odl_name)
    connection.exec_command(copy_cmd)
def install_vpp_on_all_duts(nodes, vpp_pkg_dir, vpp_rpm_pkgs, vpp_deb_pkgs):
    """Install VPP on all DUT nodes.

    On each DUT any previous VPP installation is removed first. Nodes that
    have /etc/redhat-release are handled with yum/rpm, all others with
    apt-get/dpkg.

    :param nodes: Nodes in the topology.
    :param vpp_pkg_dir: Path to directory where VPP packages are stored.
    :param vpp_rpm_pkgs: List of VPP rpm packages to be installed.
    :param vpp_deb_pkgs: List of VPP deb packages to be installed.
    :type nodes: dict
    :type vpp_pkg_dir: str
    :type vpp_rpm_pkgs: list
    :type vpp_deb_pkgs: list
    :raises RuntimeError: If failed to remove or install VPP.
    """
    for node in nodes.values():
        if node['type'] != NodeType.DUT:
            continue

        logger.debug("Installing VPP on node {0}".format(node['host']))

        ssh = SSH()
        ssh.connect(node)

        cmd = "[[ -f /etc/redhat-release ]]"
        return_code, _, _ = ssh.exec_command(cmd)
        if not int(return_code):
            cmd_u = 'yum -y remove "vpp*"'
            install_tmpl = "rpm -ivh {0}"
            suffix, pkgs = "*.rpm", vpp_rpm_pkgs
            cmd_verify = "rpm -qai vpp*"
        else:
            cmd_u = 'apt-get purge -y "vpp*"'
            install_tmpl = "dpkg -i --force-all {0}"
            suffix, pkgs = "*.deb", vpp_deb_pkgs
            cmd_verify = "dpkg -l | grep vpp"

        # workaround - uninstall existing vpp installation until
        # start-testcase script is updated on all virl servers
        r_rcode, _, r_err = ssh.exec_command_sudo(cmd_u, timeout=90)
        if int(r_rcode):
            raise RuntimeError(
                'Failed to remove previous VPP '
                'installation on host {0}:\n{1}'.format(
                    node['host'], r_err))

        # Each package name is a prefix; append the suffix glob to every
        # entry so the remote shell expands it to the package files.
        pkg_globs = (suffix + " ").join(
            str(vpp_pkg_dir + pkg) for pkg in pkgs) + suffix
        cmd_i = install_tmpl.format(pkg_globs)
        ret_code, _, err = ssh.exec_command_sudo(cmd_i, timeout=90)
        if int(ret_code):
            raise RuntimeError('Failed to install VPP on host {0}:'
                               '\n{1}'.format(node['host'], err))

        # List the installed packages; the result is not checked.
        ssh.exec_command_sudo(cmd_verify)
        logger.info("VPP installed on node {0}".format(node['host']))

        ssh.disconnect(node)
def check_huge_page(node, huge_mnt, mem_size, allocate=False):
    """Check if there is enough HugePages in system. If allocate is set to
    true, try to allocate more HugePages.

    After the size check, a hugetlbfs mount at ``huge_mnt`` is created if
    /proc/mounts does not list one.

    :param node: Node in the topology.
    :param huge_mnt: HugePage mount point.
    :param mem_size: Requested memory in MB.
    :param allocate: Whether to allocate more memory if not enough.
    :type node: dict
    :type huge_mnt: str
    :type mem_size: int
    :type allocate: bool
    :raises RuntimeError: Mounting hugetlbfs failed or not enough HugePages
        or increasing map count failed.
    """
    # TODO: split function into smaller parts.
    ssh = SSH()
    ssh.connect(node)

    # Accept numeric strings as well; the value is used arithmetically.
    mem_size = int(mem_size)

    # Get huge pages information
    huge_size = DUTSetup.get_huge_page_size(node)
    huge_free = DUTSetup.get_huge_page_free(node, huge_size)
    huge_total = DUTSetup.get_huge_page_total(node, huge_size)

    # Check if memory requested is available on host
    if (mem_size * 1024) > (huge_free * huge_size):
        # If we do not want to allocate dynamically end with error
        if not allocate:
            raise RuntimeError(
                'Not enough free huge pages: {0}, {1} MB'.format(
                    huge_free, huge_free * huge_size))

        mem_needed = (mem_size * 1024) - (huge_free * huge_size)
        # Floor division keeps the page count an integer on Python 3 too;
        # a float such as "12.0" is not a valid nr_hugepages value.
        huge_to_allocate = ((mem_needed // huge_size) * 2) + huge_total
        max_map_count = huge_to_allocate * 4

        # Increase maximum number of memory map areas a process may have
        ret_code, _, _ = ssh.exec_command_sudo(
            'echo "{0}" | sudo tee /proc/sys/vm/max_map_count'.format(
                max_map_count))
        if int(ret_code) != 0:
            raise RuntimeError(
                'Increase map count failed on {host}'.format(
                    host=node['host']))

        # Increase hugepage count
        ret_code, _, _ = ssh.exec_command_sudo(
            'echo "{0}" | sudo tee /proc/sys/vm/nr_hugepages'.format(
                huge_to_allocate))
        if int(ret_code) != 0:
            raise RuntimeError(
                'Increase huge pages count failed on {host}'.format(
                    host=node['host']))

    # Check if huge pages mount point exist
    has_huge_mnt = False
    ret_code, stdout, _ = ssh.exec_command('cat /proc/mounts')
    if int(ret_code) == 0:
        for line in stdout.splitlines():
            # Try to find something like:
            # none /mnt/huge hugetlbfs rw,relatime,pagesize=2048k 0 0
            mount = line.split()
            # Skip blank or truncated lines instead of failing on them.
            if len(mount) < 3:
                continue
            if mount[2] == 'hugetlbfs' and mount[1] == huge_mnt:
                has_huge_mnt = True
                break

    # If huge page mount point not exist create one
    if not has_huge_mnt:
        ret_code, _, _ = ssh.exec_command_sudo(
            'mkdir -p {mnt}'.format(mnt=huge_mnt))
        if int(ret_code) != 0:
            raise RuntimeError('Create mount dir failed on {host}'.format(
                host=node['host']))
        ret_code, _, _ = ssh.exec_command_sudo(
            'mount -t hugetlbfs -o pagesize=2048k none {mnt}'.format(
                mnt=huge_mnt))
        if int(ret_code) != 0:
            raise RuntimeError('Mount huge pages failed on {host}'.format(
                host=node['host']))
class QemuUtils(object):
    """QEMU utilities.

    Collects QEMU command line options, starts the VM on a node over SSH
    and talks to it through the QMP and QGA unix sockets.
    """

    def __init__(self, qemu_id=1):
        """Initialize default QEMU options.

        :param qemu_id: QEMU instance ID, used to derive socket names,
            ports and MAC addresses.
        :type qemu_id: int
        """
        self._qemu_id = qemu_id
        self._vhost_id = 0
        self._ssh = None
        self._node = None
        # Qemu Options
        self._qemu_opt = {}
        # Path to QEMU binary. Use x86_64 by default
        self._qemu_opt['qemu_path'] = '/usr/bin/'
        self._qemu_opt['qemu_bin'] = 'qemu-system-x86_64'
        # QEMU Machine Protocol socket
        self._qemu_opt['qmp_sock'] = '/tmp/qmp{0}.sock'.format(self._qemu_id)
        # QEMU Guest Agent socket
        self._qemu_opt['qga_sock'] = '/tmp/qga{0}.sock'.format(self._qemu_id)
        # QEMU PID file
        self._qemu_opt['pid_file'] = '/tmp/qemu{0}.pid'.format(self._qemu_id)
        # Default 1 CPU.
        self._qemu_opt['smp'] = '-smp 1,sockets=1,cores=1,threads=1'
        # Daemonize the QEMU process after initialization. Default one
        # management interface.
        self._qemu_opt['options'] = '-cpu host -daemonize -enable-kvm ' \
            '-machine pc,accel=kvm,usb=off,mem-merge=off ' \
            '-net nic,macaddr=52:54:00:00:{0:02x}:ff -balloon none'\
            .format(self._qemu_id)
        self._qemu_opt['ssh_fwd_port'] = 10021 + qemu_id
        # Default serial console port
        self._qemu_opt['serial_port'] = 4555 + qemu_id
        # Default 512MB virtual RAM
        self._qemu_opt['mem_size'] = 512
        # Default huge page mount point, required for Vhost-user interfaces.
        self._qemu_opt['huge_mnt'] = '/mnt/huge'
        # Default do not allocate huge pages.
        self._qemu_opt['huge_allocate'] = False
        # Default image for CSIT virl setup
        self._qemu_opt['disk_image'] = '/var/lib/vm/vhost-nested.img'
        # Virtio queue count
        self._qemu_opt['queue_count'] = 1
        # Virtio queue size
        self._qemu_opt['queue_size'] = None
        # VM node info dict
        self._vm_info = {
            'type': NodeType.VM,
            'port': self._qemu_opt['ssh_fwd_port'],
            'username': '******',
            'password': '******',
            'interfaces': {},
        }
        # Qemu Sockets
        self._socks = [self._qemu_opt.get('qmp_sock'),
                       self._qemu_opt.get('qga_sock')]

    def qemu_set_path(self, path):
        """Set binary path for QEMU.

        :param path: Absolute path in filesystem.
        :type path: str
        """
        self._qemu_opt['qemu_path'] = path

    def qemu_set_queue_count(self, count):
        """Set number of virtio queues.

        :param count: Number of virtio queues.
        :type count: int
        """
        self._qemu_opt['queue_count'] = int(count)

    def qemu_set_queue_size(self, size):
        """Set RX/TX size of virtio queues.

        :param size: Size of virtio queues.
        :type size: int
        """
        self._qemu_opt['queue_size'] = int(size)

    def qemu_set_smp(self, smp, cores, threads, sockets):
        """Set SMP option for QEMU.

        :param smp: Number of CPUs.
        :param cores: Number of CPU cores on one socket.
        :param threads: Number of threads on one CPU core.
        :param sockets: Number of discrete sockets in the system.
        :type smp: int
        :type cores: int
        :type threads: int
        :type sockets: int
        """
        self._qemu_opt['smp'] = \
            ('-smp {smp},cores={cores},threads={threads},sockets={sockets}'.
             format(smp=smp, cores=cores, threads=threads, sockets=sockets))

    def qemu_set_ssh_fwd_port(self, fwd_port):
        """Set host port for guest SSH forwarding.

        The VM node info port is updated to the same value.

        :param fwd_port: Port number on host for guest SSH forwarding.
        :type fwd_port: int
        """
        self._qemu_opt['ssh_fwd_port'] = fwd_port
        self._vm_info['port'] = fwd_port

    def qemu_set_serial_port(self, port):
        """Set serial console port.

        :param port: Serial console port.
        :type port: int
        """
        self._qemu_opt['serial_port'] = port

    def qemu_set_mem_size(self, mem_size):
        """Set virtual RAM size.

        :param mem_size: RAM size in Mega Bytes.
        :type mem_size: int
        """
        self._qemu_opt['mem_size'] = int(mem_size)

    def qemu_set_huge_mnt(self, huge_mnt):
        """Set hugefile mount point.

        :param huge_mnt: System hugefile mount point.
        :type huge_mnt: str
        """
        self._qemu_opt['huge_mnt'] = huge_mnt

    def qemu_set_huge_allocate(self):
        """Set flag to allocate more huge pages if needed."""
        self._qemu_opt['huge_allocate'] = True

    def qemu_set_disk_image(self, disk_image):
        """Set disk image.

        :param disk_image: Path of the disk image.
        :type disk_image: str
        """
        self._qemu_opt['disk_image'] = disk_image

    def qemu_set_affinity(self, *host_cpus):
        """Set qemu affinity by getting thread PIDs via QMP and taskset to
        list of CPU cores.

        :param host_cpus: List of CPU cores.
        :type host_cpus: list
        :raises ValueError: If the number of host CPUs differs from the
            number of QEMU CPU threads.
        :raises RuntimeError: If setting the affinity failed.
        """
        qemu_cpus = self._qemu_qmp_exec('query-cpus')['return']

        if len(qemu_cpus) != len(host_cpus):
            raise ValueError('Host CPU count must match Qemu Thread count')

        for qemu_cpu, host_cpu in zip(qemu_cpus, host_cpus):
            ret_code, _, _ = self._ssh.exec_command_sudo(
                'taskset -pc {host_cpu} {thread_id}'.
                format(host_cpu=host_cpu, thread_id=qemu_cpu['thread_id']))
            if int(ret_code) != 0:
                raise RuntimeError('Set affinity failed on {host}'.
                                   format(host=self._node['host']))

    def qemu_set_scheduler_policy(self):
        """Set scheduler policy to SCHED_RR with priority 1 for all Qemu CPU
        processes.

        :raises RuntimeError: Set scheduler policy failed.
        """
        qemu_cpus = self._qemu_qmp_exec('query-cpus')['return']

        for qemu_cpu in qemu_cpus:
            ret_code, _, _ = self._ssh.exec_command_sudo(
                'chrt -r -p 1 {thread_id}'.
                format(thread_id=qemu_cpu['thread_id']))
            if int(ret_code) != 0:
                raise RuntimeError('Set SCHED_RR failed on {host}'.
                                   format(host=self._node['host']))

    def qemu_set_node(self, node):
        """Set node to run QEMU on.

        Opens the SSH connection used by all later commands and selects
        the QEMU binary matching the node architecture.

        :param node: Node to run QEMU on.
        :type node: dict
        """
        self._node = node
        self._ssh = SSH()
        self._ssh.connect(node)
        self._vm_info['host'] = node['host']

        arch = Topology.get_node_arch(node)
        self._qemu_opt['qemu_bin'] = 'qemu-system-{arch}'.format(arch=arch)

    def qemu_add_vhost_user_if(self, socket, server=True, mac=None,
                               jumbo_frames=False):
        """Add Vhost-user interface.

        :param socket: Path of the unix socket.
        :param server: If True the socket shall be a listening socket.
        :param mac: Vhost-user interface MAC address (optional, otherwise is
            used auto-generated MAC 52:54:00:00:xx:yy).
        :param jumbo_frames: Set True if jumbo frames are used in the test.
        :type socket: str
        :type server: bool
        :type mac: str
        :type jumbo_frames: bool
        """
        self._vhost_id += 1
        # Create unix socket character device.
        chardev = (' -chardev socket,id=char{vhost_id},path={socket}{server}'.
                   format(vhost_id=self._vhost_id,
                          socket=socket,
                          server=',server' if server is True else ''))
        self._qemu_opt['options'] += chardev
        # Create Vhost-user network backend.
        netdev = (' -netdev vhost-user,id=vhost{vhost_id},'
                  'chardev=char{vhost_id},queues={queue_count}'.
                  format(vhost_id=self._vhost_id,
                         queue_count=self._qemu_opt.get('queue_count')))
        self._qemu_opt['options'] += netdev
        # If MAC is not specified use auto-generated MAC address based on
        # template 52:54:00:00:<qemu_id>:<vhost_id>, e.g. vhost1 MAC of QEMU
        # with ID 1 is 52:54:00:00:01:01
        mac = ('52:54:00:00:{qemu_id:02x}:{vhost_id:02x}'.
               format(qemu_id=self._qemu_id, vhost_id=self._vhost_id))\
            if mac is None else mac

        queue_size = (',rx_queue_size={queue_size},tx_queue_size={queue_size}'.
                      format(queue_size=self._qemu_opt.get('queue_size')))\
            if self._qemu_opt.get('queue_size') else ''

        # Create Virtio network device.
        device = (' -device virtio-net-pci,netdev=vhost{vhost_id},mac={mac},'
                  'mq=on,csum=off,gso=off,guest_tso4=off,guest_tso6=off,'
                  'guest_ecn=off,mrg_rxbuf={mbuf}{queue_size}'.
                  format(vhost_id=self._vhost_id, mac=mac,
                         mbuf='on,host_mtu=9200' if jumbo_frames else 'off',
                         queue_size=queue_size))
        self._qemu_opt['options'] += device
        # Add interface MAC and socket to the node dict
        if_data = {'mac_address': mac, 'socket': socket}
        if_name = 'vhost{vhost_id}'.format(vhost_id=self._vhost_id)
        self._vm_info['interfaces'][if_name] = if_data
        # Add socket to the socket list
        self._socks.append(socket)

    def _qemu_qmp_exec(self, cmd):
        """Execute QMP command.

        QMP is JSON based protocol which allows to control QEMU instance.

        :param cmd: QMP command to execute.
        :type cmd: str
        :returns: Command output in python representation of JSON format. The
            { "return": {} } response is QMP's success response. An error
            response will contain the "error" keyword instead of "return".
        :raises RuntimeError: If the command fails or fewer than three
            output lines are received.
        """
        # To enter command mode, the qmp_capabilities command must be issued.
        ret_code, stdout, _ = self._ssh.exec_command(
            'echo "{{ \\"execute\\": \\"qmp_capabilities\\" }}'
            '{{ \\"execute\\": \\"{cmd}\\" }}" | '
            'sudo -S socat - UNIX-CONNECT:{qmp_sock}'.
            format(cmd=cmd, qmp_sock=self._qemu_opt.get('qmp_sock')))

        if int(ret_code) != 0:
            raise RuntimeError('QMP execute "{cmd}" failed on {host}'.
                               format(cmd=cmd, host=self._node['host']))
        # Skip capabilities negotiation messages.
        out_list = stdout.splitlines()
        if len(out_list) < 3:
            raise RuntimeError('Invalid QMP output on {host}'.
                               format(host=self._node['host']))
        return json.loads(out_list[2])

    def _qemu_qga_flush(self):
        """Flush the QGA parser state.

        :returns: First line of the output parsed as JSON, or an empty
            dict when there is no output.
        :rtype: dict
        :raises RuntimeError: If the flush command fails.
        """
        ret_code, stdout, _ = self._ssh.exec_command(
            '(printf "\xFF"; sleep 1) | '
            'sudo -S socat - UNIX-CONNECT:{qga_sock}'.
            format(qga_sock=self._qemu_opt.get('qga_sock')))
        if int(ret_code) != 0:
            raise RuntimeError('QGA flush failed on {host}'.
                               format(host=self._node['host']))
        if not stdout:
            return {}
        return json.loads(stdout.split('\n', 1)[0])

    def _qemu_qga_exec(self, cmd):
        """Execute QGA command.

        QGA provide access to a system-level agent via standard QMP commands.

        :param cmd: QGA command to execute.
        :type cmd: str
        :returns: First line of the output parsed as JSON, or an empty
            dict when there is no output.
        :rtype: dict
        :raises RuntimeError: If the command fails.
        """
        ret_code, stdout, _ = self._ssh.exec_command(
            '(echo "{{ \\"execute\\": \\"{cmd}\\" }}"; sleep 1) | '
            'sudo -S socat - UNIX-CONNECT:{qga_sock}'.
            format(cmd=cmd, qga_sock=self._qemu_opt.get('qga_sock')))
        if int(ret_code) != 0:
            raise RuntimeError('QGA execute "{cmd}" failed on {host}'.
                               format(cmd=cmd, host=self._node['host']))
        if not stdout:
            return {}
        return json.loads(stdout.split('\n', 1)[0])

    def _wait_until_vm_boot(self, timeout=60):
        """Wait until QEMU VM is booted.

        First try to flush qga until there is output.
        Then ping QEMU guest agent each 5s until VM booted or timeout.
        Both phases share one timeout measured from the start of the call.

        :param timeout: Waiting timeout in seconds (optional, default 60s).
        :type timeout: int
        :raises RuntimeError: If the VM does not boot within the timeout.
        """
        start = time()
        while True:
            if time() - start > timeout:
                raise RuntimeError('timeout, VM {disk} not booted on {host}'.
                                   format(disk=self._qemu_opt['disk_image'],
                                          host=self._node['host']))
            out = None
            try:
                out = self._qemu_qga_flush()
            except ValueError:
                logger.trace('QGA qga flush unexpected output {out}'.
                             format(out=out))
            # Empty output - VM not booted yet
            if not out:
                sleep(5)
            else:
                break
        while True:
            if time() - start > timeout:
                raise RuntimeError('timeout, VM with {disk} not booted '
                                   'on {host}'.
                                   format(disk=self._qemu_opt['disk_image'],
                                          host=self._node['host']))
            out = None
            try:
                out = self._qemu_qga_exec('guest-ping')
            except ValueError:
                logger.trace('QGA guest-ping unexpected output {out}'.
                             format(out=out))
            # Empty output - VM not booted yet
            if not out:
                sleep(5)
            # Non-error return - VM booted
            elif out.get('return') is not None:
                break
            # Skip error and wait
            elif out.get('error') is not None:
                sleep(5)
            else:
                # If there is an unexpected output from QGA guest-info, try
                # again until timeout.
                logger.trace('QGA guest-ping unexpected output {out}'.
                             format(out=out))

        logger.trace('VM with {disk_image} booted on {host}'.
                     format(disk_image=self._qemu_opt['disk_image'],
                            host=self._node['host']))

    def _update_vm_interfaces(self):
        """Update interface names in VM node dict.

        :raises RuntimeError: If the interface list cannot be obtained.
        """
        # Send guest-network-get-interfaces command via QGA, output example:
        # {"return": [{"name": "eth0", "hardware-address": "52:54:00:00:04:01"},
        # {"name": "eth1", "hardware-address": "52:54:00:00:04:02"}]}
        out = self._qemu_qga_exec('guest-network-get-interfaces')
        interfaces = out.get('return')
        mac_name = {}
        if not interfaces:
            raise RuntimeError('Get VM {disk_image} interface list failed '
                               'on {host}'.
                               format(disk_image=self._qemu_opt['disk_image'],
                                      host=self._node['host']))
        # Create MAC-name dict
        for interface in interfaces:
            if 'hardware-address' not in interface:
                continue
            mac_name[interface['hardware-address']] = interface['name']
        # Match interface by MAC and save interface name
        for interface in self._vm_info['interfaces'].values():
            mac = interface.get('mac_address')
            if_name = mac_name.get(mac)
            if if_name is None:
                logger.trace('Interface name for MAC {mac} not found'.
                             format(mac=mac))
            else:
                interface['name'] = if_name

    def qemu_start(self):
        """Start QEMU and wait until VM boot.

        .. note:: First set at least node to run QEMU on.

        :returns: VM node info.
        :rtype: dict
        :raises RuntimeError: If QEMU fails to start or the VM does not
            boot; all QEMU processes are killed and sockets removed first.
        """
        # Qemu binary path
        bin_path = ('{qemu_path}{qemu_bin}'.
                    format(qemu_path=self._qemu_opt.get('qemu_path'),
                           qemu_bin=self._qemu_opt.get('qemu_bin')))

        # Memory and huge pages
        mem = ('-object memory-backend-file,id=mem,size={mem_size}M,'
               'mem-path={path},share=on -m {mem_size} -numa node,memdev=mem'.
               format(mem_size=self._qemu_opt.get('mem_size'),
                      path=self._qemu_opt.get('huge_mnt')))

        # Drive option
        drive = ('-drive file={disk_image},format=raw,cache=none,if=virtio'
                 '{locking}'.
                 format(disk_image=self._qemu_opt.get('disk_image'),
                        locking=',file.locking=off'\
                            if self._qemu_version_is_greater('2.10') else ''))

        # SSH forwarding
        ssh = ('-net user,hostfwd=tcp::{ssh_fwd_port}-:22'.
               format(ssh_fwd_port=self._qemu_opt.get('ssh_fwd_port')))
        # Setup QMP via unix socket
        qmp = ('-qmp unix:{qmp_sock},server,nowait'.
               format(qmp_sock=self._qemu_opt.get('qmp_sock')))
        # Setup QGA via chardev (unix socket) and isa-serial channel
        qga = ('-chardev socket,path={qga_sock},server,nowait,id=qga0 '
               '-device isa-serial,chardev=qga0'.
               format(qga_sock=self._qemu_opt.get('qga_sock')))
        # Setup serial console
        serial = ('-chardev socket,host=127.0.0.1,port={serial_port},id=gnc0,'
                  'server,nowait -device isa-serial,chardev=gnc0'.
                  format(serial_port=self._qemu_opt.get('serial_port')))

        # Graphic setup
        graphic = '-monitor none -display none -vga none'

        # PID file
        pid = ('-pidfile {pid_file}'.
               format(pid_file=self._qemu_opt.get('pid_file')))

        # By default check only if hugepages are available.
        # If 'huge_allocate' is set to true try to allocate as well.
        DUTSetup.check_huge_page(self._node, self._qemu_opt.get('huge_mnt'),
                                 self._qemu_opt.get('mem_size'),
                                 allocate=self._qemu_opt.get('huge_allocate'))

        # Run QEMU
        cmd = ('{bin_path} {smp} {mem} {ssh} {options} {drive} {qmp} {serial} '
               '{qga} {graphic} {pid}'.
               format(bin_path=bin_path, smp=self._qemu_opt.get('smp'),
                      mem=mem, ssh=ssh, options=self._qemu_opt.get('options'),
                      drive=drive, qmp=qmp, serial=serial, qga=qga,
                      graphic=graphic, pid=pid))
        try:
            ret_code, _, _ = self._ssh.exec_command_sudo(cmd, timeout=300)
            if int(ret_code) != 0:
                raise RuntimeError('QEMU start failed on {host}'.
                                   format(host=self._node['host']))
            # Wait until VM boot
            self._wait_until_vm_boot()
        except (RuntimeError, SSHTimeout):
            self.qemu_kill_all()
            self.qemu_clear_socks()
            raise
        logger.trace('QEMU started successfully.')
        # Update interface names in VM node dict
        self._update_vm_interfaces()
        # Return VM node dict
        return self._vm_info

    def qemu_quit(self):
        """Quit the QEMU emulator.

        :raises RuntimeError: If QMP reports an error.
        """
        out = self._qemu_qmp_exec('quit')
        err = out.get('error')
        if err is not None:
            raise RuntimeError('QEMU quit failed on {host}: {error}'.
                               format(host=self._node['host'],
                                      error=json.dumps(err)))

    def qemu_system_powerdown(self):
        """Power down the system (if supported).

        :raises RuntimeError: If QMP reports an error.
        """
        out = self._qemu_qmp_exec('system_powerdown')
        err = out.get('error')
        if err is not None:
            raise RuntimeError(
                'QEMU system powerdown failed on {host}: {error}'.
                format(host=self._node['host'], error=json.dumps(err)))

    def qemu_system_reset(self):
        """Reset the system.

        :raises RuntimeError: If QMP reports an error.
        """
        out = self._qemu_qmp_exec('system_reset')
        err = out.get('error')
        if err is not None:
            raise RuntimeError(
                'QEMU system reset failed on {host}: {error}'.
                format(host=self._node['host'], error=json.dumps(err)))

    def qemu_kill(self):
        """Kill qemu process."""
        # Note: in QEMU start phase there are 3 QEMU processes because we
        # daemonize QEMU
        self._ssh.exec_command_sudo('chmod +r {pid}'.
                                    format(pid=self._qemu_opt.get('pid_file')))
        self._ssh.exec_command_sudo('kill -SIGKILL $(cat {pid})'.
                                    format(pid=self._qemu_opt.get('pid_file')))
        # Delete PID file
        self._ssh.exec_command_sudo('rm -f {pid}'.
                                    format(pid=self._qemu_opt.get('pid_file')))

    def qemu_kill_all(self, node=None):
        """Kill all qemu processes on DUT node if specified.

        :param node: Node to kill all QEMU processes on.
        :type node: dict
        """
        if node:
            self.qemu_set_node(node)
        self._ssh.exec_command_sudo('pkill -SIGKILL qemu')

    def qemu_clear_socks(self):
        """Remove all sockets created by QEMU."""
        # If serial console port still open kill process
        self._ssh.exec_command_sudo('fuser -k {serial_port}/tcp'.
                                    format(serial_port=\
                                           self._qemu_opt.get('serial_port')))
        # Delete all created sockets
        for socket in self._socks:
            self._ssh.exec_command_sudo('rm -f {socket}'.
                                        format(socket=socket))

    def qemu_system_status(self):
        """Return current VM status.

        VM should be in following status:

            - debug: QEMU running on a debugger
            - finish-migrate: paused to finish the migration process
            - inmigrate: waiting for an incoming migration
            - internal-error: internal error has occurred
            - io-error: the last IOP has failed
            - paused: paused
            - postmigrate: paused following a successful migrate
            - prelaunch: QEMU was started with -S and guest has not started
            - restore-vm: paused to restore VM state
            - running: actively running
            - save-vm: paused to save the VM state
            - shutdown: shut down (and -no-shutdown is in use)
            - suspended: suspended (ACPI S3)
            - watchdog: watchdog action has been triggered
            - guest-panicked: panicked as a result of guest OS panic

        :returns: VM status.
        :rtype: str
        :raises RuntimeError: If QMP does not return a status.
        """
        out = self._qemu_qmp_exec('query-status')
        ret = out.get('return')
        if ret is not None:
            return ret.get('status')
        else:
            err = out.get('error')
            raise RuntimeError('QEMU query-status failed on {host}: {error}'.
                               format(host=self._node['host'],
                                      error=json.dumps(err)))

    def qemu_version(self):
        """Return Qemu version.

        :returns: Qemu version.
        :rtype: str
        :raises RuntimeError: If the version command fails or its output
            does not contain a recognizable version.
        """
        # Qemu binary path
        bin_path = ('{qemu_path}{qemu_bin}'.
                    format(qemu_path=self._qemu_opt.get('qemu_path'),
                           qemu_bin=self._qemu_opt.get('qemu_bin')))

        try:
            ret_code, stdout, _ = self._ssh.exec_command_sudo(
                '{bin_path} --version'.
                format(bin_path=bin_path))
            if int(ret_code) != 0:
                raise RuntimeError('Failed to get QEMU version on {host}'.
                                   format(host=self._node['host']))
        except (RuntimeError, SSHTimeout):
            self.qemu_kill_all()
            self.qemu_clear_socks()
            raise

        # re.match returns None when the banner is not recognized; report
        # that explicitly instead of failing with AttributeError on None.
        match = re.match(r'QEMU emulator version ([\d.]*)', stdout)
        if match is None:
            raise RuntimeError('Failed to parse QEMU version on {host}: '
                               '{out}'.format(host=self._node['host'],
                                              out=stdout))
        return match.group(1)

    def _qemu_version_is_greater(self, version):
        """Compare Qemu versions.

        :param version: Version to compare the installed one against.
        :type version: str
        :returns: True if installed Qemu version is greater.
        :rtype: bool
        """
        return StrictVersion(self.qemu_version()) > StrictVersion(version)

    @staticmethod
    def build_qemu(node, force_install=False, apply_patch=False):
        """Build QEMU from sources.

        :param node: Node to build QEMU on.
        :param force_install: If True, then remove previous build.
        :param apply_patch: If True, then apply patches from qemu_patches dir.
        :type node: dict
        :type force_install: bool
        :type apply_patch: bool
        :raises RuntimeError: If building QEMU failed.
        """
        ssh = SSH()
        ssh.connect(node)

        directory = (' --directory={install_dir}{patch}'.
                     format(install_dir=Constants.QEMU_INSTALL_DIR,
                            patch='-patch' if apply_patch else '-base'))
        version = (' --version={install_version}'.
                   format(install_version=Constants.QEMU_INSTALL_VERSION))
        force = ' --force' if force_install else ''
        patch = ' --patch' if apply_patch else ''
        arch = Topology.get_node_arch(node)
        target_list = (' --target-list={arch}-softmmu'.
                       format(arch=arch))

        ret_code, _, _ = ssh.exec_command(
            "sudo -E sh -c '{fw_dir}/{lib_sh}/qemu_build.sh{version}{directory}"
            "{force}{patch}{target_list}'".
            format(fw_dir=Constants.REMOTE_FW_DIR,
                   lib_sh=Constants.RESOURCES_LIB_SH,
                   version=version, directory=directory, force=force,
                   patch=patch, target_list=target_list), 1000)

        if int(ret_code) != 0:
            raise RuntimeError('QEMU build failed on {host}'.
                               format(host=node['host']))
def _purge_installed_vpp(ssh):
    """Remove VPP packages that are already installed on the connected DUT.

    Nothing is done when ``dpkg -l | grep vpp`` finds no matching package.

    :param ssh: Connection to the DUT (already connected).
    :type ssh: SSH
    """
    ret, _, _ = ssh.exec_command("dpkg -l | grep vpp")
    if ret == 0:
        # Try to fix interrupted installations
        stdout = ssh_no_error(ssh, 'dpkg --configure -a', sudo=True)
        print("###TI {}".format(stdout))
        # Try to remove installed vpp.* packages
        stdout = ssh_no_error(ssh, 'apt-get purge -y "vpp.*"', sudo=True)
        print("###TI {}".format(stdout))


def main():
    """Copy and installation of VPP packages.

    Command line arguments:

    - ``-t/--topo``: topology file (required).
    - ``-d/--directory``: installation directory on the DUTs (required).
    - ``-p/--packages``: paths of packages to copy (required unless
      ``--cancel`` is given).
    - ``-c/--cancel``: cancel the installation, i.e. remove the installation
      directory and purge installed VPP packages.

    Every node of type "DUT" found in the topology is processed in turn.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-t", "--topo", required=True,
                        help="Topology file")
    parser.add_argument("-d", "--directory", required=True,
                        help="Installation directory")
    parser.add_argument("-p", "--packages", required=False, nargs='+',
                        help="Packages paths to copy")
    parser.add_argument("-c", "--cancel", help="Cancel installation",
                        action="store_true")
    args = parser.parse_args()
    topology_file = args.topo
    packages = args.packages
    install_dir = args.directory
    cancel_installation = args.cancel

    # Installing needs something to install; fail before touching any DUT
    # instead of crashing on "for deb in None" after the directory was wiped.
    if not cancel_installation and not packages:
        parser.error("--packages is required unless --cancel is given")

    # Context manager guarantees the topology file is closed.
    with open(topology_file) as work_file:
        topology = load(work_file.read())['nodes']

    ssh = SSH()
    for node in topology:
        if topology[node]['type'] != "DUT":
            continue

        print("###TI host: {}".format(topology[node]['host']))
        ssh.connect(topology[node])

        if cancel_installation:
            # Remove installation directory on DUT
            cmd = "rm -r {}".format(install_dir)
            stdout = ssh_ignore_error(ssh, cmd)
            print("###TI {}".format(stdout))

            _purge_installed_vpp(ssh)
        else:
            # Create installation directory on DUT
            cmd = "rm -r {0}; mkdir {0}".format(install_dir)
            stdout = ssh_no_error(ssh, cmd)
            print("###TI {}".format(stdout))

            # Copy packages from local path to installation dir
            for deb in packages:
                print("###TI scp: {}".format(deb))
                ssh.scp(local_path=deb, remote_path=install_dir)

            _purge_installed_vpp(ssh)

            # Installation of VPP deb packages
            cmd = "dpkg -i --force-all {}/*.deb".format(install_dir)
            stdout = ssh_no_error(ssh, cmd, sudo=True)
            print("###TI {}".format(stdout))
def apply_config(self, filename=None, waittime=5, retries=12,
                 restart_vpp=True):
    """Generate and apply VPP configuration for node.

    Use data from calls to this class to form a startup.conf file and
    write it to ``filename`` on the node. When a backup path is configured
    (``self._vpp_startup_conf_backup``), the current startup configuration
    is copied there first.

    :param filename: Startup configuration file name. Defaults to
        ``self._vpp_startup_conf`` when None.
    :param waittime: Time to wait for VPP to restart (default 5 seconds).
    :param retries: Number of times (default 12) to re-try waiting.
    :param restart_vpp: Whether to restart VPP.
    :type filename: str
    :type waittime: int
    :type retries: int
    :type restart_vpp: bool
    :raises RuntimeError: If writing config file failed or restart of VPP
        failed or backup of VPP startup.conf failed.
    """
    self.dump_config(self._nodeconfig)

    ssh = SSH()
    ssh.connect(self._node)

    if filename is None:
        filename = self._vpp_startup_conf

    if self._vpp_startup_conf_backup is not None:
        (ret, _, _) = \
            ssh.exec_command('sudo cp {0} {1}'.
                             format(self._vpp_startup_conf,
                                    self._vpp_startup_conf_backup))
        if ret != 0:
            raise RuntimeError(
                'Backup of config file failed on node {}'.format(
                    self._hostname))

    (ret, _, _) = \
        ssh.exec_command('echo "{config}" | sudo tee {filename}'.
                         format(config=self._vpp_config,
                                filename=filename))
    if ret != 0:
        raise RuntimeError('Writing config file failed to node {}'.format(
            self._hostname))

    if restart_vpp:
        # Instead of restarting, we'll do separate start and stop
        # actions. This way we don't care whether VPP was running
        # to begin with.
        ssh.exec_command('sudo service {} stop'.format(
            self._vpp_service_name))
        (ret, _, _) = \
            ssh.exec_command('sudo service {} start'
                             .format(self._vpp_service_name))
        if ret != 0:
            raise RuntimeError('Restarting VPP failed on node {}'.format(
                self._hostname))

        # Sleep <waittime> seconds, up to <retries> times,
        # and verify if VPP is running.
        for _ in range(retries):
            time.sleep(waittime)
            (ret, stdout, _) = \
                ssh.exec_command('echo show hardware-interfaces | '
                                 'nc 0 5002 || echo "VPP not yet running"')
            # The fallback "echo" terminates its output with a newline, so
            # look for the marker as a substring; an exact string
            # comparison would never match and the first attempt would
            # always be taken as success.
            if ret == 0 and 'VPP not yet running' not in stdout:
                break
        else:
            # Loop ran out of retries without a successful check.
            raise RuntimeError('VPP failed to restart on node {}'.format(
                self._hostname))
def run_wrk(tg_node, profile_name, tg_numa, test_type, warm_up=False):
    """Run wrk on the traffic generator according to a traffic profile.

    The profile is loaded from resources/traffic_profiles/wrk/. Depending on
    the number of URLs and CPUs in the profile, one of three wrk_utils.sh
    sub-commands is used. With warm-up enabled the same command is first run
    with a 10 second duration, followed by a 60 second pause.

    :param tg_node: Traffic generator node.
    :param profile_name: The name of wrk traffic profile.
    :param tg_numa: Numa node on which wrk will run.
    :param test_type: The type of the tests: cps, rps, bw
    :param warm_up: If True, warm-up traffic is generated before test traffic.
    :type profile_name: str
    :type tg_node: dict
    :type tg_numa: int
    :type test_type: str
    :type warm_up: bool
    :returns: Message with measured data.
    :rtype: str
    :raises: RuntimeError if node type is not a TG or if wrk fails.
    """
    if tg_node["type"] != NodeType.TG:
        raise RuntimeError("Node type is not a TG.")

    # Parse and validate the profile
    profile = WrkTrafficProfile(
        f"resources/traffic_profiles/wrk/{profile_name}.yaml"
    ).traffic_profile

    first_cpu = CpuUtils.cpu_list_per_node(tg_node, tg_numa)[
        profile["first-cpu"]
    ]

    # Leading arguments differ per scenario; the rest is shared.
    url_count = len(profile["urls"])
    if url_count == 1 and profile["cpus"] == 1:
        params = ["traffic_1_url_1_core", str(first_cpu)]
    elif url_count == profile["cpus"]:
        params = ["traffic_n_urls_n_cores", str(first_cpu)]
    else:
        params = [
            "traffic_n_urls_m_cores",
            str(first_cpu),
            str(profile["cpus"] // url_count),
        ]

    # Position of the duration argument once the shared part is appended.
    duration_idx = len(params) + 2
    params += [
        str(profile["nr-of-threads"]),
        str(profile["nr-of-connections"]),
        f"{profile['duration']}s",
        f"'{profile['header']}'",
        str(profile["timeout"]),
        str(profile["script"]),
        str(profile["latency"]),
        f"'{' '.join(profile['urls'])}'",
    ]

    script = f"{Constants.REMOTE_FW_DIR}/resources/tools/wrk/wrk_utils.sh"

    ssh = SSH()
    ssh.connect(tg_node)

    if warm_up:
        # Same command line, only with a short fixed duration.
        warm_up_params = list(params)
        warm_up_params[duration_idx] = "10s"
        ret, _, _ = ssh.exec_command(
            f"{script} {' '.join(warm_up_params)}", timeout=1800
        )
        if int(ret) != 0:
            raise RuntimeError("wrk runtime error.")
        sleep(60)

    ret, stdout, _ = ssh.exec_command(
        f"{script} {' '.join(params)}", timeout=1800
    )
    if int(ret) != 0:
        raise RuntimeError("wrk runtime error.")

    stats = _parse_wrk_output(stdout)

    chunks = ["\nMeasured values:\n"]
    if test_type == "cps" or test_type == "rps":
        if test_type == "cps":
            title, unit = "Connections", "cps"
        else:
            title, unit = "Requests", "rps"
        chunks.append(f"{title}/sec: Avg / Stdev / Max / +/- Stdev\n")
        for item in stats["rps-stats-lst"]:
            chunks.append(" / ".join(str(value) for value in item) + "\n")
        chunks.append(f"Total {unit}: {stats['rps-sum']}{unit}\n")
    elif test_type == "bw":
        chunks.append(f"Transfer/sec: {stats['bw-sum']}Bps")
    log_msg = "".join(chunks)

    logger.info(log_msg)

    return log_msg
def trex_stl_start_remote_exec(self, duration, rate, framesize,
                               traffic_type, async_call=False, latency=True,
                               warmup_time=5):
    """Start traffic by running the stateless profile script over ssh.

    For a synchronous call the last line of the script output is stored in
    self._result and parsed into self._received, self._sent, self._loss and
    self._latency. For an asynchronous call those four attributes are set
    to None.

    :param duration: Time expressed in seconds for how long to send traffic.
    :param rate: Traffic rate expressed with units (pps, %)
    :param framesize: L2 frame size to send (without padding and IPG).
    :param traffic_type: Traffic profile.
    :param async_call: If enabled then don't wait for all incoming traffic.
    :param latency: With latency measurement.
    :param warmup_time: Warmup time period.
    :type duration: int
    :type rate: str
    :type framesize: str
    :type traffic_type: str
    :type async_call: bool
    :type latency: bool
    :type warmup_time: int
    :returns: Nothing
    :raises: RuntimeError in case of TG driver issue.
    """
    ssh = SSH()
    ssh.connect(self._node)

    # Zero-based port numbers, swapped when the interfaces were reordered.
    if self._ifaces_reordered:
        port_0, port_1 = 1, 0
    else:
        port_0, port_1 = 0, 1

    command = (
        "sh -c "
        "'{fw_dir}/resources/tools/t-rex/t-rex-stateless-profile.py "
        "--profile {fw_dir}/resources/tools/t-rex/stream_profiles/"
        "{profile}.py "
        "--duration {duration} "
        "--frame_size {framesize} "
        "--rate {rate} "
        "--warmup_time {warmup} "
        "--port_0 {port_0} "
        "--port_1 {port_1} "
        "{async_flag} "
        "{latency_flag}'"
    ).format(
        fw_dir=Constants.REMOTE_FW_DIR,
        profile=traffic_type,
        duration=duration,
        framesize=framesize,
        rate=rate,
        warmup=warmup_time,
        port_0=port_0,
        port_1=port_1,
        async_flag="--async" if async_call else "",
        latency_flag="--latency" if latency else "",
    )

    # Give the script one extra minute on top of the traffic duration.
    (ret, stdout, _) = ssh.exec_command(command, timeout=int(duration) + 60)

    if int(ret) != 0:
        raise RuntimeError('T-rex stateless runtime error')

    if async_call:
        # no result
        self._received = None
        self._sent = None
        self._loss = None
        self._latency = None
        return

    # last line from console output
    self._result = stdout.splitlines()[-1]
    logger.info('TrafficGen result: {0}'.format(self._result))

    # Comma separated "key=value" items; item 0 is not used here.
    fields = self._result.split(', ')
    self._received = fields[1].split('=')[1]
    self._sent = fields[2].split('=')[1]
    self._loss = fields[3].split('=')[1]

    self._latency = []
    self._latency.append(fields[4].split('=')[1])
    self._latency.append(fields[5].split('=')[1])
def initialize_traffic_generator(self, tg_node, tg_if1, tg_if2,
                                 tg_if1_adj_node, tg_if1_adj_if,
                                 tg_if2_adj_node, tg_if2_adj_if,
                                 test_type,
                                 tg_if1_dst_mac=None, tg_if2_dst_mac=None):
    """TG initialization.

    For a T-rex subtype node this runs the installer script, writes
    /etc/trex_cfg.yaml with the PCI addresses and MAC addresses of both
    interfaces, and then tries up to three times to (re)start T-rex until
    the server info script succeeds. Nothing beyond storing the node is
    done for other subtypes.

    :param tg_node: Traffic generator node.
    :param tg_if1: TG - name of first interface.
    :param tg_if2: TG - name of second interface.
    :param tg_if1_adj_node: TG if1 adjacent node.
    :param tg_if1_adj_if: TG if1 adjacent interface.
    :param tg_if2_adj_node: TG if2 adjacent node.
    :param tg_if2_adj_if: TG if2 adjacent interface.
    :param test_type: 'L2' or 'L3' - src/dst MAC address.
    :param tg_if1_dst_mac: Interface 1 destination MAC address.
    :param tg_if2_dst_mac: Interface 2 destination MAC address.
    :type tg_node: dict
    :type tg_if1: str
    :type tg_if2: str
    :type tg_if1_adj_node: dict
    :type tg_if1_adj_if: str
    :type tg_if2_adj_node: dict
    :type tg_if2_adj_if: str
    :type test_type: str
    :type tg_if1_dst_mac: str
    :type tg_if2_dst_mac: str
    :returns: nothing
    :raises: RuntimeError in case of issue during initialization.
    :raises: ValueError if test_type is neither 'L2' nor 'L3'.
    """

    topo = Topology()

    if tg_node['type'] != NodeType.TG:
        raise RuntimeError('Node type is not a TG')
    self._node = tg_node

    if tg_node['subtype'] == NodeSubTypeTG.TREX:
        trex_path = "/opt/trex-core-2.25"

        ssh = SSH()
        ssh.connect(tg_node)

        # Run the installer script as root; it may take a long time.
        (ret, stdout, stderr) = ssh.exec_command(
            "sudo -E sh -c '{}/resources/tools/t-rex/"
            "t-rex-installer.sh'".format(Constants.REMOTE_FW_DIR),
            timeout=1800)
        if int(ret) != 0:
            logger.error('trex installation failed: {0}'.format(
                stdout + stderr))
            raise RuntimeError('Installation of TG failed')

        if1_pci = topo.get_interface_pci_addr(tg_node, tg_if1)
        if2_pci = topo.get_interface_pci_addr(tg_node, tg_if2)
        if1_mac = topo.get_interface_mac(tg_node, tg_if1)
        if2_mac = topo.get_interface_mac(tg_node, tg_if2)

        # L2: each TG port addresses the other TG port directly.
        # L3: each TG port addresses the interface adjacent to it.
        if test_type == 'L2':
            if1_adj_mac = if2_mac
            if2_adj_mac = if1_mac
        elif test_type == 'L3':
            if1_adj_mac = topo.get_interface_mac(tg_if1_adj_node,
                                                 tg_if1_adj_if)
            if2_adj_mac = topo.get_interface_mac(tg_if2_adj_node,
                                                 tg_if2_adj_if)
        else:
            raise ValueError("test_type unknown")

        # Explicit destination MACs override the computed ones, but only
        # when both are given.
        if tg_if1_dst_mac is not None and tg_if2_dst_mac is not None:
            if1_adj_mac = tg_if1_dst_mac
            if2_adj_mac = tg_if2_dst_mac

        # Keep the interface with the lower PCI address first and remember
        # that the order was swapped.
        if min(if1_pci, if2_pci) != if1_pci:
            if1_mac, if2_mac = if2_mac, if1_mac
            if1_pci, if2_pci = if2_pci, if1_pci
            if1_adj_mac, if2_adj_mac = if2_adj_mac, if1_adj_mac
            self._ifaces_reordered = True

        # "aa:bb:..." -> "0xaa,0xbb,..." as used in the config lists below.
        if1_mac_hex = "0x" + if1_mac.replace(":", ",0x")
        if2_mac_hex = "0x" + if2_mac.replace(":", ",0x")
        if1_adj_mac_hex = "0x" + if1_adj_mac.replace(":", ",0x")
        if2_adj_mac_hex = "0x" + if2_adj_mac.replace(":", ",0x")

        # NOTE(review): whitespace inside these YAML literals is
        # significant for the generated file - confirm the indentation
        # against the version-controlled original.
        (ret, stdout, stderr) = ssh.exec_command(
            "sudo sh -c 'cat << EOF > /etc/trex_cfg.yaml\n"
            "- port_limit : 2\n"
            " version : 2\n"
            " interfaces : [\"{}\",\"{}\"]\n"
            " port_info :\n"
            " - dest_mac : [{}]\n"
            " src_mac : [{}]\n"
            " - dest_mac : [{}]\n"
            " src_mac : [{}]\n"
            "EOF'"\
            .format(if1_pci, if2_pci,
                    if1_adj_mac_hex, if1_mac_hex,
                    if2_adj_mac_hex, if2_mac_hex))
        if int(ret) != 0:
            logger.error("failed to create t-rex config: {}"\
            .format(stdout + stderr))
            raise RuntimeError('trex config generation error')

        max_startup_retries = 3
        while max_startup_retries > 0:
            # kill T-rex only if it is already running
            # (the return code of this command is not checked)
            (ret, _, _) = ssh.exec_command(
                "sh -c 'pgrep t-rex && sudo pkill t-rex && sleep 3'")

            # configure T-rex
            (ret, stdout, stderr) = ssh.exec_command(
                "sh -c 'cd {0}/scripts/ && sudo ./trex-cfg'"\
                .format(trex_path))
            if int(ret) != 0:
                logger.error('trex-cfg failed: {0}'.format(stdout + stderr))
                raise RuntimeError('trex-cfg failed')

            # start T-rex
            # (launched in the background with output discarded)
            (ret, _, _) = ssh.exec_command(
                "sh -c 'cd {0}/scripts/ && "
                "sudo nohup ./t-rex-64 -i -c 7 --iom 0 > /dev/null 2>&1 &'"
                "> /dev/null"\
                .format(trex_path))
            if int(ret) != 0:
                raise RuntimeError('t-rex-64 startup failed')

            # get T-rex server info
            (ret, _, _) = ssh.exec_command(
                "sh -c 'sleep 3; "
                "{0}/resources/tools/t-rex/t-rex-server-info.py'"\
                .format(Constants.REMOTE_FW_DIR),
                timeout=120)
            if int(ret) == 0:
                # If we get info T-rex is running
                return
            # try again
            max_startup_retries -= 1
        # after max retries T-rex is still not responding to API
        # critical error occurred
        raise RuntimeError('t-rex-64 startup failed')
def run_wrk(tg_node, profile_name, tg_numa, test_type, warm_up=False):
    """Send the traffic as defined in the profile.

    The profile is loaded from resources/traffic_profiles/wrk/. Depending on
    the number of URLs and CPUs in the profile, one of three wrk_utils.sh
    sub-commands is used. With warm-up enabled the same command is first run
    with a 10 second duration, followed by a 60 second pause.

    :param tg_node: Traffic generator node.
    :param profile_name: The name of wrk traffic profile.
    :param tg_numa: Numa node on which wrk will run.
    :param test_type: The type of the tests: cps, rps, bw
    :param warm_up: If True, warm-up traffic is generated before test traffic.
    :type profile_name: str
    :type tg_node: dict
    :type tg_numa: int
    :type test_type: str
    :type warm_up: bool
    :returns: Message with measured data.
    :rtype: str
    :raises: RuntimeError if node type is not a TG or if wrk fails.
    """

    if tg_node['type'] != NodeType.TG:
        raise RuntimeError('Node type is not a TG.')

    # Parse and validate the profile
    profile_path = (
        "resources/traffic_profiles/wrk/{0}.yaml".format(profile_name))
    profile = WrkTrafficProfile(profile_path).traffic_profile

    cores = CpuUtils.cpu_list_per_node(tg_node, tg_numa)
    first_cpu = cores[profile["first-cpu"]]

    # Leading arguments differ per scenario; the rest is shared.
    if len(profile["urls"]) == 1 and profile["cpus"] == 1:
        params = ["traffic_1_url_1_core", str(first_cpu)]
    elif len(profile["urls"]) == profile["cpus"]:
        params = ["traffic_n_urls_n_cores", str(first_cpu)]
    else:
        params = [
            "traffic_n_urls_m_cores",
            str(first_cpu),
            # Floor division keeps this an integer string ("2", not "2.0")
            # regardless of whether "/" means true division.
            str(profile["cpus"] // len(profile["urls"])),
        ]

    # Position of the duration argument once the shared part is appended.
    duration_idx = len(params) + 2
    params.extend([
        str(profile["nr-of-threads"]),
        str(profile["nr-of-connections"]),
        "{0}s".format(profile["duration"]),
        "'{0}'".format(profile["header"]),
        str(profile["timeout"]),
        str(profile["script"]),
        str(profile["latency"]),
        "'{0}'".format(" ".join(profile["urls"])),
    ])

    args = " ".join(params)

    ssh = SSH()
    ssh.connect(tg_node)

    if warm_up:
        # Same command line, only with a short fixed duration.
        warm_up_params = deepcopy(params)
        warm_up_params[duration_idx] = "10s"
        warm_up_args = " ".join(warm_up_params)
        ret, _, _ = ssh.exec_command(
            "{0}/resources/tools/wrk/wrk_utils.sh {1}".format(
                Constants.REMOTE_FW_DIR, warm_up_args), timeout=1800)
        if int(ret) != 0:
            raise RuntimeError('wrk runtime error.')
        sleep(60)

    ret, stdout, _ = ssh.exec_command(
        "{0}/resources/tools/wrk/wrk_utils.sh {1}".format(
            Constants.REMOTE_FW_DIR, args), timeout=1800)
    if int(ret) != 0:
        raise RuntimeError('wrk runtime error.')

    stats = _parse_wrk_output(stdout)

    log_msg = "\nMeasured values:\n"
    if test_type == "cps":
        log_msg += "Connections/sec: Avg / Stdev / Max / +/- Stdev\n"
        for item in stats["rps-stats-lst"]:
            log_msg += "{0} / {1} / {2} / {3}\n".format(*item)
        log_msg += "Total cps: {0}cps\n".format(stats["rps-sum"])
    elif test_type == "rps":
        log_msg += "Requests/sec: Avg / Stdev / Max / +/- Stdev\n"
        for item in stats["rps-stats-lst"]:
            log_msg += "{0} / {1} / {2} / {3}\n".format(*item)
        log_msg += "Total rps: {0}rps\n".format(stats["rps-sum"])
    elif test_type == "bw":
        log_msg += "Transfer/sec: {0}Bps".format(stats["bw-sum"])

    logger.info(log_msg)

    return log_msg
def start_the_l3fwd_test(nodes_info, dut_node, dut_if1, dut_if2, nb_cores,
                         lcores_list, queue_nums, jumbo_frames):
    """Run the l3fwd script on the DUT node.

    The interface with the lower PCI address is treated as port 0. For each
    of the two ports and each queue a "(port, queue, lcore)" entry is
    produced; with a single core every entry uses the first lcore, otherwise
    lcores are taken from the list one after another.

    :param nodes_info: All the nodes info in the topology file.
    :param dut_node: Will execute the l3fwd on this node
    :param dut_if1: The test link interface 1.
    :param dut_if2: The test link interface 2.
    :param nb_cores: The cores number for the forwarding
    :param lcores_list: The lcore list string for the l3fwd routing
    :param queue_nums: The queues number for the NIC
    :param jumbo_frames: Is jumbo frames or not. Accepted: yes / no
    :type nodes_info: dict
    :type dut_node: dict
    :type dut_if1: str
    :type dut_if2: str
    :type nb_cores: str
    :type lcores_list: str
    :type queue_nums: str
    :type jumbo_frames: str
    :return: none
    :raises Exception: If the l3fwd script returns a non-zero code.
    """
    port0_if, port1_if = dut_if1, dut_if2
    port0_pci = Topology.get_interface_pci_addr(dut_node, port0_if)
    port1_pci = Topology.get_interface_pci_addr(dut_node, port1_if)

    # detect which is the port 0
    if port1_pci < port0_pci:
        port0_if, port1_if = port1_if, port0_if

    peer_node0, peer_if0 = Topology.get_adjacent_node_and_interface(
        nodes_info, dut_node, port0_if)
    peer_node1, peer_if1 = Topology.get_adjacent_node_and_interface(
        nodes_info, dut_node, port1_if)
    peer_mac0 = Topology.get_interface_mac(peer_node0, peer_if0)
    peer_mac1 = Topology.get_interface_mac(peer_node1, peer_if1)

    lcores = lcores_list.split(',')

    # prepare the port config param
    entries = []
    lcore_pos = 0
    for port in (0, 1):
        for queue in range(int(queue_nums)):
            if int(nb_cores) == 1:
                # single core: everything is served by the first lcore
                lcore_pos = 0
            entries.append('({0}, {1}, {2})'.format(
                port, queue, int(lcores[lcore_pos])))
            lcore_pos += 1
    port_config_param = ','.join(entries)

    ssh = SSH()
    ssh.connect(dut_node)

    cmd = (
        'cd {0}/tests/dpdk/dpdk_scripts/ && ./run_l3fwd.sh '
        '"{1}" "{2}" {3} {4} {5}'
    ).format(con.REMOTE_FW_DIR, lcores_list, port_config_param,
             peer_mac0, peer_mac1, jumbo_frames)

    (ret_code, _, stderr) = ssh.exec_command(cmd, timeout=600)
    if ret_code == 0:
        return

    logger.error('Execute the l3fwd error: {0}'.format(stderr))
    raise Exception('Failed to execute l3fwd test at node {0}'.format(
        dut_node['host']))
def apply_config(self, node, waittime=5, retries=12):
    """Generate and apply VPP configuration for node.

    Use data from calls to this class to form a startup.conf file and
    write it to VPP_CONFIG_FILENAME on the node, then stop and start the
    VPP service and wait until VPP answers on its CLI port.

    :param node: DUT node.
    :param waittime: Time to wait for VPP to restart (default 5 seconds).
    :param retries: Number of times (default 12) to re-try waiting.
    :type node: dict
    :type waittime: int
    :type retries: int
    :raises ValueError: If the node is not a DUT.
    :raises RuntimeError: If writing config file failed, or restarting of
        VPP failed.
    """

    if node['type'] != NodeType.DUT:
        raise ValueError('Node type is not a DUT')
    hostname = Topology.get_node_hostname(node)

    # Defaults used for every section the node has no stored value for.
    cpuconfig = ""
    pciconfig = ""
    socketmemconfig = DEFAULT_SOCKETMEM_CONFIG
    heapsizeconfig = ""
    rxqueuesconfig = ""
    txqueuesconfig = ""
    nomultiseg = ""
    enablevhostuser = ""
    cryptodevconfig = ""
    uiodriverconfig = ""
    snatconfig = ""

    if hostname in self._nodeconfig:
        cfg = self._nodeconfig[hostname]
        if 'cpu_config' in cfg:
            cpuconfig = " " + "\n ".join(cfg['cpu_config'])

        if 'pci_addrs' in cfg:
            pciconfig = " dev " + "\n dev ".join(cfg['pci_addrs'])

        if 'socketmem_config' in cfg:
            socketmemconfig = cfg['socketmem_config']

        if 'cryptodev_config' in cfg:
            cryptodevconfig = cfg['cryptodev_config']

        if 'uio_driver_config' in cfg:
            uiodriverconfig = cfg['uio_driver_config']

        if 'heapsize_config' in cfg:
            heapsizeconfig = "\nheapsize {}\n".\
                format(cfg['heapsize_config'])

        if 'rxqueues_config' in cfg:
            rxqueuesconfig = " " + "\n ".join(cfg['rxqueues_config'])

        if 'no_multi_seg_config' in cfg:
            nomultiseg = " " + "\n ".join(cfg['no_multi_seg_config'])

        if 'enable_vhost_user' in cfg:
            enablevhostuser = "******" + "\n ".join(cfg['enable_vhost_user'])

        if 'snat_config' in cfg:
            snatconfig = "snat {\n"
            snatconfig += " " + "\n ".join(cfg['snat_config'])
            snatconfig += "\n}"

    vppconfig = VPP_CONFIG_TEMPLATE.format(cpuconfig=cpuconfig,
                                           pciconfig=pciconfig,
                                           cryptodevconfig=cryptodevconfig,
                                           uiodriverconfig=uiodriverconfig,
                                           socketmemconfig=socketmemconfig,
                                           heapsizeconfig=heapsizeconfig,
                                           rxqueuesconfig=rxqueuesconfig,
                                           txqueuesconfig=txqueuesconfig,
                                           nomultiseg=nomultiseg,
                                           enablevhostuser=enablevhostuser,
                                           snatconfig=snatconfig)

    logger.debug('Writing VPP config to host {}: "{}"'.format(hostname,
                                                              vppconfig))

    ssh = SSH()
    ssh.connect(node)

    # We're using this "| sudo tee" construct because redirecting
    # a sudo'd output ("sudo echo xxx > /path/to/file") does not
    # work on most platforms...
    (ret, stdout, stderr) = \
        ssh.exec_command('echo "{0}" | sudo tee {1}'.
                         format(vppconfig, VPP_CONFIG_FILENAME))

    if ret != 0:
        logger.debug('Writing config file failed to node {}'.
                     format(hostname))
        logger.debug('stdout: {}'.format(stdout))
        logger.debug('stderr: {}'.format(stderr))
        raise RuntimeError('Writing config file failed to node {}'.
                           format(hostname))

    # Instead of restarting, we'll do separate start and stop
    # actions. This way we don't care whether VPP was running
    # to begin with.
    ssh.exec_command('sudo service {} stop'.format(VPP_SERVICE_NAME))
    (ret, stdout, stderr) = \
        ssh.exec_command('sudo service {} start'.format(VPP_SERVICE_NAME))
    if ret != 0:
        logger.debug('Restarting VPP failed on node {}'.
                     format(hostname))
        logger.debug('stdout: {}'.format(stdout))
        logger.debug('stderr: {}'.format(stderr))
        raise RuntimeError('Restarting VPP failed on node {}'.
                           format(hostname))

    # Sleep <waittime> seconds, up to <retries> times,
    # and verify if VPP is running.
    vpp_is_running = False
    retries_left = retries
    while (not vpp_is_running) and (retries_left > 0):
        time.sleep(waittime)
        retries_left -= 1

        # FIXME: Need to find a good way to check if VPP is operational.
        #
        # If VatTerminal/VatExecutor is anything like vppctl or
        # vpp_api_test, then in case VPP is NOT running it will block for
        # 30 seconds or so and not even return if VPP becomes alive during
        # that time. This makes it unsuitable in this case. We either need
        # a call that returns immediately, indicating whether VPP is
        # healthy or not, or a call that waits (up to a defined length
        # of time) and returns immediately if VPP is or becomes healthy.
        (ret, stdout, stderr) = \
            ssh.exec_command('echo show hardware-interfaces | '
                             'nc 0 5002')

        if ret == 0:
            vpp_is_running = True
        else:
            logger.debug('VPP not yet running, {} retries left'.
                         format(retries_left))

    # Decide on the outcome of the check, not on the retry counter: the
    # counter also reaches zero when the very last attempt succeeds.
    if not vpp_is_running:
        raise RuntimeError('VPP failed to restart on node {}'.
                           format(hostname))
    logger.debug('VPP interfaces found on node {}'.
                 format(stdout))
class HoneycombStartupConfig(object):
    """Generator for Honeycomb startup configuration.

    Collects JVM and process-launch options through the ``set_*`` methods
    and writes a launcher shell script to /opt/honeycomb/honeycomb on the
    node via :meth:`apply_config`.
    """

    # Launcher script. It re-runs Honeycomb for as long as the process exits
    # with status 100. Each shell statement must sit on its own line:
    # written as a single line, everything would be part of the shebang line.
    DEFAULT_TEMPLATE = (
        '#!/bin/sh -\n'
        'STATUS=100\n'
        '\n'
        'while [ $STATUS -eq 100 ]\n'
        'do\n'
        '  {java_call} -jar $(dirname $0)/{jar_filename}\n'
        '  STATUS=$?\n'
        '  echo "Honeycomb exited with status: $STATUS"\n'
        '  if [ $STATUS -eq 100 ]\n'
        '  then\n'
        '    echo "Restarting..."\n'
        '  fi\n'
        'done\n'
    )

    def __init__(self):
        """Initializer."""

        self.template = self.DEFAULT_TEMPLATE

        # Pattern of the command that starts the JVM.
        self.java_call = "{scheduler} {affinity} java{jit_mode}{params}"

        self.scheduler = ""
        self.core_affinity = ""
        self.jit_mode = ""
        self.params = ""
        self.numa = ""

        # Last rendered launcher script.
        self.config = ""
        self.ssh = SSH()

    def _render_config(self, jar_filename):
        """Render the launcher script for the given jar file name.

        :param jar_filename: Name of the Honeycomb jar file; surrounding
            whitespace (e.g. the newline of command output) is removed.
        :type jar_filename: str
        :returns: Launcher script text.
        :rtype: str
        """
        java_call = self.java_call.format(scheduler=self.scheduler,
                                          affinity=self.core_affinity,
                                          jit_mode=self.jit_mode,
                                          params=self.params)
        return self.template.format(java_call=java_call,
                                    jar_filename=jar_filename.strip())

    def apply_config(self, node):
        """Generate configuration file /opt/honeycomb/honeycomb on the
        specified node.

        The jar file name is taken from the listing of /opt/honeycomb on the
        node. The rendered script is also stored in ``self.config``.

        :param node: Honeycomb node.
        :type node: dict
        """

        self.ssh.connect(node)
        _, filename, _ = self.ssh.exec_command("ls /opt/honeycomb | grep .jar")

        self.config = self._render_config(filename)

        # Write to /tmp first, then move into place with sudo.
        cmd = "echo '{config}' > /tmp/honeycomb " \
              "&& chmod +x /tmp/honeycomb " \
              "&& sudo mv -f /tmp/honeycomb /opt/honeycomb".\
            format(config=self.config)
        self.ssh.exec_command(cmd)

    def set_cpu_scheduler(self, scheduler="FIFO"):
        """Use alternate CPU scheduler.

        Note: OTHER scheduler doesn't load-balance over isolcpus.

        :param scheduler: CPU scheduler to use. One of FIFO, RR, OTHER.
        :type scheduler: str
        :raises KeyError: If the scheduler name is not known.
        """

        schedulers = {
            "FIFO": "-f 99",  # First In, First Out
            "RR": "-r 99",  # Round Robin
            "OTHER": "-o",  # Ubuntu default
        }
        self.scheduler = "chrt {0}".format(schedulers[scheduler])

    def set_cpu_core_affinity(self, low, high=None):
        """Set core affinity for the honeycomb process and subprocesses.

        :param low: Lowest core ID number.
        :param high: Highest core ID number. Leave empty to use a single core.
        :type low: int
        :type high: int
        """

        self.core_affinity = "taskset -c {low}-{high}".format(
            low=low, high=high if high else low)

    def set_jit_compiler_mode(self, jit_mode):
        """Set running mode for Java's JIT compiler.

        :param jit_mode: Desired JIT mode. One of client, server, classic.
        :type jit_mode: str
        :raises KeyError: If the mode name is not known.
        """

        modes = {
            "client": " -client",  # Default
            "server": " -server",  # Higher performance but longer warmup
            "classic": " -classic"  # Disables JIT compiler
        }

        self.jit_mode = modes[jit_mode]

    def set_memory_size(self, mem_min, mem_max=None):
        """Set minimum and maximum memory use for the JVM.

        :param mem_min: Minimum amount of memory (MB).
        :param mem_max: Maximum amount of memory (MB). Default is 4 times
            minimum value.
        :type mem_min: int
        :type mem_max: int
        """

        self.params += " -Xms{min}m -Xmx{max}m".format(
            min=mem_min, max=mem_max if mem_max else mem_min * 4)

    def set_metaspace_size(self, mem_min, mem_max=None):
        """Set minimum and maximum memory used for class metadata in the JVM.

        :param mem_min: Minimum metaspace size (MB).
        :param mem_max: Maximum metaspace size (MB). Default is 4 times
            minimum value.
        :type mem_min: int
        :type mem_max: int
        """

        self.params += " -XX:MetaspaceSize={min}m " \
                       "-XX:MaxMetaspaceSize={max}m".format(
                           min=mem_min, max=mem_max if mem_max else mem_min*4)

    def set_numa_optimization(self):
        """Use optimization of memory use and garbage collection for NUMA
        architectures."""

        self.params += " -XX:+UseNUMA -XX:+UseParallelGC"

    def set_ssh_security_provider(self):
        """Disables BouncyCastle for SSHD."""
        # Workaround for issue described in:
        # https://wiki.fd.io/view/Honeycomb/Releases/1609/Honeycomb_and_ODL

        self.params += " -Dorg.apache.sshd.registerBouncyCastle=false"
class QemuUtils(object):
    """QEMU utilities.

    Builds the QEMU command line from the options set through the
    ``qemu_set_*``/``qemu_add_*`` methods, starts the VM over SSH on the node
    given to ``qemu_set_node`` and talks to it through the QMP and QGA unix
    sockets.
    """

    def __init__(self, qemu_id=1):
        """Initialize default QEMU options.

        :param qemu_id: QEMU instance ID; it is used in socket and PID file
            names, in default port numbers and in generated MAC addresses.
        :type qemu_id: int
        """
        self._qemu_id = qemu_id
        # Path to QEMU binary
        self._qemu_bin = '/usr/bin/qemu-system-x86_64'
        # QEMU Machine Protocol socket
        self._qmp_sock = '/tmp/qmp{0}.sock'.format(self._qemu_id)
        # QEMU Guest Agent socket
        self._qga_sock = '/tmp/qga{0}.sock'.format(self._qemu_id)
        # QEMU PID file
        self._pid_file = '/tmp/qemu{0}.pid'.format(self._qemu_id)
        self._qemu_opt = {}
        # Default 1 CPU.
        self._qemu_opt['smp'] = '-smp 1,sockets=1,cores=1,threads=1'
        # Daemonize the QEMU process after initialization. Default one
        # management interface.
        self._qemu_opt['options'] = '-cpu host -daemonize -enable-kvm ' \
            '-machine pc,accel=kvm,usb=off,mem-merge=off ' \
            '-net nic,macaddr=52:54:00:00:{0:02x}:ff -balloon none'\
            .format(self._qemu_id)
        self._qemu_opt['ssh_fwd_port'] = 10021 + qemu_id
        # Default serial console port
        self._qemu_opt['serial_port'] = 4555 + qemu_id
        # Default 512MB virtual RAM
        self._qemu_opt['mem_size'] = 512
        # Default huge page mount point, required for Vhost-user interfaces.
        self._qemu_opt['huge_mnt'] = '/mnt/huge'
        # Default do not allocate huge pages.
        self._qemu_opt['huge_allocate'] = False
        # Default image for CSIT virl setup
        self._qemu_opt['disk_image'] = '/var/lib/vm/vhost-nested.img'
        # VM node info dict
        self._vm_info = {
            'type': NodeType.VM,
            'port': self._qemu_opt['ssh_fwd_port'],
            'username': '******',
            'password': '******',
            'interfaces': {},
        }
        # Virtio queue count
        self._qemu_opt['queues'] = 1
        self._vhost_id = 0
        self._ssh = None
        self._node = None
        # Sockets to be removed by qemu_clear_socks()
        self._socks = [self._qmp_sock, self._qga_sock]

    def qemu_set_bin(self, path):
        """Set binary path for QEMU.

        :param path: Absolute path in filesystem.
        :type path: str
        """
        self._qemu_bin = path

    def qemu_set_smp(self, cpus, cores, threads, sockets):
        """Set SMP option for QEMU.

        :param cpus: Number of CPUs.
        :param cores: Number of CPU cores on one socket.
        :param threads: Number of threads on one CPU core.
        :param sockets: Number of discrete sockets in the system.
        :type cpus: int
        :type cores: int
        :type threads: int
        :type sockets: int
        """
        self._qemu_opt['smp'] = \
            '-smp {},cores={},threads={},sockets={}'.format(
                cpus, cores, threads, sockets)

    def qemu_set_ssh_fwd_port(self, fwd_port):
        """Set host port for guest SSH forwarding.

        :param fwd_port: Port number on host for guest SSH forwarding.
        :type fwd_port: int
        """
        self._qemu_opt['ssh_fwd_port'] = fwd_port
        # Keep the VM node dict in sync, it is what qemu_start() returns.
        self._vm_info['port'] = fwd_port

    def qemu_set_serial_port(self, port):
        """Set serial console port.

        :param port: Serial console port.
        :type port: int
        """
        self._qemu_opt['serial_port'] = port

    def qemu_set_mem_size(self, mem_size):
        """Set virtual RAM size.

        :param mem_size: RAM size in Mega Bytes.
        :type mem_size: int
        """
        self._qemu_opt['mem_size'] = int(mem_size)

    def qemu_set_huge_mnt(self, huge_mnt):
        """Set hugefile mount point.

        :param huge_mnt: System hugefile mount point.
        :type huge_mnt: str
        """
        self._qemu_opt['huge_mnt'] = huge_mnt

    def qemu_set_huge_allocate(self):
        """Set flag to allocate more huge pages if needed."""
        self._qemu_opt['huge_allocate'] = True

    def qemu_set_disk_image(self, disk_image):
        """Set disk image.

        :param disk_image: Path of the disk image.
        :type disk_image: str
        """
        self._qemu_opt['disk_image'] = disk_image

    def qemu_set_affinity(self, *host_cpus):
        """Set qemu affinity by getting thread PIDs via QMP and taskset to list
        of CPU cores.

        :param host_cpus: CPU cores, one positional argument per QEMU CPU
            thread.
        :type host_cpus: tuple
        :raises ValueError: If the number of host CPUs differs from the number
            of QEMU CPU threads.
        :raises RuntimeError: If taskset fails for any thread.
        """
        qemu_cpus = self._qemu_qmp_exec('query-cpus')['return']

        if len(qemu_cpus) != len(host_cpus):
            logger.debug('Host CPU count {0}, Qemu Thread count {1}'.format(
                len(host_cpus), len(qemu_cpus)))
            raise ValueError('Host CPU count must match Qemu Thread count')

        # Pin the n-th QEMU CPU thread to the n-th given host core.
        for qemu_cpu, host_cpu in zip(qemu_cpus, host_cpus):
            cmd = 'taskset -pc {0} {1}'.format(host_cpu, qemu_cpu['thread_id'])
            (ret_code, _, stderr) = self._ssh.exec_command_sudo(cmd)
            if int(ret_code) != 0:
                logger.debug('Set affinity failed {0}'.format(stderr))
                raise RuntimeError('Set affinity failed on {0}'.format(
                    self._node['host']))

    def qemu_set_scheduler_policy(self):
        """Set scheduler policy to SCHED_RR with priority 1 for all Qemu CPU
        processes.

        :raises RuntimeError: Set scheduler policy failed.
        """
        qemu_cpus = self._qemu_qmp_exec('query-cpus')['return']

        for qemu_cpu in qemu_cpus:
            cmd = 'chrt -r -p 1 {0}'.format(qemu_cpu['thread_id'])
            (ret_code, _, stderr) = self._ssh.exec_command_sudo(cmd)
            if int(ret_code) != 0:
                logger.debug('Set SCHED_RR failed {0}'.format(stderr))
                raise RuntimeError('Set SCHED_RR failed on {0}'.format(
                    self._node['host']))

    def qemu_set_node(self, node):
        """Set node to run QEMU on.

        :param node: Node to run QEMU on.
        :type node: dict
        """
        self._node = node
        self._ssh = SSH()
        self._ssh.connect(node)
        self._vm_info['host'] = node['host']

    def qemu_add_vhost_user_if(self, socket, server=True, mac=None):
        """Add Vhost-user interface.

        :param socket: Path of the unix socket.
        :param server: If True the socket shall be a listening socket.
        :param mac: Vhost-user interface MAC address (optional, otherwise is
            used auto-generated MAC 52:54:00:00:xx:yy).
        :type socket: str
        :type server: bool
        :type mac: str
        """
        self._vhost_id += 1
        # Create unix socket character device.
        chardev = ' -chardev socket,id=char{0},path={1}'.format(
            self._vhost_id, socket)
        if server is True:
            chardev += ',server'
        self._qemu_opt['options'] += chardev
        # Create Vhost-user network backend.
        netdev = (' -netdev vhost-user,id=vhost{0},chardev=char{0},queues={1}'.
                  format(self._vhost_id, self._qemu_opt['queues']))
        self._qemu_opt['options'] += netdev
        # If MAC is not specified use auto-generated MAC address based on
        # template 52:54:00:00:<qemu_id>:<vhost_id>, e.g. vhost1 MAC of QEMU
        # with ID 1 is 52:54:00:00:01:01
        if mac is None:
            mac = '52:54:00:00:{0:02x}:{1:02x}'.\
                format(self._qemu_id, self._vhost_id)
        extend_options = 'mq=on,csum=off,gso=off,guest_tso4=off,'\
            'guest_tso6=off,guest_ecn=off,mrg_rxbuf=off'
        # Create Virtio network device.
        device = ' -device virtio-net-pci,netdev=vhost{0},mac={1},{2}'.format(
            self._vhost_id, mac, extend_options)
        self._qemu_opt['options'] += device
        # Add interface MAC and socket to the node dict
        if_data = {'mac_address': mac, 'socket': socket}
        if_name = 'vhost{}'.format(self._vhost_id)
        self._vm_info['interfaces'][if_name] = if_data
        # Add socket to the socket list
        self._socks.append(socket)

    def _qemu_qmp_exec(self, cmd):
        """Execute QMP command.

        QMP is JSON based protocol which allows to control QEMU instance.

        :param cmd: QMP command to execute.
        :type cmd: str
        :return: Command output in python representation of JSON format. The
            { "return": {} } response is QMP's success response. An error
            response will contain the "error" keyword instead of "return".
        :raises RuntimeError: If the command fails or fewer than three output
            lines are received.
        """
        # To enter command mode, the qmp_capabilities command must be issued.
        qmp_cmd = 'echo "{ \\"execute\\": \\"qmp_capabilities\\" }' \
                  '{ \\"execute\\": \\"' + cmd + \
                  '\\" }" | sudo -S socat - UNIX-CONNECT:' + self._qmp_sock

        (ret_code, stdout, stderr) = self._ssh.exec_command(qmp_cmd)
        if int(ret_code) != 0:
            logger.debug('QMP execute failed {0}'.format(stderr))
            raise RuntimeError('QMP execute "{0}"'
                               ' failed on {1}'.format(cmd, self._node['host']))
        logger.trace(stdout)
        # Skip capabilities negotiation messages.
        out_list = stdout.splitlines()
        if len(out_list) < 3:
            raise RuntimeError('Invalid QMP output on {0}'.format(
                self._node['host']))
        return json.loads(out_list[2])

    def _qemu_qga_run(self, qga_cmd, label):
        """Run a prepared QGA shell command and parse the first output line.

        :param qga_cmd: Complete shell command to run on the node.
        :param label: Text identifying the command in the error message.
        :type qga_cmd: str
        :type label: str
        :return: Parsed first line of the output, or an empty dict if there
            was no output.
        :rtype: dict
        :raises RuntimeError: If the command exits with non-zero return code.
        :raises ValueError: If the first output line is not valid JSON.
        """
        (ret_code, stdout, stderr) = self._ssh.exec_command(qga_cmd)
        if int(ret_code) != 0:
            logger.debug('QGA execute failed {0}'.format(stderr))
            raise RuntimeError('QGA execute "{0}" failed on {1}'.format(
                label, self._node['host']))
        logger.trace(stdout)
        if not stdout:
            return {}
        return json.loads(stdout.split('\n', 1)[0])

    def _qemu_qga_flush(self):
        """Flush the QGA parser state.

        :return: Parsed first line of the output, or an empty dict if there
            was no output.
        :rtype: dict
        """
        qga_cmd = '(printf "\xFF"; sleep 1) | sudo -S socat - UNIX-CONNECT:' + \
                  self._qga_sock
        #TODO: probably need something else
        return self._qemu_qga_run(qga_cmd, qga_cmd)

    def _qemu_qga_exec(self, cmd):
        """Execute QGA command.

        QGA provide access to a system-level agent via standard QMP commands.

        :param cmd: QGA command to execute.
        :type cmd: str
        :return: Parsed first line of the output, or an empty dict if there
            was no output.
        :rtype: dict
        """
        qga_cmd = '(echo "{ \\"execute\\": \\"' + \
                  cmd + \
                  '\\" }"; sleep 1) | sudo -S socat - UNIX-CONNECT:' + \
                  self._qga_sock
        return self._qemu_qga_run(qga_cmd, cmd)

    def _wait_until_vm_boot(self, timeout=60):
        """Wait until QEMU VM is booted.

        First try to flush qga until there is output.
        Then ping QEMU guest agent each 5s until VM booted or timeout.

        :param timeout: Waiting timeout in seconds (optional, default 60s).
        :type timeout: int
        :raises RuntimeError: If the VM does not respond within the timeout.
        """
        # Both phases share one deadline measured from this point.
        start = time()
        while True:
            if time() - start > timeout:
                raise RuntimeError('timeout, VM {0} not booted on {1}'.format(
                    self._qemu_opt['disk_image'], self._node['host']))
            out = None
            try:
                out = self._qemu_qga_flush()
            except ValueError as err:
                # Output was not JSON; treated the same as no output.
                logger.trace('QGA qga flush unexpected output {}'.format(err))
            # Empty output - VM not booted yet
            if not out:
                sleep(5)
            else:
                break
        while True:
            if time() - start > timeout:
                raise RuntimeError('timeout, VM {0} not booted on {1}'.format(
                    self._qemu_opt['disk_image'], self._node['host']))
            out = None
            try:
                out = self._qemu_qga_exec('guest-ping')
            except ValueError as err:
                # Output was not JSON; treated the same as no output.
                logger.trace('QGA guest-ping unexpected output {}'.format(err))
            # Empty output - VM not booted yet
            if not out:
                sleep(5)
            # Non-error return - VM booted
            elif out.get('return') is not None:
                break
            # Skip error and wait
            elif out.get('error') is not None:
                sleep(5)
            else:
                # If there is an unexpected output from QGA guest-info, try
                # again until timeout.
                logger.trace('QGA guest-ping unexpected output {}'.format(out))

        logger.trace('VM {0} booted on {1}'.format(self._qemu_opt['disk_image'],
                                                   self._node['host']))

    def _update_vm_interfaces(self):
        """Update interface names in VM node dict.

        :raises RuntimeError: If the guest agent returns no interface list.
        """
        # Send guest-network-get-interfaces command via QGA, output example:
        # {"return": [{"name": "eth0", "hardware-address": "52:54:00:00:04:01"},
        # {"name": "eth1", "hardware-address": "52:54:00:00:04:02"}]}
        out = self._qemu_qga_exec('guest-network-get-interfaces')
        interfaces = out.get('return')
        if not interfaces:
            raise RuntimeError(
                'Get VM {0} interface list failed on {1}'.format(
                    self._qemu_opt['disk_image'], self._node['host']))
        # Create MAC-name dict, entries without a hardware address are skipped
        mac_name = {}
        for interface in interfaces:
            if 'hardware-address' not in interface:
                continue
            mac_name[interface['hardware-address']] = interface['name']
        # Match interface by MAC and save interface name
        for interface in self._vm_info['interfaces'].values():
            mac = interface.get('mac_address')
            if_name = mac_name.get(mac)
            if if_name is None:
                logger.trace('Interface name for MAC {} not found'.format(mac))
            else:
                interface['name'] = if_name

    def _huge_page_check(self, allocate=False):
        """Check that enough huge pages are free and the mount point exists.

        :param allocate: If True, try to allocate more huge pages when there
            are not enough free ones; otherwise raise an error.
        :type allocate: bool
        :raises RuntimeError: If there are not enough free huge pages and
            allocation is not requested, or if allocating huge pages, creating
            the mount directory or mounting hugetlbfs fails.
        """
        huge_mnt = self._qemu_opt.get('huge_mnt')
        mem_size = self._qemu_opt.get('mem_size')

        # Get huge pages information
        huge_size = self._get_huge_page_size()
        huge_free = self._get_huge_page_free(huge_size)
        huge_total = self._get_huge_page_total(huge_size)

        # Check if memory requested by qemu is available on host. mem_size is
        # in MB while the huge page size is read from /proc/meminfo in kB.
        if (mem_size * 1024) > (huge_free * huge_size):
            # If we do not want to allocate dynamically end with error
            if not allocate:
                raise RuntimeError('Not enough free huge pages: {0}, '
                                   '{1} MB'.format(
                                       huge_free,
                                       (huge_free * huge_size) // 1024))
            # If we want to allocate hugepage dynamically
            mem_needed = (mem_size * 1024) - (huge_free * huge_size)
            # Floor division keeps the page count an integer on Python 3 as
            # well; a float such as "512.0" must not be written to
            # nr_hugepages.
            huge_to_allocate = ((mem_needed // huge_size) * 2) + huge_total
            max_map_count = huge_to_allocate * 4
            # Increase maximum number of memory map areas a process may have
            cmd = 'echo "{0}" | sudo tee /proc/sys/vm/max_map_count'.format(
                max_map_count)
            (ret_code, _, stderr) = self._ssh.exec_command_sudo(cmd)
            if int(ret_code) != 0:
                # Not fatal on its own, but leave a trace for debugging.
                logger.debug('Increase max_map_count failed {0}'.format(
                    stderr))
            # Increase hugepage count
            cmd = 'echo "{0}" | sudo tee /proc/sys/vm/nr_hugepages'.format(
                huge_to_allocate)
            (ret_code, _, stderr) = self._ssh.exec_command_sudo(cmd)
            if int(ret_code) != 0:
                logger.debug('Allocate huge pages failed {0}'.format(stderr))
                raise RuntimeError('Allocate huge pages failed on {0}'.format(
                    self._node['host']))

        # Check if huge pages mount point exist
        has_huge_mnt = False
        (_, output, _) = self._ssh.exec_command('cat /proc/mounts')
        for line in output.splitlines():
            # Try to find something like:
            # none /mnt/huge hugetlbfs rw,relatime,pagesize=2048k 0 0
            mount = line.split()
            # Blank or truncated lines cannot describe a mount point.
            if len(mount) < 3:
                continue
            if mount[2] == 'hugetlbfs' and mount[1] == huge_mnt:
                has_huge_mnt = True
                break
        # If huge page mount point not exist create one
        if not has_huge_mnt:
            cmd = 'mkdir -p {0}'.format(huge_mnt)
            (ret_code, _, stderr) = self._ssh.exec_command_sudo(cmd)
            if int(ret_code) != 0:
                logger.debug('Create mount dir failed: {0}'.format(stderr))
                raise RuntimeError('Create mount dir failed on {0}'.format(
                    self._node['host']))
            cmd = 'mount -t hugetlbfs -o pagesize=2048k none {0}'.format(
                huge_mnt)
            (ret_code, _, stderr) = self._ssh.exec_command_sudo(cmd)
            if int(ret_code) != 0:
                logger.debug('Mount huge pages failed {0}'.format(stderr))
                raise RuntimeError('Mount huge pages failed on {0}'.format(
                    self._node['host']))

    def _read_huge_page_value(self, cmd, what):
        """Read one integer value from the node, trying up to three times.

        :param cmd: Command printing the value.
        :param what: Name of the value used in log and error messages.
        :type cmd: str
        :type what: str
        :returns: Parsed value.
        :rtype: int
        :raises: RuntimeError if reading failed for three times.
        """
        for _attempt in range(3):
            (ret, out, _) = self._ssh.exec_command_sudo(cmd)
            if int(ret) != 0:
                continue
            try:
                return int(out)
            except ValueError:
                logger.trace('Reading {0} information failed'.format(what))
        raise RuntimeError('Getting {0} information failed.'.format(what))

    def _get_huge_page_size(self):
        """Get default size of huge pages in system.

        :returns: Default size of huge pages in system, as reported in the
            Hugepagesize line of /proc/meminfo.
        :rtype: int
        :raises: RuntimeError if reading failed for three times.
        """
        # TODO: remove to dedicated library
        cmd_huge_size = "grep Hugepagesize /proc/meminfo | awk '{ print $2 }'"
        return self._read_huge_page_value(cmd_huge_size, 'huge page size')

    def _get_huge_page_free(self, huge_size):
        """Get number of free huge pages in system.

        :param huge_size: Size of hugepages.
        :type huge_size: int
        :returns: Number of free huge pages in system.
        :rtype: int
        :raises: RuntimeError if reading failed for three times.
        """
        # TODO: add numa aware option
        # TODO: remove to dedicated library
        cmd_huge_free = 'cat /sys/kernel/mm/hugepages/hugepages-{0}kB/'\
            'free_hugepages'.format(huge_size)
        return self._read_huge_page_value(cmd_huge_free, 'free huge pages')

    def _get_huge_page_total(self, huge_size):
        """Get total number of huge pages in system.

        :param huge_size: Size of hugepages.
        :type huge_size: int
        :returns: Total number of huge pages in system.
        :rtype: int
        :raises: RuntimeError if reading failed for three times.
        """
        # TODO: add numa aware option
        # TODO: remove to dedicated library
        cmd_huge_total = 'cat /sys/kernel/mm/hugepages/hugepages-{0}kB/'\
            'nr_hugepages'.format(huge_size)
        return self._read_huge_page_value(cmd_huge_total, 'total huge pages')

    def qemu_start(self):
        """Start QEMU and wait until VM boot.

        :return: VM node info.
        :rtype: dict
        :raises RuntimeError: If the huge page check, QEMU start, VM boot or
            the interface name update fails.

        .. note:: First set at least node to run QEMU on.
        .. warning:: Starts only one VM on the node.
        """
        # SSH forwarding
        ssh_fwd = '-net user,hostfwd=tcp::{0}-:22'.format(
            self._qemu_opt.get('ssh_fwd_port'))
        # Memory and huge pages
        mem = '-object memory-backend-file,id=mem,size={0}M,mem-path={1},' \
            'share=on -m {0} -numa node,memdev=mem'.format(
                self._qemu_opt.get('mem_size'), self._qemu_opt.get('huge_mnt'))

        # By default check only if hugepages are available.
        # If 'huge_allocate' is set to true try to allocate as well.
        self._huge_page_check(allocate=self._qemu_opt.get('huge_allocate'))

        # Disk option
        drive = '-drive file={0},format=raw,cache=none,if=virtio'.format(
            self._qemu_opt.get('disk_image'))
        # Setup QMP via unix socket
        qmp = '-qmp unix:{0},server,nowait'.format(self._qmp_sock)
        # Setup serial console
        serial = '-chardev socket,host=127.0.0.1,port={0},id=gnc0,server,' \
            'nowait -device isa-serial,chardev=gnc0'.format(
                self._qemu_opt.get('serial_port'))
        # Setup QGA via chardev (unix socket) and isa-serial channel
        qga = '-chardev socket,path={0},server,nowait,id=qga0 ' \
            '-device isa-serial,chardev=qga0'.format(self._qga_sock)
        # Graphic setup
        graphic = '-monitor none -display none -vga none'
        # PID file
        pid = '-pidfile {}'.format(self._pid_file)

        # Run QEMU
        cmd = '{0} {1} {2} {3} {4} {5} {6} {7} {8} {9} {10}'.format(
            self._qemu_bin, self._qemu_opt.get('smp'), mem, ssh_fwd,
            self._qemu_opt.get('options'),
            drive, qmp, serial, qga, graphic, pid)
        try:
            (ret_code, _, _) = self._ssh.exec_command_sudo(cmd, timeout=300)
            if int(ret_code) != 0:
                raise RuntimeError('QEMU start failed on {0}'.format(
                    self._node['host']))
            # Wait until VM boot
            self._wait_until_vm_boot()
        except (RuntimeError, SSHTimeout):
            # Clean up whatever was started before re-raising.
            self.qemu_kill_all()
            self.qemu_clear_socks()
            raise
        logger.trace('QEMU started successfully.')
        # Update interface names in VM node dict
        self._update_vm_interfaces()
        # Return VM node dict
        return self._vm_info

    def qemu_quit(self):
        """Quit the QEMU emulator.

        :raises RuntimeError: If QMP returns an error response.
        """
        out = self._qemu_qmp_exec('quit')
        err = out.get('error')
        if err is not None:
            raise RuntimeError('QEMU quit failed on {0}, error: {1}'.format(
                self._node['host'], json.dumps(err)))

    def qemu_system_powerdown(self):
        """Power down the system (if supported).

        :raises RuntimeError: If QMP returns an error response.
        """
        out = self._qemu_qmp_exec('system_powerdown')
        err = out.get('error')
        if err is not None:
            raise RuntimeError('QEMU system powerdown failed on {0}, '
                               'error: {1}'.format(self._node['host'],
                                                   json.dumps(err)))

    def qemu_system_reset(self):
        """Reset the system.

        :raises RuntimeError: If QMP returns an error response.
        """
        out = self._qemu_qmp_exec('system_reset')
        err = out.get('error')
        if err is not None:
            raise RuntimeError('QEMU system reset failed on {0}, '
                               'error: {1}'.format(self._node['host'],
                                                   json.dumps(err)))

    def qemu_kill(self):
        """Kill qemu process."""
        # Note: in QEMU start phase there are 3 QEMU processes because we
        # daemonize QEMU
        self._ssh.exec_command_sudo('chmod +r {}'.format(self._pid_file))
        self._ssh.exec_command_sudo('kill -SIGKILL $(cat {})'.format(
            self._pid_file))
        # Delete PID file
        cmd = 'rm -f {}'.format(self._pid_file)
        self._ssh.exec_command_sudo(cmd)

    def qemu_kill_all(self, node=None):
        """Kill all qemu processes on DUT node if specified.

        :param node: Node to kill all QEMU processes on. If not given, the
            node set earlier by qemu_set_node is used.
        :type node: dict
        """
        if node:
            self.qemu_set_node(node)
        self._ssh.exec_command_sudo('pkill -SIGKILL qemu')

    def qemu_clear_socks(self):
        """Remove all sockets created by QEMU."""
        # If serial console port still open kill process
        cmd = 'fuser -k {}/tcp'.format(self._qemu_opt.get('serial_port'))
        self._ssh.exec_command_sudo(cmd)
        # Delete all created sockets
        for sock in self._socks:
            cmd = 'rm -f {}'.format(sock)
            self._ssh.exec_command_sudo(cmd)

    def qemu_system_status(self):
        """Return current VM status.

        VM should be in following status:

            - debug: QEMU running on a debugger
            - finish-migrate: paused to finish the migration process
            - inmigrate: waiting for an incoming migration
            - internal-error: internal error has occurred
            - io-error: the last IOP has failed
            - paused: paused
            - postmigrate: paused following a successful migrate
            - prelaunch: QEMU was started with -S and guest has not started
            - restore-vm: paused to restore VM state
            - running: actively running
            - save-vm: paused to save the VM state
            - shutdown: shut down (and -no-shutdown is in use)
            - suspended: suspended (ACPI S3)
            - watchdog: watchdog action has been triggered
            - guest-panicked: panicked as a result of guest OS panic

        :return: VM status.
        :rtype: str
        :raises RuntimeError: If QMP does not return a "return" response.
        """
        out = self._qemu_qmp_exec('query-status')
        ret = out.get('return')
        if ret is not None:
            return ret.get('status')
        err = out.get('error')
        raise RuntimeError('QEMU query-status failed on {0}, '
                           'error: {1}'.format(self._node['host'],
                                               json.dumps(err)))

    @staticmethod
    def build_qemu(node, force_install=False, apply_patch=False):
        """Build QEMU from sources.

        :param node: Node to build QEMU on.
        :param force_install: If True, then remove previous build.
        :param apply_patch: If True, then apply patches from qemu_patches dir.
        :type node: dict
        :type force_install: bool
        :type apply_patch: bool
        :raises: RuntimeError if building QEMU failed.
        """
        ssh = SSH()
        ssh.connect(node)

        directory = ' --directory={0}'.format(Constants.QEMU_INSTALL_DIR)
        version = ' --version={0}'.format(Constants.QEMU_INSTALL_VERSION)
        force = ' --force' if force_install else ''
        patch = ' --patch' if apply_patch else ''

        # The second positional argument of exec_command is the timeout.
        (ret_code, stdout, stderr) = \
            ssh.exec_command(
                "sudo -E sh -c '{0}/{1}/qemu_build.sh{2}{3}{4}{5}'"\
                .format(Constants.REMOTE_FW_DIR, Constants.RESOURCES_LIB_SH,
                        version, directory, force, patch), 1000)

        if int(ret_code) != 0:
            logger.debug('QEMU build failed {0}'.format(stdout + stderr))
            raise RuntimeError('QEMU build failed on {0}'.format(node['host']))