def _periodic_snapshots_func(self):
  """Create a number of snapshots periodically on a VM group.

  This function blocks for up to interval_minutes * (num_snapshots - 1)
  minutes. It may block for less time if the test is marked to stop.

  Raises:
    CurieTestException: If the test's cluster's hypervisor is unsupported.
      Propagated from __create_nutanix_snapshots.
  """
  log.info("Creating %d periodic snapshots (period = %d minute(s))",
           self.num_snapshots, self.interval_minutes)
  sanitized_name = NameUtil.sanitize_filename(self.vm_group.name())
  snapshot_tag = NameUtil.entity_name(
    self.scenario, "%s_%d" % (sanitized_name, self.num_snapshots))
  for snapshot_idx in xrange(self.num_snapshots):
    if self.scenario.should_stop():
      log.warning("Test stopped (created %d snapshots of %d)",
                  snapshot_idx, self.num_snapshots)
      break
    else:
      self.snapshot_num = snapshot_idx + 1
      self.scenario.cluster.snapshot_vms(self.vm_group.get_vms(),
                                         tag=snapshot_tag)
      self.create_annotation(
        "%s: Snapshot %d" % (self.vm_group.name(), snapshot_idx + 1))
      if snapshot_idx < self.num_intervals:
        Wait(self.scenario, self.interval_minutes * 60)()
def _cmd(self, randseed=None):
  """Construct the FIO command to generate the given workload.

  Args:
    randseed (int): Optional seed passed to FIO via --randseed. If omitted,
      --randrepeat=0 is used instead.

  Returns:
    str: FIO command string.
  """
  iops_log_path = os.path.join(self._remote_results_path,
                               NameUtil.sanitize_filename(self._name))
  lat_log_path = os.path.join(self._remote_results_path,
                              NameUtil.sanitize_filename(self._name))
  cmd = cStringIO.StringIO()
  cmd.write("/usr/bin/fio "
            "--write_iops_log=%s "
            "--write_lat_log=%s "
            "--log_avg_msec=%d "
            "--output-format=json "
            "--status-interval=%d " % (iops_log_path, lat_log_path, 1000,
                                       self.reporting_interval))
  if self._workload_duration_secs:
    cmd.write("--runtime=%d " % self._workload_duration_secs)
  if randseed:
    cmd.write("--randseed=%d " % randseed)
  else:
    cmd.write("--randrepeat=0 ")
  cmd.write("--output=%s %s" % (self.__remote_results_file,
                                self._remote_config_path))
  cmd_str = cmd.getvalue()
  log.debug("FIO command: %s", cmd_str)
  return cmd_str
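# Illustrative sketch only (not part of the source): the kind of command string
# _cmd() assembles, using assumed example values. The real paths come from
# _remote_results_path, _remote_config_path and __remote_results_file, and the
# status interval comes from reporting_interval (assumed to be 10 here).
example_fio_cmd = (
  "/usr/bin/fio "
  "--write_iops_log=/home/nutanix/output/fio/oltp_run "
  "--write_lat_log=/home/nutanix/output/fio/oltp_run "
  "--log_avg_msec=1000 "
  "--output-format=json "
  "--status-interval=10 "   # assumed reporting_interval
  "--runtime=3600 "         # present only when a workload duration is set
  "--randrepeat=0 "         # replaced by --randseed=<seed> when a seed is given
  "--output=/home/nutanix/output/fio/oltp_run.json "  # assumed results file name
  "/home/nutanix/output/fio/oltp_run.fio")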
def test_create_vm(self):
  expected_goldimage_name = NameUtil.goldimage_vmdisk_name(
    "ubuntu1604-x86_64", "os")
  vm_name = NameUtil.goldimage_vm_name(self.scenario, "ubuntu1604")
  datastore_name = self.cluster._mgmt_server_info.prism_container_id
  node_id = self.cluster.nodes()[0].node_id()
  vm = self.cluster.create_vm(self.scenario.goldimages_directory,
                              "ubuntu1604", vm_name, vcpus=1, ram_mb=1024,
                              node_id=node_id, datastore_name=datastore_name,
                              data_disks=[10, 20, 30])
  vms = self.cluster.find_vms([vm_name])
  assert len(vms) == 1, "Too many VMs found for %s" % vm_name
  assert vms[0].vm_name() == vm_name, "VM found %s wasn't %s" % (
    vms[0].vm_name(), vm_name)
  assert isinstance(vms[0], AcropolisVm), ("VM is %s instead of AcropolisVm" %
                                           str(type(vms[0])))
  found_images = self.__get_image_names()
  assert expected_goldimage_name in found_images, \
    "Goldimage disk wasn't found in image service."
  self.scenario.cluster.cleanup()
  found_images = self.__get_image_names()
  assert expected_goldimage_name not in found_images, \
    "Goldimage disk was still found in image service after cleanup."
def __test_vm_name(self, vm_id):
  """Construct a VM name.

  Args:
    vm_id (str): Unique identifier for this VM.

  Returns:
    str: VM name.
  """
  pattern = "%s_%s" % (NameUtil.sanitize_filename(self._name), vm_id)
  return NameUtil.test_vm_name(self._scenario, pattern)
def _scenario_results_loop(self, interval_secs=10):
  """Periodically update the scenario results while the scenario is running.
  """
  target_config_path = None
  if self.prometheus_config_directory:
    target_filename = NameUtil.sanitize_filename("targets_%s.json" % self.id)
    target_config_path = os.path.join(self.prometheus_config_directory,
                                      target_filename)
  try:
    while self._end_time_secs is None:
      with self.__sleep_cv:
        self.__sleep_cv.wait(interval_secs)
      self._scenario_results_update(target_config_path)
  except BaseException as exc:
    with self._lock:
      self._status = Status.INTERNAL_ERROR
      self.__error_message = str(exc)
    log.exception("Unhandled exception occurred inside "
                  "_scenario_results_loop")
  else:
    log.debug("_scenario_results_loop exited cleanly")
  finally:
    if target_config_path and os.path.isfile(target_config_path):
      os.remove(target_config_path)
      log.debug("%s: Removed file %r", self, target_config_path)
    log.debug("%s: Exiting _scenario_results_loop", self)
def deploy_goldimage_image_service(self, goldimages_directory, goldimage_name):
  """Deploy a gold image to the image service.

  Args:
    goldimages_directory (str): Directory containing the gold images.
    goldimage_name (str): Name of the gold image to deploy.

  Returns:
    str: ID of the created disk image.
  """
  arch = self.get_cluster_architecture()
  # Select a vdisk format to use. Currently, PPC64LE gold images are built
  # only in qcow2 format and x86_64 gold images in vmdk. We could have the
  # manager perform a conversion, but Acropolis can already do the image
  # conversion for us.
  if arch == GoldImageManager.ARCH_PPC64LE:
    disk_format = GoldImageManager.FORMAT_QCOW2
  else:
    disk_format = GoldImageManager.FORMAT_VMDK
  # Use the GoldImageManager to get a path to the appropriate gold image.
  goldimage_manager = GoldImageManager(goldimages_directory)
  goldimage_path = goldimage_manager.get_goldimage_path(
    goldimage_name, format_str=disk_format, arch=arch)
  log.debug("Deploying %s to cluster", goldimage_path)
  # Deploy the image to the image service.
  disk_name = os.path.splitext(os.path.basename(goldimage_path))[0]
  img_uuid, tid, _ = self._prism_client.images_create(
    NameUtil.goldimage_vmdisk_name(disk_name, "os"), goldimage_path,
    self._container_id)
  TaskPoller.execute_parallel_tasks(
    tasks=PrismTask.from_task_id(self._prism_client, tid),
    timeout_secs=3600)
  # NB: Required due to possible AHV bug. See XRAY-225.
  num_images_get_retries = 5
  for attempt_num in xrange(num_images_get_retries):
    images_get_data = self._prism_client.images_get(image_id=img_uuid)
    image_state = images_get_data["image_state"]
    if image_state.lower() == "active":
      # Return the ID of the created disk image.
      return images_get_data["vm_disk_id"]
    else:
      log.info("Waiting for created image to become active "
               "(imageState: %s, retry %d of %d)",
               image_state, attempt_num + 1, num_images_get_retries)
      log.debug(images_get_data)
      time.sleep(1)
  else:
    raise CurieException(
      CurieError.kInternalError,
      "Created image failed to become active within %d attempts" %
      num_images_get_retries)
def series_list_to_result_pbs(self, series_list):
  result_pbs = []
  if self.aggregate:
    series = self._combine_results(series_list, how=self.aggregate)
    if series is None or series.empty:
      series_list = []
    else:
      series_list = [series]
  run_phase_start = pd.to_datetime(self.scenario.phase_start_time_secs(
    self.scenario.phase.RUN), unit="s")
  for result_num, series in enumerate(series_list):
    x_unit = curie_test_pb2.CurieTestResult.Data2D.kUnixTimestamp
    y_unit = None
    if self.metric.rate == CurieMetric.kPerSecond:
      if self.metric.unit == CurieMetric.kOperations:
        y_unit = curie_test_pb2.CurieTestResult.Data2D.kIOPS
      elif self.metric.unit == CurieMetric.kKilobytes:
        # Convert from KB/s to B/s.
        series *= 1024
        y_unit = curie_test_pb2.CurieTestResult.Data2D.kBytesPerSecond
      else:
        y_unit = curie_test_pb2.CurieTestResult.Data2D.kCount
        log.warning("Unexpected unit %s, defaulting to %s",
                    CurieMetric.Unit.Name(self.metric.unit), y_unit)
    elif self.metric.unit == CurieMetric.kPercent:
      y_unit = curie_test_pb2.CurieTestResult.Data2D.kPercent
    elif self.metric.unit == CurieMetric.kMegahertz:
      # Convert from megahertz to hertz.
      series *= 1e6
      y_unit = curie_test_pb2.CurieTestResult.Data2D.kHertz
    else:
      y_unit = curie_test_pb2.CurieTestResult.Data2D.kCount
      log.warning("Unexpected rate %s, defaulting to %s",
                  CurieMetric.Rate.Name(self.metric.rate), y_unit)
    assert y_unit is not None
    test_result = FioWorkloadResult.series_to_result_pb(
      series[run_phase_start:])
    test_result.data_2d.x_unit_type = x_unit
    test_result.data_2d.y_unit_type = y_unit
    test_result.name = self.kwargs.get("title", self.name)
    if len(series_list) > 1:
      test_result.name += " (Node %d)" % result_num
    test_result.description = self.kwargs.get("description",
                                              self.metric.description)
    test_result.group = self.__class__.__name__
    test_result.result_id = NameUtil.sanitize_filename(test_result.name)
    test_result.data_2d.report_metrics.extend(self.report_metrics)
    self._add_expected_value_details(test_result)
    if self.report_group:
      test_result.data_2d.report_group = self.report_group
    test_result.result_hint = self.kwargs.get("result_hint", "")
    result_pbs.append(test_result)
  return result_pbs
def get_local_results_path(self, vm):
  """Get the local results path for this workload generator and VM.

  Args:
    vm (curie.vm.Vm): VM associated with this results path.

  Returns:
    str: Local results path.
  """
  return os.path.join(self._local_path_root, vm.vm_name(),
                      NameUtil.sanitize_filename(self._name))
def test_template_clone_default(self):
  self.scenario.vm_groups = {
    self.group_name: VMGroup(self.scenario, self.group_name,
                             template="ubuntu1604",
                             template_type="DISK",
                             count_per_cluster=1,
                             data_disks=[1])}
  vms = steps.vm_group.CloneFromTemplate(self.scenario, self.group_name)()
  expected = ["__curie_test_%d_%s_0000" %
              (self.scenario.id,
               NameUtil.sanitize_filename(self.group_name))]
  self.assertEqual(set([vm.vm_name() for vm in vms]), set(expected))
def __deploy_from_template(self):
  """Deploy a VM template.

  If the template has already been deployed on the cluster, this function is
  a no-op.

  Returns:
    CurieVM: VM of the deployed template.

  Raises:
    CurieException: If no image named goldimage_name exists.
    CurieTestException: If the VM fails to be created properly.
  """
  image_name = "%s_%s" % (self.vm_group.template_name(),
                          NameUtil.sanitize_filename(self.vm_group.name()))
  vm_name = NameUtil.goldimage_vm_name(self.scenario, image_name)
  vm = self.scenario.cluster.find_vm(vm_name)
  if vm is not None:
    return vm
  node_id = self.scenario.cluster.nodes()[0].node_id()
  if self.vm_group.template_type() == "OVF":
    return self.scenario.cluster.import_vm(
      self.scenario.goldimages_directory, self.vm_group.template_name(),
      vm_name, node_id=node_id)
  elif self.vm_group.template_type() == "DISK":
    return self.scenario.cluster.create_vm(
      self.scenario.goldimages_directory, self.vm_group.template_name(),
      vm_name, node_id=node_id, vcpus=self.vm_group.vcpus(),
      ram_mb=self.vm_group.ram_mb(), data_disks=self.vm_group.data_disks())
def test_template_clone_linked_count(self):
  count = 2
  self.scenario.vm_groups = {
    self.group_name: VMGroup(self.scenario, self.group_name,
                             template="ubuntu1604",
                             template_type="DISK",
                             count_per_cluster=count,
                             data_disks=[1])}
  vms = steps.vm_group.CloneFromTemplate(self.scenario, self.group_name,
                                         linked_clone=True)()
  expected = ["__curie_test_%d_%s_%04d" %
              (self.scenario.id,
               NameUtil.sanitize_filename(self.group_name),
               index)
              for index in xrange(count)]
  self.assertEqual(set([vm.vm_name() for vm in vms]), set(expected))
def _run(self):
  """Run a command on every VM in a VM group.

  Raises:
    CurieTestException:
      If vm_group_name does not match any existing VMs.
      If fail_on_error is True and the command fails on any VM.
  """
  vms = self.vm_group.get_vms()
  if not vms:
    raise NoVMsFoundError(
      "Failed to execute '%s' on VM Group '%s'" %
      (self.command, self.vm_group.name()), self)
  self.create_annotation("%s: Executing '%s'" % (self.vm_group.name(),
                                                 self.command))
  for vm in vms:
    cmd_id = "%s_%s_%d" % (vm.vm_name(),
                           NameUtil.sanitize_filename(self.command),
                           time.time() * 1e6)
    try:
      exit_status, _, _ = vm.execute_sync(cmd_id, self.command,
                                          self.timeout_secs, user=self.user)
    except Exception:
      if self.fail_on_error:
        log.exception("An unhandled exception occurred in execute_sync")
        raise
      else:
        log.warning("An unhandled exception occurred in execute_sync",
                    exc_info=True)
    else:
      if exit_status != 0:
        msg = ("VM '%s' command '%s' returned non-zero status" %
               (vm.vm_name(), self.command))
        if self.fail_on_error:
          raise CurieTestException(
            cause=msg,
            impact="The command did not complete successfully.",
            corrective_action=
            "If you are the author of the scenario, please check the syntax "
            "of the command requested in the %s step." % self.name)
        else:
          log.warning(msg)
def __init__(self, name, scenario, configuration, short_name):
  """Create an IOGen object.

  Args:
    name (str): Name of the iogen instance, used throughout
      (e.g. "oltp_run").
    scenario (Scenario): Used to acquire scenario information.
    configuration: Configuration object for the workload generator; its type
      depends on the workload generator.
    short_name (str): Used as the base directory name for iogen-produced
      files as well as the extension for the iogen configuration input files.
  """
  # The name of the workload generator instance.
  self._name = name
  # The scenario object this instance is related to.
  self._scenario = scenario
  # Name of the workload generator type (e.g. fio).
  self._short_name = short_name
  # Configuration object for the workload generator.
  self._configuration = configuration
  self._workload_duration_secs = None
  self._expected_workload_finish_secs = None
  self.__workload_start_secs = None
  self.__workload_end_secs = None
  self.__prepared_vms = None
  # Base path on the remote virtual machine (e.g. /home/nutanix/output/fio).
  self._remote_path_root = os.path.join(_util.remote_output_directory(),
                                        self._short_name)
  # Path to the output path on the remote virtual machine
  # (e.g. /home/nutanix/output/fio/dss_prefill).
  self._remote_results_path = os.path.join(
    self._remote_path_root, NameUtil.sanitize_filename(self._name))
  # Path to the configuration on the remote VM.
  self._remote_config_path = "%s.%s" % (self._remote_results_path,
                                        self._short_name)
  self._local_path_root = os.path.join(self._scenario.test_dir(),
                                       self._short_name)
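# Illustrative sketch only (not part of the source): the directory layout the
# attributes above imply for a hypothetical FIO instance named "oltp_run",
# assuming _util.remote_output_directory() is /home/nutanix/output and the
# scenario's test_dir() is /tmp/curie_test.
import os

remote_output_directory = "/home/nutanix/output"  # assumed value
short_name = "fio"
name = "oltp_run"

remote_path_root = os.path.join(remote_output_directory, short_name)
# -> /home/nutanix/output/fio
remote_results_path = os.path.join(remote_path_root, name)
# -> /home/nutanix/output/fio/oltp_run (name assumed already filesystem-safe)
remote_config_path = "%s.%s" % (remote_results_path, short_name)
# -> /home/nutanix/output/fio/oltp_run.fio
local_path_root = os.path.join("/tmp/curie_test", short_name)
# -> /tmp/curie_test/fio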
def test_create_vm(self):
  vm_name = NameUtil.goldimage_vm_name(self.scenario, "ubuntu1604")
  datastore_name = self.cluster._vcenter_info.vcenter_datastore_name
  node_id = self.cluster.nodes()[0].node_id()
  vm = self.cluster.create_vm(self.scenario.goldimages_directory,
                              "ubuntu1604", vm_name, vcpus=1, ram_mb=1024,
                              node_id=node_id, datastore_name=datastore_name,
                              data_disks=[10, 20, 30])
  vms = self.cluster.find_vms([vm_name])
  assert len(vms) == 1, "Too many VMs found for %s" % vm_name
  assert vms[0].vm_name() == vm_name, "VM found %s wasn't %s" % (
    vms[0].vm_name(), vm_name)
  assert isinstance(vms[0], VsphereVm), ("VM is %s instead of VsphereVm" %
                                         str(type(vms[0])))
  paths = self._get_datastore_paths("__curie_goldimage*")
  assert len(paths) > 0, \
    "Goldimage paths were not found in datastore. %s" % paths
  self.scenario.cluster.cleanup()
  paths = self._get_datastore_paths("__curie_goldimage*")
  assert len(paths) == 0, \
    "Goldimage paths were found in datastore after cleanup."
def is_nutanix_cvm(vm):
  vm_name = vm["name"]
  return NameUtil.is_hyperv_cvm_vm(vm_name)
def _get_curie_vms(scenario):
  vms = scenario.cluster.vms()
  curie_vm_names, _ = NameUtil.filter_test_vm_names(
    [vm.vm_name() for vm in vms], [])
  return [vm for vm in vms if vm.vm_name() in curie_vm_names]
def _run(self):
  test_vms, _ = NameUtil.filter_test_vms(self.scenario.cluster.vms(),
                                         [self.scenario.id])
  self.scenario.cluster.disable_ha_vms(test_vms)
  self.create_annotation("%s: Disabled HA on VMs" % self.vm_group.name())
def tearDown(self):
  test_vms, _ = NameUtil.filter_test_vms(self.cluster.vms(),
                                         [self.scenario.id])
  self.cluster.power_off_vms(test_vms)
  self.cluster.delete_vms(test_vms)
        continue
      pid = int(name)
      try:
        cmdline = open("/proc/%d/cmdline" % pid).read().replace("\0", " ")
      except (IOError, OSError), ex:
        continue
      match = re.search(r"python .*curie_cmd_wrapper (\d+)", cmdline)
      if not match:
        continue
      cmd_id = match.group(1)
      cmd_id_pid_map[cmd_id] = pid
    # Kill any unknown commands (ones with no entry in the commands directory).
    cmd_ids = set(os.listdir(self.cmds_dir()))
    for cmd_id, pid in cmd_id_pid_map.iteritems():
      if NameUtil.sanitize_filename(cmd_id) not in cmd_ids:
        log.warning("Killing process group %d for unknown command %s",
                    pid, cmd_id)
        try:
          os.killpg(pid, signal.SIGKILL)
        except OSError, ex:
          CHECK_EQ(ex.errno, errno.ESRCH, msg=str(ex))
    # Reconstruct self.__cmd_map.
    for cmd_id in cmd_ids:
      status_path = os.path.join(self.cmd_dir(cmd_id), "status.bin")
      cmd_status = CmdStatus()
      cmd_status.ParseFromString(open(status_path).read())
      if cmd_status.HasField("pid"):
        pid = cmd_status.pid
      else:
        pid = None
def cmd_dir(cmd_id, root=None):
  return os.path.join(CurieUnixAgent.cmds_dir(root=root),
                      NameUtil.sanitize_filename(cmd_id))
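# Illustrative sketch only (not part of the source): cmd_ids built by callers
# can contain spaces and shell text (see the ExecCommand step above), so they
# are passed through NameUtil.sanitize_filename before being used as directory
# names. The helper below is a hypothetical stand-in with the same intent; the
# real sanitizer's behavior may differ in detail.
import re

def _example_sanitize(cmd_id):
  # Assumed behavior for illustration only.
  return re.sub(r"[^A-Za-z0-9._-]", "_", cmd_id)

print(_example_sanitize("vm_0000_echo hello_1510000000000000"))
# -> "vm_0000_echo_hello_1510000000000000" (assumed output)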