def setup(self):
    """
    Setting up test parameters
    """
    log.info("Starting the test setup")
    self.benchmark_name = "SmallFiles"
    self.client_pod_name = "smallfile-client"

    if config.PERF.get("deploy_internal_es"):
        self.es = ElasticSearch()
    else:
        if config.PERF.get("internal_es_server") == "":
            self.es = None
            return
        else:
            self.es = {
                "server": config.PERF.get("internal_es_server"),
                "port": config.PERF.get("internal_es_port"),
                "url": f"http://{config.PERF.get('internal_es_server')}:{config.PERF.get('internal_es_port')}",
            }
            # verify that the connection to the elasticsearch server is OK
            if not super(TestSmallFileWorkload, self).es_connect():
                self.es = None
                return

    super(TestSmallFileWorkload, self).setup()

    # deploy the benchmark-operator (ripsaw)
    self.ripsaw = RipSaw()
    self.ripsaw_deploy(self.ripsaw)
def setup(self):
    """
    Setting up test parameters
    """
    log.info("Starting the test setup")
    self.benchmark_name = "FIO"
    self.client_pod_name = "fio-client"

    if config.PERF.get("deploy_internal_es"):
        self.es = ElasticSearch()
    else:
        if config.PERF.get("internal_es_server") == "":
            self.es = None
            return
        else:
            self.es = {
                "server": config.PERF.get("internal_es_server"),
                "port": config.PERF.get("internal_es_port"),
                "url": f"http://{config.PERF.get('internal_es_server')}:{config.PERF.get('internal_es_port')}",
                "parallel": True,
            }
            # verify that the connection to the elasticsearch server is OK
            if not super(TestFIOBenchmark, self).es_connect():
                self.es = None
                return

    super(TestFIOBenchmark, self).setup()

    # deploy the benchmark-operator
    self.deploy_benchmark_operator()
def es(request):
    def teardown():
        es.cleanup()

    request.addfinalizer(teardown)

    es = ElasticSearch()
    return es
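# A minimal usage sketch, assuming the function above is registered as a
# pytest fixture (the @pytest.fixture decorator is not shown in this excerpt).
# The test name and body below are hypothetical.
def test_workload_with_internal_es(es):
    # The fixture hands the test a live internal ElasticSearch instance and
    # guarantees es.cleanup() runs at teardown via request.addfinalizer().
    log.info(f"Internal ES is reachable at {es.get_ip()}:{es.get_port()}")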
def setup(self):
    """
    Deploy the internal ElasticSearch server and the benchmark operator
    """
    self.es = ElasticSearch()

    # Deploy the benchmark operator
    log.info("Apply Operator CRD")
    self.operator = benchmark_operator.BenchmarkOperator()
    self.operator.deploy()
def es(request):
    # Create internal ES only if a Cloud platform is tested
    if node.get_provider().lower() in constants.CLOUD_PLATFORMS:
        es = ElasticSearch()
    else:
        es = None

    def teardown():
        if es is not None:
            es.cleanup()
            time.sleep(10)

    request.addfinalizer(teardown)
    return es
def setup(self):
    """
    Initialize the test environment
    """
    # Deploy internal ES server - no need to keep the results,
    # so don't use the production ES
    self.es = ElasticSearch()

    # Initialize the SmallFiles workload, based on the benchmark-operator
    self.small_files = SmallFiles(self.es)

    self.ceph_cluster = CephCluster()

    # Get the total storage capacity
    self.ceph_capacity = self.ceph_cluster.get_ceph_capacity()
    log.info(f"Total storage capacity is {self.ceph_capacity:,.2f} GiB")

    # Collect the pools usage before the test starts
    self.orig_data = self.get_cephfs_data()
def setup_internal_es(self):
    """
    Setting up the internal ElasticSearch server to be used by the benchmark
    """
    if config.PERF.get("deploy_internal_es"):
        self.es = ElasticSearch()
    else:
        if config.PERF.get("internal_es_server") == "":
            self.es = None
        else:
            self.es = {
                "server": config.PERF.get("internal_es_server"),
                "port": config.PERF.get("internal_es_port"),
                "url": f"http://{config.PERF.get('internal_es_server')}:{config.PERF.get('internal_es_port')}",
            }
            # verify that the connection to the elasticsearch server is OK
            if not self.es_connect():
                self.es = None
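# A minimal sketch of the PERF configuration block that setup_internal_es()
# reads. Only the key names are taken from the code above; the values are
# illustrative placeholders, and the exact config-file layout is an assumption.
PERF_EXAMPLE = {
    # True: deploy a fresh ElasticSearch inside the cluster for this run
    "deploy_internal_es": False,
    # ...otherwise, point the benchmark at an existing lab ES instance
    "internal_es_server": "es.example.lab",  # hypothetical hostname
    "internal_es_port": 9200,
}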
def test_smallfile_workload(self, file_size, files, threads, samples, clients, interface):
    """
    Run the SmallFile workload

    Args:
        file_size (int) : the size of the file to be used
        files (int) : number of files to use
        threads (int) : number of threads to be used in the test
        samples (int) : how many samples to run for each test
        clients (int) : number of client pods to use in the test
        interface (str) : the volume type (rbd / cephfs)
    """
    if config.PERF.get("deploy_internal_es"):
        self.es = ElasticSearch()
    else:
        if config.PERF.get("internal_es_server") == "":
            self.es = None
            return
        else:
            self.es = {
                "server": config.PERF.get("internal_es_server"),
                "port": config.PERF.get("internal_es_port"),
                "url": f"http://{config.PERF.get('internal_es_server')}:{config.PERF.get('internal_es_port')}",
            }
            # verify that the connection to the elasticsearch server is OK
            if not super(TestSmallFileWorkload, self).es_connect():
                self.es = None
                return

    # deploy the benchmark-operator
    self.deploy_benchmark_operator()

    # verify that there is an elasticsearch server for the benchmark
    if not self.es:
        log.error("This test must have an Elasticsearch server")
        return False

    # Getting the full path for the test logs
    self.full_log_path = get_full_test_logs_path(cname=self)
    self.results_path = get_full_test_logs_path(cname=self)
    self.full_log_path += f"-{file_size}-{files}-{threads}-{samples}-{clients}-{interface}"
    log.info(f"Logs file path name is : {self.full_log_path}")

    # Loading the main template yaml file for the benchmark
    log.info("Create resource file for small_files workload")
    self.crd_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

    # Saving the original elastic-search IP and PORT - if defined in yaml
    self.es_info_backup(self.es)

    self.set_storageclass(interface=interface)

    # Setting the data set to 40% of the total storage capacity
    self.setting_storage_usage(file_size, files, threads, samples, clients)

    self.get_env_info()

    if not self.run():
        log.error("The benchmark failed to run !")
        return

    # Setting back the original elastic-search information
    if self.backup_es:
        self.crd_data["spec"]["elasticsearch"] = self.backup_es

    # Initialize the results doc file.
    full_results = self.init_full_results(
        SmallFileResultsAnalyse(self.uuid, self.crd_data, self.full_log_path, self.main_es)
    )
    log.info(f"Full results is : {full_results.results}")

    if isinstance(self.es, ElasticSearch):
        # Using the internally deployed elasticsearch
        log.info("Getting data from internal ES")
        if self.main_es:
            self.copy_es_data(self.es)
            full_results.read()
        else:
            log.info("Dumping data from the Internal ES to tar ball file")
            self.es.dumping_all_data(self.full_log_path)
    else:
        log.info(self.es)
        self.es = Elasticsearch(hosts=[{"host": self.es["server"], "port": self.es["port"]}])
        full_results.read()

    full_results.add_key("test_time", {"start": self.start_time, "end": self.end_time})

    if self.main_es:
        full_results.es = self.main_es

    if not full_results.dont_check:
        full_results.add_key("hosts", full_results.get_clients_list())
        full_results.init_full_results()
        full_results.aggregate_host_results()
        test_status = full_results.aggregate_samples_results()

        # Generate a link to all the test data in kibana
        columns = "optype,files,filesPerSec,elapsed,sample,tid"
        klink = self.generate_kibana_link("ripsaw-smallfile-results", columns)

        # Generate a link to all the response-time data in kibana
        columns = "optype,sample,iops,max,min,mean,'90%25','95%25','99%25'"
        rtlink = self.generate_kibana_link("ripsaw-smallfile-rsptimes", columns)

        full_results.all_results = {"kibana_all": klink, "kibana_rsptime": rtlink}

        if full_results.es_write():
            res_link = full_results.results_link()
            log.info(f"The result can be found at : {res_link}")

            # Create a text file with the results of all subtests (4 - according to the parameters)
            self.write_result_to_file(res_link)
    else:
        test_status = True

    assert test_status, "Test Failed !"
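# A hedged sketch of how the test above might be parametrized with pytest;
# the decorator placement and every concrete value below are assumptions for
# illustration (an `import pytest` is assumed), not taken from this code.
@pytest.mark.parametrize(
    "file_size, files, threads, samples, clients, interface",
    [
        # file_size (KiB), files per thread, threads, samples, clients, volume type
        (4, 50000, 4, 3, 2, "cephfs"),
        (4, 50000, 4, 3, 2, "rbd"),
    ],
)
def test_smallfile_workload(self, file_size, files, threads, samples, clients, interface):
    ...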
def test_pvc_snapshot_performance_multiple_files(self, file_size, files, threads, interface):
    """
    Run the SmallFile workload and then take a snapshot.

    The test creates ~1M files on the volume - the total data set is the
    same for all tests, ~30GiB - then takes a snapshot and measures the
    time it takes. The test runs 3 times to check consistency.

    Args:
        file_size (int): the size of the file to be created - in KiB
        files (int): number of files each thread will create
        threads (int): number of threads to be used in the workload
        interface (str): the volume interface that will be used
                         CephBlockPool / CephFileSystem

    Raises:
        TimeoutError : in case file creation takes too long (more than 2 hours)

    """
    # Deploying an elastic-search server in the cluster for use by the
    # SmallFiles workload, since it is mandatory for the workload.
    # This is deployed once for all test iterations and will be deleted
    # at the end of the test.
    self.es = ElasticSearch()

    # Loading the main template yaml file for the benchmark and updating some
    # fields with new values
    sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

    if interface == constants.CEPHBLOCKPOOL:
        storageclass = constants.DEFAULT_STORAGECLASS_RBD
    else:
        storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
    log.info(f"Using {storageclass} Storageclass")

    # Setting up the parameters for this test
    sf_data["spec"]["workload"]["args"]["samples"] = 1
    sf_data["spec"]["workload"]["args"]["operation"] = ["create"]
    sf_data["spec"]["workload"]["args"]["file_size"] = file_size
    sf_data["spec"]["workload"]["args"]["files"] = files
    sf_data["spec"]["workload"]["args"]["threads"] = threads
    sf_data["spec"]["workload"]["args"]["storageclass"] = storageclass
    sf_data["spec"]["elasticsearch"] = {
        "url": f"http://{self.es.get_ip()}:{self.es.get_port()}"
    }

    """
    Calculating the size of the volume to be tested: it must be at least
    twice the size of the data set, and at least 100Gi.
    Since file_size is in KB and vol_size needs to be in GB, a unit
    conversion is needed.
    """
    total_files = int(files * threads)
    total_data = int(files * threads * file_size / constants.GB2KB)
    data_set = int(total_data * 3)  # calculate data with replica
    vol_size = data_set if data_set >= 100 else 100
    sf_data["spec"]["workload"]["args"]["storagesize"] = f"{vol_size}Gi"

    environment = get_environment_info()
    if not environment["user"] == "":
        sf_data["spec"]["test_user"] = environment["user"]
    else:
        # since the full results object needs this parameter, initialize it from the CR file
        environment["user"] = sf_data["spec"]["test_user"]

    sf_data["spec"]["clustername"] = environment["clustername"]
    log.debug(f"The smallfile yaml file is {sf_data}")

    # Deploy the benchmark-operator, so we can use the SmallFiles workload
    # to fill up the volume with files, and switch to the benchmark-operator namespace.
    log.info("Deploy the benchmark-operator")
    self.deploy_benchmark_operator()
    switch_to_project(BMO_NAME)

    all_results = []

    self.results_path = get_full_test_logs_path(cname=self)
    log.info(f"Logs file path name is : {self.full_log_path}")

    # Produce ES report
    # Collecting environment information
    self.get_env_info()

    # Initialize the results doc file.
    self.full_results = self.init_full_results(
        ResultsAnalyse(
            self.uuid,
            self.crd_data,
            self.full_log_path,
            "pvc_snapshot_perf_multiple_files",
        )
    )
    self.full_results.add_key("file_size_inKB", file_size)
    self.full_results.add_key("threads", threads)
    self.full_results.add_key("interface", interface)

    for test_num in range(self.tests_numbers):
        test_results = {"creation_time": None, "csi_creation_time": None}

        # deploy the smallfile workload
        log.info("Running SmallFile bench")
        sf_obj = OCS(**sf_data)
        sf_obj.create()

        # wait for the benchmark pods to get created - this takes a while
        for bench_pod in TimeoutSampler(
            240,
            10,
            get_pod_name_by_pattern,
            "smallfile-client",
            BMO_NAME,
        ):
            try:
                if bench_pod[0] is not None:
                    small_file_client_pod = bench_pod[0]
                    break
            except IndexError:
                log.info("Bench pod not ready yet")

        bench_pod = OCP(kind="pod", namespace=BMO_NAME)
        log.info("Waiting for SmallFile benchmark to Run")
        assert bench_pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            resource_name=small_file_client_pod,
            sleep=30,
            timeout=600,
        )

        # Initialize the pvc_name variable so it will not be in loop scope only.
        pvc_name = ""
        for item in bench_pod.get()["items"]:
            if item.get("metadata").get("name") == small_file_client_pod:
                for volume in item.get("spec").get("volumes"):
                    if "persistentVolumeClaim" in volume:
                        pvc_name = volume["persistentVolumeClaim"]["claimName"]
                        break
        log.info(f"Benchmark PVC name is : {pvc_name}")

        # Creation of 1M files on CephFS can take a lot of time
        timeout = 7200
        while timeout >= 0:
            logs = bench_pod.get_logs(name=small_file_client_pod)
            if "RUN STATUS DONE" in logs:
                break
            timeout -= 30
            if timeout == 0:
                raise TimeoutError("Timed out waiting for benchmark to complete")
            time.sleep(30)
        log.info(f"Smallfile test ({test_num + 1}) finished.")

        # Taking a snapshot of the PVC (which contains the files)
        snap_name = pvc_name.replace("claim", "snapshot-")
        log.info(f"Taking snapshot of the PVC {pvc_name}")
        log.info(f"Snapshot name : {snap_name}")

        start_time = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")

        test_results["creation_time"] = self.measure_create_snapshot_time(
            pvc_name=pvc_name,
            snap_name=snap_name,
            namespace=BMO_NAME,
            interface=interface,
            start_time=start_time,
        )
        log.info(
            f"Snapshot with name {snap_name} and id {self.snap_uid} creation time is"
            f' {test_results["creation_time"]} seconds'
        )

        test_results["csi_creation_time"] = performance_lib.measure_csi_snapshot_creation_time(
            interface=interface, snapshot_id=self.snap_uid, start_time=start_time
        )
        log.info(
            f"Snapshot with name {snap_name} and id {self.snap_uid} csi creation time is"
            f' {test_results["csi_creation_time"]} seconds'
        )
        all_results.append(test_results)

        # Delete the smallfile workload - which will also delete the PVC
        log.info("Deleting the smallfile workload")
        if sf_obj.delete(wait=True):
            log.info("The smallfile workload was deleted successfully")

        # Delete the VolumeSnapshots
        log.info("Deleting the snapshots")
        if self.snap_obj.delete(wait=True):
            log.info("The snapshot was deleted successfully")

        log.info("Verify (and wait if needed) that ceph health is OK")
        ceph_health_check(tries=45, delay=60)

        # Sleep for 1 min. between test samples
        time.sleep(60)

    # Cleanup the elasticsearch instance.
    log.info("Deleting the elastic-search instance")
    self.es.cleanup()

    creation_times = [t["creation_time"] for t in all_results]
    avg_c_time = statistics.mean(creation_times)
    csi_creation_times = [t["csi_creation_time"] for t in all_results]
    avg_csi_c_time = statistics.mean(csi_creation_times)
    t_dataset = int(data_set / 3)

    log.info(f"Full test report for {interface}:")
    log.info(
        f"Test ran {self.tests_numbers} times, "
        f"All snapshot creation results are {creation_times} seconds"
    )
    log.info(f"The average snapshot creation time is : {avg_c_time} seconds")
    log.info(
        f"Test ran {self.tests_numbers} times, "
        f"All snapshot csi creation results are {csi_creation_times}"
    )
    log.info(f"The average csi snapshot creation time is : {avg_csi_c_time}")
    log.info(
        f"Number of Files on the volume : {total_files:,}, "
        f"Total dataset : {t_dataset} GiB"
    )

    self.full_results.add_key("avg_snapshot_creation_time_insecs", avg_c_time)
    self.full_results.all_results["total_files"] = total_files
    self.full_results.all_results["total_dataset"] = t_dataset
    self.full_results.all_results["creation_time"] = creation_times
    self.full_results.all_results["csi_creation_time"] = csi_creation_times

    # Write the test results into the ES server
    log.info("Writing results to the elastic search server")
    if self.full_results.es_write():
        res_link = self.full_results.results_link()
        # write the ES link to the test results in the test log.
        log.info(f"The result can be found at : {res_link}")

        # Create a text file with the results of all subtests
        self.write_result_to_file(res_link)
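# A worked example of the volume sizing used in the test above. The numbers
# are illustrative only, and it assumes constants.GB2KB == 1024 * 1024
# (KiB per GiB):
GB2KB = 1024 * 1024
files, threads, file_size = 125_000, 8, 32             # hypothetical: 1M files of 32 KiB
total_data = int(files * threads * file_size / GB2KB)  # 30 (~30 GiB of raw data)
data_set = int(total_data * 3)                         # 90, x3 for Ceph replication
vol_size = data_set if data_set >= 100 else 100        # floored at 100 -> "100Gi" PVC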
def test_pvc_snapshot_performance_multiple_files(self, file_size, files, threads, interface):
    """
    Run the SmallFile workload and then take a snapshot.

    The test creates ~1M files on the volume - the total data set is the
    same for all tests, ~30GiB - then takes a snapshot and measures the
    time it takes. The test runs 3 times to check consistency.

    Args:
        file_size (int): the size of the file to be created - in KiB
        files (int): number of files each thread will create
        threads (int): number of threads to be used in the workload
        interface (str): the volume interface that will be used
                         CephBlockPool / CephFileSystem

    Raises:
        TimeoutError : in case file creation takes too long (more than 2 hours)

    """
    # Loading the main template yaml file for the benchmark and updating some
    # fields with new values
    sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

    # Deploying an elastic-search server in the cluster for use by the
    # SmallFiles workload, since it is mandatory for the workload.
    # This is deployed once for all test iterations and will be deleted
    # at the end of the test.
    if config.PERF.get("deploy_internal_es"):
        self.es = ElasticSearch()
        sf_data["spec"]["elasticsearch"] = {
            "url": f"http://{self.es.get_ip()}:{self.es.get_port()}"
        }
    else:
        if config.PERF.get("internal_es_server") == "":
            self.es = None
            return
        else:
            self.es = {
                "server": config.PERF.get("internal_es_server"),
                "port": config.PERF.get("internal_es_port"),
                "url": f"http://{config.PERF.get('internal_es_server')}:{config.PERF.get('internal_es_port')}",
            }
            # verify that the connection to the elasticsearch server is OK
            if not super(TestPvcSnapshotPerformance, self).es_connect():
                self.es = None
                log.error("ElasticSearch doesn't exist ! The test cannot run")
                return
            sf_data["spec"]["elasticsearch"] = {"url": self.es["url"]}

    if interface == constants.CEPHBLOCKPOOL:
        storageclass = constants.DEFAULT_STORAGECLASS_RBD
    else:
        storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
    log.info(f"Using {storageclass} Storageclass")

    # Setting up the parameters for this test
    sf_data["spec"]["workload"]["args"]["samples"] = 1
    sf_data["spec"]["workload"]["args"]["operation"] = ["create"]
    sf_data["spec"]["workload"]["args"]["file_size"] = file_size
    sf_data["spec"]["workload"]["args"]["files"] = files
    sf_data["spec"]["workload"]["args"]["threads"] = threads
    sf_data["spec"]["workload"]["args"]["storageclass"] = storageclass

    """
    Calculating the size of the volume to be tested: it must be at least
    twice the size of the data set, and at least 100Gi.
    Since file_size is in KB and vol_size needs to be in GB, a unit
    conversion is needed.
    """
    total_files = int(files * threads)
    total_data = int(files * threads * file_size / constants.GB2KB)
    data_set = int(total_data * 3)  # calculate data with replica
    vol_size = data_set if data_set >= 100 else 100
    sf_data["spec"]["workload"]["args"]["storagesize"] = f"{vol_size}Gi"

    environment = get_environment_info()
    if not environment["user"] == "":
        sf_data["spec"]["test_user"] = environment["user"]
    else:
        # since the full results object needs this parameter, initialize it from the CR file
        environment["user"] = sf_data["spec"]["test_user"]

    sf_data["spec"]["clustername"] = environment["clustername"]
    log.debug(f"The smallfile yaml file is {sf_data}")

    # Deploy the benchmark-operator, so we can use the SmallFiles workload
    # to fill up the volume with files, and switch to the benchmark-operator namespace.
    log.info("Deploy the benchmark-operator")
    self.deploy_benchmark_operator()
    switch_to_project(BMO_NAME)

    all_results = []

    # Produce ES report
    # Collecting environment information
    self.get_env_info()

    # Initialize the results doc file.
    self.full_results = self.init_full_results(
        ResultsAnalyse(
            self.uuid,
            self.crd_data,
            self.full_log_path,
            "pvc_snapshot_perf_multiple_files",
        )
    )
    self.full_results.add_key("file_size_inKB", file_size)
    self.full_results.add_key("threads", threads)
    self.full_results.add_key("interface", interface)

    for test_num in range(self.tests_numbers):
        test_results = {"creation_time": None, "csi_creation_time": None}

        # deploy the smallfile workload
        self.crd_data = sf_data
        self.client_pod_name = "smallfile-client"
        self.deploy_and_wait_for_wl_to_start(timeout=240)

        # Initialize the pvc_name variable so it will not be in loop scope only.
        pvc_name = (
            OCP(kind="pvc", namespace=BMO_NAME)
            .get()
            .get("items")[0]
            .get("metadata")
            .get("name")
        )
        log.info(f"Benchmark PVC name is : {pvc_name}")

        self.wait_for_wl_to_finish(sleep=30)

        # Taking a snapshot of the PVC (which contains the files)
        snap_name = pvc_name.replace("claim", "snapshot-")
        log.info(f"Taking snapshot of the PVC {pvc_name}")
        log.info(f"Snapshot name : {snap_name}")

        start_time = self.get_time("csi")

        test_results["creation_time"] = self.measure_create_snapshot_time(
            pvc_name=pvc_name,
            snap_name=snap_name,
            namespace=BMO_NAME,
            interface=interface,
            start_time=start_time,
        )
        log.info(
            f"Snapshot with name {snap_name} and id {self.snap_uid} creation time is"
            f' {test_results["creation_time"]} seconds'
        )

        test_results["csi_creation_time"] = performance_lib.measure_csi_snapshot_creation_time(
            interface=interface, snapshot_id=self.snap_uid, start_time=start_time
        )
        log.info(
            f"Snapshot with name {snap_name} and id {self.snap_uid} csi creation time is"
            f' {test_results["csi_creation_time"]} seconds'
        )
        all_results.append(test_results)

        # Delete the smallfile workload - which will also delete the PVC
        log.info("Deleting the smallfile workload")
        if self.benchmark_obj.delete(wait=True):
            log.info("The smallfile workload was deleted successfully")

        # Delete the VolumeSnapshots
        log.info("Deleting the snapshots")
        if self.snap_obj.delete(wait=True):
            log.info("The snapshot was deleted successfully")

        log.info("Verify (and wait if needed) that ceph health is OK")
        ceph_health_check(tries=45, delay=60)

        # Sleep for 1 min. between test samples
        time.sleep(60)

    # Cleanup the elasticsearch instance, if needed.
    if isinstance(self.es, ElasticSearch):
        log.info("Deleting the elastic-search instance")
        self.es.cleanup()

    creation_times = [t["creation_time"] for t in all_results]
    avg_c_time = statistics.mean(creation_times)
    csi_creation_times = [t["csi_creation_time"] for t in all_results]
    avg_csi_c_time = statistics.mean(csi_creation_times)
    t_dataset = int(data_set / 3)

    log.info(f"Full test report for {interface}:")
    log.info(
        f"Test ran {self.tests_numbers} times, "
        f"All snapshot creation results are {creation_times} seconds"
    )
    log.info(f"The average snapshot creation time is : {avg_c_time} seconds")
    log.info(
        f"Test ran {self.tests_numbers} times, "
        f"All snapshot csi creation results are {csi_creation_times}"
    )
    log.info(f"The average csi snapshot creation time is : {avg_csi_c_time}")
    log.info(
        f"Number of Files on the volume : {total_files:,}, "
        f"Total dataset : {t_dataset} GiB"
    )

    self.full_results.add_key("avg_snapshot_creation_time_insecs", avg_c_time)
    self.full_results.all_results["total_files"] = total_files
    self.full_results.all_results["total_dataset"] = t_dataset
    self.full_results.all_results["creation_time"] = creation_times
    self.full_results.all_results["csi_creation_time"] = csi_creation_times

    # Write the test results into the ES server
    log.info("Writing results to the elastic search server")
    self.results_path = helpers.get_full_test_logs_path(cname=self)
    if self.full_results.es_write():
        res_link = self.full_results.results_link()
        # write the ES link to the test results in the test log.
        log.info(f"The result can be found at : {res_link}")

        # Create a text file with the results of all subtests
        self.write_result_to_file(res_link)
def test_elasticsearch(self):
    """
    This test does the following operations:

    * deploy the elasticsearch module
    * connect to it
    * run a simple SmallFile benchmark (to verify usability)
    * dump the results to a file
    * push the results from the file to the Dev. ES.
    * teardown the environment

    """
    log.info("Test with 'Dummy' Storageclass")
    try:
        self.es = ElasticSearch(sc="dummy")
    except ElasticSearchNotDeployed:
        log.info("Raised as expected !")

    log.info("Test with 'Real' Storageclass")
    try:
        self.es = ElasticSearch()
    except ElasticSearchNotDeployed as ex:
        log.error("Raised as expected !")
        raise ex

    full_log_path = get_full_test_logs_path(cname=self)
    log.info(f"Logs file path name is : {full_log_path}")

    log.info("The ElasticSearch deployment test started.")
    if self.es.get_health():
        log.info("The status of the elasticsearch is OK")
    else:
        log.warning("The status of the elasticsearch is Not OK")
        log.info("Waiting another 30 sec.")
        time.sleep(30)
        if self.es.get_health():
            log.info("The status of the elasticsearch is OK")
        else:
            log.error("The status of the elasticsearch is Not OK ! Exiting.")

    if self.es.get_health():
        log.info("\nThe Elastic-Search server information :\n")
        log.info(f"The Elasticsearch IP is {self.es.get_ip()}")
        log.info(f"The Elasticsearch port is {self.es.get_port()}")
        log.info(f"The password to connect is {self.es.get_password()}")
    else:
        assert False, "The Elasticsearch module is not ready !"

    log.info(f"Test UUID is : {self.smallfile_run(self.es)}")

    assert self.es.dumping_all_data(full_log_path), "Cannot retrieve the test data"
    assert run_command(
        f"ls {full_log_path}/FullResults.tgz"
    ), "Results file was not retrieved from the pod"

    # Try to use the development ES server for testing the elasticsearch_load
    # function that pushes data into an ES server
    try:
        main_es = Elasticsearch(
            [
                {
                    "host": defaults.ELASTICSEARCH_DEV_IP,
                    "port": defaults.ELASTICSEARCE_PORT,
                    "url": f"http://{defaults.ELASTICSEARCH_DEV_IP}:{defaults.ELASTICSEARCE_PORT}",
                }
            ]
        )
    except esexp.ConnectionError:
        log.warning("Cannot connect to the ES server in the LocalServer")
        main_es = None

    assert elasticsearch_load(main_es, full_log_path), "Cannot load data into the main ES server"