def test_fio_workload_simple(self, ripsaw, interface, io_pattern):
    """
    Basic fio performance test.

    Deploys the ripsaw benchmark operator, renders the fio benchmark CR
    for the requested storage interface and IO pattern, waits for the fio
    client pod to complete, checks its logs for the fio failure marker,
    deletes the benchmark CR and finally runs the regression analysis.

    Args:
        ripsaw: ripsaw benchmark-operator fixture
        interface (str): 'CephBlockPool' selects the RBD storage class,
            anything else selects the CephFS storage class
        io_pattern (str): fio IO pattern; 'sequential' switches the jobs
            to dedicated write+read runs
    """
    # Deploy the ripsaw benchmark operator CRD
    log.info("Deploying ripsaw operator")
    ripsaw.apply_crd(
        'resources/crds/'
        'ripsaw_v1alpha1_ripsaw_crd.yaml'
    )
    # Map the requested interface to the matching OCS storage class
    sc = 'ocs-storagecluster-ceph-rbd' if interface == 'CephBlockPool' else 'ocs-storagecluster-cephfs'

    # Create fio benchmark
    log.info("Create resource file for fio workload")
    fio_cr = templating.load_yaml(constants.FIO_CR_YAML)
    # Todo: have pvc_size set to 'get_osd_pods_memory_sum * 5'
    # once pr-2037 is merged
    fio_cr['spec']['clustername'] = config.ENV_DATA['platform'] + get_build() + get_ocs_version()
    fio_cr['spec']['test_user'] = get_ocs_version() + interface + io_pattern
    fio_cr['spec']['workload']['args']['storageclass'] = sc
    if io_pattern == 'sequential':
        # sequential pattern runs separate write and read jobs
        fio_cr['spec']['workload']['args']['jobs'] = ['write', 'read']
    log.info(f'fio_cr: {fio_cr}')
    fio_cr_obj = OCS(**fio_cr)
    fio_cr_obj.create()

    # Wait (up to 300s, polling every 20s) for the fio client pod to appear
    for fio_pod in TimeoutSampler(
        300, 20, get_pod_name_by_pattern, 'fio-client', 'my-ripsaw'
    ):
        try:
            if fio_pod[0] is not None:
                fio_client_pod = fio_pod[0]
                break
        except IndexError:
            # the pattern lookup returned an empty list - pod not created yet
            log.info("Bench pod not ready yet")

    # Wait for fio pod to initialize and complete (long-running workload)
    log.info("Waiting for fio_client to complete")
    pod_obj = OCP(kind='pod')
    pod_obj.wait_for_resource(
        condition='Completed',
        resource_name=fio_client_pod,
        timeout=18000,
        sleep=300,
    )

    output = run_cmd(f'oc logs {fio_client_pod}')
    # BUGFIX: this check used to be wrapped in try/except IOError, but a
    # substring test on an in-memory string can never raise IOError, so the
    # "FIO failed to complete" branch was dead code and failures were never
    # logged. A plain if/else reports both outcomes.
    if 'Fio failed to execute' not in output:
        log.info("FIO has completed successfully")
    else:
        log.info("FIO failed to complete")

    # Clean up fio benchmark
    log.info("Deleting FIO benchmark")
    fio_cr_obj.delete()

    analyze_regression(io_pattern, sc, es_username=fio_cr['spec']['test_user'])
def get_environment_info():
    """
    Getting the environment information, Information that will be collected

    Versions:
        OCP - version / build / channel
        OCS - version / build
        Ceph - version
        Rook - version

    Platform:
        BM / VmWare / Cloud provider etc.
        Instance type / architecture
        Cluster name
        User name that run the test

    Return:
        dict: dictionary that contain the environment information

    """
    results = {}
    # getting the name and email of the user that running the test.
    try:
        user = utils.run_cmd('git config --get user.name').strip()
        email = utils.run_cmd('git config --get user.email').strip()
        results['user'] = f'{user} <{email}>'
    except CommandFailed:
        # if no git user define, the default user is none
        results['user'] = ''

    results['clustername'] = ocp.get_clustername()
    results['platform'] = node.get_provider()
    # Cloud providers are reported upper-case; on-prem platform names kept as-is
    if results['platform'].lower() not in constants.ON_PREM_PLATFORMS:
        results['platform'] = results['platform'].upper()
    results['ocp_build'] = ocp.get_build()
    results['ocp_channel'] = ocp.get_ocp_channel()
    results['ocp_version'] = utils.get_ocp_version()
    results['ceph_version'] = utils.get_ceph_version()
    results['rook_version'] = utils.get_rook_version()
    results['ocs_build'] = ocp.get_ocs_version()
    # Extracting the version number x.y from the full build name.
    # BUGFIX: the dots in the original pattern r"(\d.\d).(\d)" were not
    # escaped, so ANY character was accepted between the digits (e.g.
    # "4x2y3" would match). Escape the dots and allow multi-digit
    # components so versions like 4.10.1 parse correctly.
    m = re.match(r"(\d+\.\d+)\.(\d+)", results['ocs_build'])
    if m:
        results['ocs_version'] = m.group(1)

    # Getting the instance type for cloud or Arch type for None cloud
    worker_lbl = node.get_typed_nodes(
        num_of_nodes=1)[0].data['metadata']['labels']
    if 'beta.kubernetes.io/instance-type' in worker_lbl:
        results['worker_type'] = worker_lbl['beta.kubernetes.io/instance-type']
    else:
        results['worker_type'] = worker_lbl['kubernetes.io/arch']

    return results
def get_environment_info():
    """
    Collect details about the environment the test is running in.

    Gathered data includes version information (OCP version / build /
    channel, OCS build, Ceph and Rook versions), platform details
    (provider, worker instance type or architecture), the cluster name
    and the git identity of the user running the test.

    Return:
        dict: dictionary that contain the environment information

    """
    env_info = {}

    # Identify the engineer running the test from the local git config;
    # fall back to an empty string when no git user is configured.
    try:
        git_name = utils.run_cmd("git config --get user.name").strip()
        git_mail = utils.run_cmd("git config --get user.email").strip()
    except CommandFailed:
        env_info["user"] = ""
    else:
        env_info["user"] = f"{git_name} <{git_mail}>"

    env_info["clustername"] = ocp.get_clustername()

    # On-prem platform names are kept as-is; cloud providers upper-cased
    provider = node.get_provider()
    if provider.lower() in constants.ON_PREM_PLATFORMS:
        env_info["platform"] = provider
    else:
        env_info["platform"] = provider.upper()

    env_info["ocp_build"] = ocp.get_build()
    env_info["ocp_channel"] = ocp.get_ocp_channel()
    env_info["ocp_version"] = utils.get_ocp_version()
    env_info["ceph_version"] = utils.get_ceph_version()
    env_info["rook_version"] = utils.get_rook_version()
    env_info[
        "ocs_build"
    ] = f"{version.get_ocs_version_from_csv(ignore_pre_release=True)}"

    # Instance type for cloud workers, CPU architecture for everything else
    labels = node.get_nodes(num_of_nodes=1)[0].data["metadata"]["labels"]
    instance_key = "beta.kubernetes.io/instance-type"
    if instance_key in labels:
        env_info["worker_type"] = labels[instance_key]
    else:
        env_info["worker_type"] = labels["kubernetes.io/arch"]

    return env_info
def test_smallfile_workload(self, ripsaw, es, file_size, files, threads, samples, interface):
    """
    Run SmallFile Workload

    Deploys the ripsaw operator, renders and creates the smallfile
    benchmark CR against the requested storage class, waits for the
    benchmark client pod to run and finish, then reads, aggregates and
    publishes the results to the elastic-search server.

    Args:
        ripsaw: ripsaw benchmark-operator fixture
        es: internal elastic-search fixture (provides get_ip / get_port)
        file_size (int): size of each file, in KB
        files (int): number of files per thread
        threads (int): number of worker threads
        samples (int): number of test samples to run
        interface: constants.CEPHBLOCKPOOL for RBD, otherwise CephFS
    """
    # Loading the main template yaml file for the benchmark
    sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

    # getting the name and email of the user that running the test.
    try:
        user = run_cmd('git config --get user.name').strip()
        email = run_cmd('git config --get user.email').strip()
    except CommandFailed:
        # if no git user define, use the default user from the CR file
        user = sf_data['spec']['test_user']
        email = ''

    # Saving the Original elastic-search IP and PORT - if defined in yaml,
    # so they can be restored into sf_data before the results are analysed
    es_server = ""
    es_port = ""
    if 'elasticsearch' in sf_data['spec']:
        if 'server' in sf_data['spec']['elasticsearch']:
            es_server = sf_data['spec']['elasticsearch']['server']
        if 'port' in sf_data['spec']['elasticsearch']:
            es_port = sf_data['spec']['elasticsearch']['port']
    else:
        sf_data['spec']['elasticsearch'] = {}

    # Use the internal define elastic-search server in the test
    sf_data['spec']['elasticsearch'] = {
        'server': es.get_ip(), 'port': es.get_port()
    }

    log.info("Apply Operator CRD")
    ripsaw.apply_crd('resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml')
    # Pick the storage class matching the requested interface
    if interface == constants.CEPHBLOCKPOOL:
        storageclass = constants.DEFAULT_STORAGECLASS_RBD
    else:
        storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
    log.info(f"Using {storageclass} Storageclass")
    sf_data['spec']['workload']['args']['storageclass'] = storageclass
    log.info("Running SmallFile bench")

    """
    Setting up the parameters for this test
    """
    sf_data['spec']['workload']['args']['file_size'] = file_size
    sf_data['spec']['workload']['args']['files'] = files
    sf_data['spec']['workload']['args']['threads'] = threads
    sf_data['spec']['workload']['args']['samples'] = samples
    sf_data['spec']['clustername'] = get_clustername()
    sf_data['spec']['test_user'] = f'{user}<{email}>'
    """
    Calculating the size of the volume that need to be test, it should
    be at least twice in the size then the size of the files, and at
    least 100Gi. Since the file_size is in Kb and the vol_size need to
    be in Gb, more calculation is needed.
    """
    # NOTE(review): the factor below is 3x the data-set size, not 2x as the
    # text above says - confirm which is intended.
    vol_size = int(files * threads * file_size * 3)
    vol_size = int(vol_size / constants.GB2KB)
    if vol_size < 100:
        vol_size = 100
    sf_data['spec']['workload']['args']['storagesize'] = f"{vol_size}Gi"

    sf_obj = OCS(**sf_data)
    sf_obj.create()
    log.info(f'The smallfile yaml file is {sf_data}')

    # wait for benchmark pods to get created - takes a while
    # NOTE(review): if the sampler times out without ever yielding a pod,
    # small_file_client_pod stays unbound and the code below raises
    # NameError rather than a clear timeout error - confirm acceptable.
    for bench_pod in TimeoutSampler(240, 10, get_pod_name_by_pattern, 'smallfile-client', constants.RIPSAW_NAMESPACE):
        try:
            if bench_pod[0] is not None:
                small_file_client_pod = bench_pod[0]
                break
        except IndexError:
            # empty list from the pattern lookup - pod not created yet
            log.info("Bench pod not ready yet")

    bench_pod = OCP(kind='pod', namespace=constants.RIPSAW_NAMESPACE)
    log.info("Waiting for SmallFile benchmark to Run")
    assert bench_pod.wait_for_resource(condition=constants.STATUS_RUNNING, resource_name=small_file_client_pod, sleep=30, timeout=600)
    start_time = time.time()
    # After testing manually, changing the timeout
    timeout = 3600

    # Getting the UUID from inside the benchmark pod environment variables
    # NOTE(review): if no 'uuid=' entry is found, uuid is unbound and the
    # log line below raises NameError - confirm the env var is guaranteed.
    output = bench_pod.exec_oc_cmd(f'exec {small_file_client_pod} -- env')
    for line in output.split():
        if 'uuid=' in line:
            uuid = line.split('=')[1]
    log.info(f'the UUID of the test is : {uuid}')

    # Setting back the original elastic-search information
    sf_data['spec']['elasticsearch'] = {
        'server': es_server, 'port': es_port
    }
    full_results = SmallFileResultsAnalyse(uuid, sf_data)

    # Initialize the results doc file with environment metadata.
    full_results.add_key('user', sf_data['spec']['test_user'])
    full_results.add_key('ocp_version', get_ocp_version())
    full_results.add_key('ocp_build', get_build())
    full_results.add_key('ocp_channel', get_ocp_channel())
    # Getting the OCS version
    (ocs_ver_info, _) = get_ocs_version()
    ocs_ver_full = ocs_ver_info['status']['desired']['version']
    # NOTE(review): the dots in this pattern are unescaped, so any
    # character is accepted between the digits - confirm and escape.
    m = re.match(r"(\d.\d).(\d)", ocs_ver_full)
    if m and m.group(1) is not None:
        ocs_ver = m.group(1)
    full_results.add_key('ocs_version', ocs_ver)
    full_results.add_key('vendor', get_provider())
    full_results.add_key(
        'start_time', time.strftime('%Y-%m-%dT%H:%M:%SGMT', time.gmtime()))
    # Calculating the total size of the working data set - in GB
    full_results.add_key(
        'dataset',
        file_size * files * threads * full_results.results['clients'] / constants.GB2KB)
    full_results.add_key(
        'global_options', {
            'files': files,
            'file_size': file_size,
            'storageclass': sf_data['spec']['workload']['args']['storageclass'],
            'vol_size': sf_data['spec']['workload']['args']['storagesize']
        })

    # Poll the client pod logs every 30s until the benchmark reports done,
    # then read and aggregate the results; give up after `timeout` seconds.
    while True:
        logs = bench_pod.exec_oc_cmd(f'logs {small_file_client_pod}',
                                     out_yaml_format=False)
        if "RUN STATUS DONE" in logs:
            full_results.add_key(
                'end_time', time.strftime('%Y-%m-%dT%H:%M:%SGMT', time.gmtime()))
            full_results.read()
            if not full_results.dont_check:
                full_results.add_key('hosts', full_results.get_clients_list())
                full_results.init_full_results()
                full_results.aggregate_host_results()
                test_status = full_results.aggregate_samples_results()
                full_results.write()

                # Creating full link to the results on the ES server
                res_link = 'http://'
                res_link += f'{full_results.server}:{full_results.port}/'
                res_link += f'{full_results.new_index}/_search?q='
                res_link += f'uuid:{full_results.uuid}'
                log.info(f'Full results can be found as : {res_link}')
            else:
                # results checking was disabled - treat the run as passing
                test_status = True
            break

        if timeout < (time.time() - start_time):
            raise TimeoutError(
                "Timed out waiting for benchmark to complete")
        time.sleep(30)

    # Fail the test on either pod-log errors or failed sample aggregation
    assert (not get_logs_with_errors() and test_status), 'Test Failed'
def test_fio_workload_simple(self, ripsaw, es, interface, io_pattern):
    """
    Basic fio performance test.

    Deploys the ripsaw benchmark operator, renders the fio benchmark CR
    for the requested storage interface and IO pattern (pointing it at the
    internal elastic-search server), sizes the data set from the Ceph
    cluster capacity, waits for the fio client pod to complete, checks its
    logs for the fio failure marker, cleans up and runs the regression
    analysis.

    Args:
        ripsaw: ripsaw benchmark-operator fixture
        es: internal elastic-search fixture (provides get_ip / get_port)
        interface (str): 'CephBlockPool' selects the RBD storage class,
            anything else selects the CephFS storage class
        io_pattern (str): fio IO pattern; 'sequential' switches the jobs
            to dedicated write+read runs
    """
    # Deploy the ripsaw benchmark operator CRD
    log.info("Deploying ripsaw operator")
    ripsaw.apply_crd('resources/crds/'
                     'ripsaw_v1alpha1_ripsaw_crd.yaml')
    # Map the requested interface to the matching OCS storage class
    sc = 'ocs-storagecluster-ceph-rbd' if interface == 'CephBlockPool' else 'ocs-storagecluster-cephfs'

    # Create fio benchmark
    log.info("Create resource file for fio workload")
    fio_cr = templating.load_yaml(constants.FIO_CR_YAML)

    # Saving the Original elastic-search IP and PORT - if defined in yaml,
    # so they can be restored before the regression analysis
    es_server = ""
    es_port = ""
    if 'elasticsearch' in fio_cr['spec']:
        if 'server' in fio_cr['spec']['elasticsearch']:
            es_server = fio_cr['spec']['elasticsearch']['server']
        if 'port' in fio_cr['spec']['elasticsearch']:
            es_port = fio_cr['spec']['elasticsearch']['port']
    else:
        fio_cr['spec']['elasticsearch'] = {}

    # Use the internal define elastic-search server in the test
    fio_cr['spec']['elasticsearch'] = {
        'server': es.get_ip(), 'port': es.get_port()
    }

    # Setting the data set to 40% of the total storage capacity but
    # not more then 600GiB
    # NOTE(review): the threshold below compares against 500, not 600 as
    # this comment says - confirm which limit is intended.
    ceph_cluster = CephCluster()
    total_data_set = int(ceph_cluster.get_ceph_capacity() * 0.4)
    filesize = int(fio_cr['spec']['workload']['args']['filesize'].replace(
        'GiB', ''))
    # To make sure the number of App pods will not be more then 50, in case
    # of large data set, changing the size of the file each pod will work on
    if total_data_set > 500:
        filesize = int(ceph_cluster.get_ceph_capacity() * 0.008)
        fio_cr['spec']['workload']['args']['filesize'] = f'{filesize}GiB'
    # make sure that the storage size is larger then the file size
    fio_cr['spec']['workload']['args'][
        'storagesize'] = f'{int(filesize * 1.2)}Gi'
    fio_cr['spec']['workload']['args']['servers'] = int(total_data_set / filesize)
    log.info(f'Total Data set to work on is : {total_data_set} GiB')

    fio_cr['spec']['clustername'] = config.ENV_DATA[
        'platform'] + get_build() + get_ocs_version()
    fio_cr['spec']['test_user'] = get_ocs_version(
    ) + interface + io_pattern
    fio_cr['spec']['workload']['args']['storageclass'] = sc
    if io_pattern == 'sequential':
        # sequential pattern runs separate write and read jobs
        fio_cr['spec']['workload']['args']['jobs'] = ['write', 'read']
    log.info(f'fio_cr: {fio_cr}')
    fio_cr_obj = OCS(**fio_cr)
    fio_cr_obj.create()

    # Wait (up to 300s, polling every 20s) for the fio client pod to appear
    for fio_pod in TimeoutSampler(300, 20, get_pod_name_by_pattern,
                                  'fio-client', constants.RIPSAW_NAMESPACE):
        try:
            if fio_pod[0] is not None:
                fio_client_pod = fio_pod[0]
                break
        except IndexError:
            # the pattern lookup returned an empty list - pod not created yet
            log.info("Bench pod not ready yet")

    # Wait for fio pod to initialize and complete (long-running workload)
    log.info("Waiting for fio_client to complete")
    pod_obj = OCP(kind='pod')
    pod_obj.wait_for_resource(
        condition='Completed',
        resource_name=fio_client_pod,
        timeout=18000,
        sleep=300,
    )

    output = run_cmd(f'oc logs {fio_client_pod}')
    # BUGFIX: this check used to be wrapped in try/except IOError, but a
    # substring test on an in-memory string can never raise IOError, so the
    # "FIO failed to complete" branch was dead code and failures were never
    # logged. A plain if/else reports both outcomes.
    if 'Fio failed to execute' not in output:
        log.info("FIO has completed successfully")
    else:
        log.info("FIO failed to complete")

    # Clean up fio benchmark
    log.info("Deleting FIO benchmark")
    fio_cr_obj.delete()

    # Setting back the original elastic-search information
    fio_cr['spec']['elasticsearch'] = {
        'server': es_server, 'port': es_port
    }

    analyze_regression(io_pattern, sc, es_username=fio_cr['spec']['test_user'])