def report_oltpbench_result(env, server_data, results_dir, username, password, mem_metrics, query_mode='simple'):
    """ Parse and format the data from server_data and the results_dir into
    a JSON body and send those results to the performance storage service """
    LOG.debug("parsing OLTPBench results and assembling request body.")
    metadata, timestamp, benchmark_type, parameters, metrics = parse_oltpbench_data(results_dir)
    add_mem_metrics(metrics, mem_metrics)
    parameters['query_mode'] = query_mode
    parameters['max_connection_threads'] = server_data.get('max_connection_threads')
    metadata['environment']['wal_device'] = server_data.get('wal_device')
    result = {
        'metadata': metadata,
        'timestamp': timestamp,
        'type': benchmark_type,
        'parameters': parameters,
        'metrics': metrics
    }
    send_result(env, '/oltpbench/', username, password, result)
def table_dump(config, artifact_processor):
    """ Create a human-readable table comparing the current test results
    against the historical results (if there are any) """
    text_table = PrettyTable()
    text_table.field_names = FIELD_NAMES
    text_table.align['suite'] = 'l'
    text_table.align['test'] = 'l'
    # return a non-zero code if any benchmark comparison failed
    ret = 0
    for bench_name in sorted(config.benchmarks):
        filename = "{}.json".format(bench_name)
        gbench_run_results = GBenchRunResult.from_benchmark_file(filename)
        for key in sorted(gbench_run_results.benchmarks.keys()):
            result = gbench_run_results.benchmarks.get(key)
            LOG.debug("%s Result:\n%s", bench_name, result)
            comparison = artifact_processor.get_comparison(
                bench_name, result, config.lax_tolerance)
            if comparison.get('pass') == 'FAIL':
                ret = 1
            row_values = list(
                map(
                    lambda field_name: formatFields(
                        field_name, comparison.get(field_name, 0)),
                    text_table.field_names))
            text_table.add_row(row_values)
    print("")
    print(text_table)
    print("")
    return ret
def convert(self, output_file):
    """ Write results to a JUnit-compatible XML file """
    LOG.debug("Converting Google Benchmark {NAME} to JUNIT file {JUNIT_FILE}".format(
        NAME=self.name, JUNIT_FILE=output_file))
    tree = ElementTree.ElementTree()
    test_suite_el = ElementTree.Element("testsuite")
    tree._setroot(test_suite_el)
    # add attributes to the root testsuite element
    # (values must be strings for ElementTree to serialize them)
    for el_name in ["errors", "failures", "skipped", "tests", "name"]:
        test_suite_el.set(el_name, str(getattr(self, el_name)))
    # add test results; JUnit expects one "testcase" element per test
    for test in self.testcases:
        test_el = ElementTree.SubElement(test_suite_el, "testcase")
        test_el.set("classname", test.suite_name)
        test_el.set("name", test.name)
    tree.write(output_file, xml_declaration=True, encoding='utf8')
    return
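# Example of the XML this produces (illustrative names and counts, shown
# indented for readability; ElementTree writes it without pretty-printing):
# <?xml version='1.0' encoding='utf8'?>
# <testsuite errors="0" failures="0" skipped="0" tests="2" name="data_table_benchmark">
#   <testcase classname="DataTableBenchmark" name="SimpleInsert" />
#   <testcase classname="DataTableBenchmark" name="SimpleScan" />
# </testsuite>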
def _execute_benchmark(self, cmd):
    """ Execute the microbenchmark command provided.

    Parameters
    ----------
    cmd : str
        The command to be executed in order to run the microbenchmark.

    Returns
    -------
    ret_code : int
        The return value from the benchmark process. 0 if successful.
    err : Error
        The error that occurred. None if successful.
    """
    LOG.debug(f'Executing command [num_threads={self.config.num_threads}]: {cmd}')
    try:
        output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
        pretty_format_json = json.dumps(
            json.loads(output.decode('utf8').replace("'", '"')), indent=4)
        LOG.debug(f'OUTPUT: {pretty_format_json}')
        return 0, None
    except subprocess.CalledProcessError as err:
        LOG.error(err)
        return err.returncode, err
    except Exception as err:
        return 1, err
def send_results(config, artifact_processor):
    """ Iterate over the microbenchmark results, generate a comparison
    against the historical artifacts, and send that comparison to the
    performance storage service """
    ret_code = 0
    for bench_name in sorted(config.benchmarks):
        filename = "{}.json".format(bench_name)
        gbench_run_results = GBenchRunResult.from_benchmark_file(filename)
        for key in sorted(gbench_run_results.benchmarks.keys()):
            result = gbench_run_results.benchmarks.get(key)
            LOG.debug("%s Result:\n%s", bench_name, result)
            comparison = artifact_processor.get_comparison_for_publish_result(
                bench_name, result, config.lax_tolerance)
            try:
                report_microbenchmark_result(config.publish_results_env,
                                             result.timestamp, config, comparison)
            except Exception as err:
                LOG.error("Error reporting results to performance storage service")
                LOG.error(err)
                ret_code = 1
    return ret_code
def read_gbench_results(gbench_results_file):
    """ Based on a test result file, get a list of all the GBenchTestResult objects """
    LOG.debug("Reading results file {}".format(gbench_results_file))
    testcases = []
    with open(gbench_results_file) as gbench_result_file:
        gbench_data = json.load(gbench_result_file)
        for benchmark in gbench_data.get('benchmarks'):
            test_result = GBenchTestResult(benchmark)
            testcases.append(test_result)
    return testcases
def send_result(env, path, username, password, result):
    """ Send the results to the performance storage service. If the service
    responds with an error code this will raise an error. """
    LOG.debug("Sending request to {PATH}".format(PATH=path))
    base_url = get_base_url(env)
    try:
        response = requests.post(base_url + path, json=result, auth=(username, password))
        response.raise_for_status()
    except Exception as err:
        # only HTTP errors carry a response; connection errors do not
        if getattr(err, 'response', None) is not None:
            LOG.error(err.response.text)
        else:
            LOG.error(err)
        raise
def generate_numa_command():
    """ Return the command line string to execute numactl """
    # use all the cpus from the highest numbered numa node
    nodes = subprocess.check_output(
        "numactl --hardware | grep 'available: ' | cut -d' ' -f2", shell=True)
    if not nodes:
        return ""
    highest_cpu_node = int(nodes) - 1
    if highest_cpu_node > 0:
        LOG.debug("Highest NUMA node = {}".format(highest_cpu_node))
        LOG.debug("Enabling NUMA support")
        return "numactl --cpunodebind={} --preferred={}".format(
            highest_cpu_node, highest_cpu_node)
    # single NUMA node: run without a numactl prefix
    return ""
def get_build_path(build_type):
    """ Get the path to the DB binary """
    path_list = [
        ("standard", "build"),
        ("CLion", "cmake-build-{}".format(build_type)),
    ]
    for _, path in path_list:
        db_bin_path = os.path.join(DIR_REPO, path, "bin", DEFAULT_DB_BIN)
        LOG.debug(f'Trying to find build path in {db_bin_path}')
        if os.path.exists(db_bin_path):
            return db_bin_path
    raise RuntimeError(f'No DB binary found in {path_list}')
def get_artifact_data(self, folders, project, branch, build_number, artifact_path):
    """ Returns a dict with details about the Jenkins artifact. The contents
    of the artifact must be JSON for this to work. """
    artifact_url = "{BASE_URL}/{JOB_PATH}/{BUILD_NUM}/artifact/{ARTIFACT}".format(
        BASE_URL=self.base_url,
        JOB_PATH=create_job_path(folders, project, branch),
        BUILD_NUM=build_number,
        ARTIFACT=artifact_path)
    try:
        LOG.debug("Retrieving JSON artifact data from {URL}".format(URL=artifact_url))
        response = requests.get(artifact_url)
        response.raise_for_status()
        return response.json()
    except Exception as err:
        LOG.error("Unexpected error when retrieving Jenkins artifact")
        LOG.error(err)
        raise
def get_build_data(self, folders, project, branch, build_number=1):
    """ Returns a dict with details about the Jenkins build. This contains
    information about the artifacts associated with the build """
    build_url = "{BASE_URL}/{JOB_PATH}/{BUILD_NUM}/api/json".format(
        BASE_URL=self.base_url,
        JOB_PATH=create_job_path(folders, project, branch),
        BUILD_NUM=build_number)
    try:
        LOG.debug("Retrieving Jenkins JSON build data from {URL}".format(URL=build_url))
        response = requests.get(build_url)
        response.raise_for_status()
        return response.json()
    except Exception as err:
        LOG.error("Unexpected error when retrieving Jenkins build data")
        LOG.error(err)
        raise
def get_job_data(self, folders, project, branch):
    """ Returns a dict with details about the Jenkins job. This contains
    information about the builds associated with the job """
    job_url = "{BASE_URL}/{JOB_PATH}/api/json".format(
        BASE_URL=self.base_url,
        JOB_PATH=create_job_path(folders, project, branch))
    try:
        LOG.debug("Retrieving list of Jenkins builds from {URL}".format(URL=job_url))
        response = requests.get(job_url)
        response.raise_for_status()
        return response.json()
    except Exception as err:
        LOG.error("Unexpected error when retrieving list of Jenkins builds")
        LOG.error(err)
        raise
def copy_benchmark_result(bench_name, build_dir):
    """ Copy the benchmark result file. This is used when running in local mode.

    Parameters
    ----------
    bench_name : str
        The name of the microbenchmark.
    build_dir : str
        The path to the build directory.
    """
    result_file = f'{bench_name}.json'
    shutil.copy(result_file, build_dir)
    LOG.debug(f'Copying result file {result_file} into {build_dir}')
def generate_perf_command(bench_name):
    """ Create the command line string to execute perf.

    Parameters
    ----------
    bench_name : str
        The name of the benchmark.

    Returns
    -------
    perf_cmd : str
        The command to execute perf data collection.
    """
    perf_output_file = f'{bench_name}.perf'
    LOG.debug(f'Enabling perf data collection [output={perf_output_file}]')
    return f'perf record --output={perf_output_file}'
def _execute_benchmark(self, cmd):
    """ Execute the microbenchmark command provided """
    LOG.debug("Executing command [num_threads={}]: {}".format(
        self.config.num_threads, cmd))
    try:
        output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
        pretty_format_json = json.dumps(json.loads(
            output.decode('utf8').replace("'", '"')), indent=4)
        LOG.debug("OUTPUT: {}".format(pretty_format_json))
        return 0, None
    except subprocess.CalledProcessError as err:
        return err.returncode, err
    except Exception as err:
        return 1, err
def report_microbenchmark_result(env, timestamp, config, artifact_processor_comparison):
    """ Parse and format the data from the microbenchmark tests into a JSON
    body and send those to the performance storage service """
    LOG.debug("parsing microbenchmark results and assembling request body.")
    metadata, test_suite, test_name, metrics = parse_microbenchmark_data(artifact_processor_comparison)
    parameters = parse_parameters(config)
    metadata['environment']['wal_device'] = parse_wal_device(config)
    result = {
        'metadata': metadata,
        'timestamp': int(timestamp.timestamp() * 1000),  # convert to milliseconds
        'test_suite': test_suite,
        'test_name': test_name,
        'parameters': parameters,
        'metrics': metrics
    }
    send_result(env, '/microbenchmark/', config.publish_results_username,
                config.publish_results_password, result)
@classmethod
def from_benchmark_file(cls, result_file_name):
    """ Create a GBenchRunResult by passing it a JSON file generated by
    Google Benchmark """
    LOG.debug("Loading local benchmark result file {}".format(result_file_name))
    with open(result_file_name) as result_file:
        raw_results = ''
        try:
            raw_results = result_file.read()
            results = json.loads(raw_results)
            return cls(results)
        except Exception as err:
            LOG.error("Invalid data read from benchmark result file {}".format(
                result_file_name))
            LOG.error(err)
            LOG.error(raw_results)
            raise
def generate_numa_command():
    """ Create the command line string to execute numactl.

    Returns
    -------
    numa_cmd : str
        The command to execute using NUMA.
    """
    # use all the cpus from the highest numbered numa node
    nodes = subprocess.check_output(
        "numactl --hardware | grep 'available: ' | cut -d' ' -f2", shell=True)
    if not nodes:
        return ''
    highest_cpu_node = int(nodes) - 1
    if highest_cpu_node > 0:
        LOG.debug(f'Highest NUMA node = {highest_cpu_node}')
        LOG.debug('Enabling NUMA support')
        return f'numactl --cpunodebind={highest_cpu_node} --preferred={highest_cpu_node}'
    # single NUMA node: run without a numactl prefix
    return ''
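# Hypothetical usage sketch (not in the original source): one way the numactl
# prefix above could be combined with the perf prefix and the benchmark binary
# when assembling the full shell command. The helper name, benchmark_path
# argument, and Google Benchmark output flags are illustrative assumptions.
def _build_benchmark_command_sketch(bench_name, benchmark_path):
    numa_cmd = generate_numa_command()
    perf_cmd = generate_perf_command(bench_name)
    bench_cmd = (f'{benchmark_path}/{bench_name} '
                 f'--benchmark_format=json --benchmark_out={bench_name}.json')
    # drop empty prefixes (e.g. numa_cmd is '' on single-node machines)
    return ' '.join(part for part in (numa_cmd, perf_cmd, bench_cmd) if part)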
def stop_db(self, is_dry_run=False):
    """ Stop the DB server and print its log file """
    if not self.db_process or is_dry_run:
        LOG.debug('DB has already been stopped.')
        return
    # get the exit code, if any
    self.db_process.poll()
    if self.db_process.returncode is not None:
        # DB already terminated
        msg = f'DB terminated with return code {self.db_process.returncode}'
        LOG.info(msg)
        self.print_db_logs()
        raise RuntimeError(msg)
    else:
        # still (correctly) running, terminate it
        self.db_process.terminate()
        LOG.info("Stopped DB successfully")
    self.db_process = None
def has_min_history(self):
    """ Check whether all the collected artifacts have at least the minimum
    number of results.

    Returns
    -------
    bool
        Whether there were enough historical results.
    """
    if not self.required_num_results:
        LOG.debug("required_num_results is not set")
        return False
    if len(self.artifacts) == 0:
        LOG.debug("No artifacts available")
        return False
    for key in self.artifacts.keys():
        artifact = self.artifacts.get(key)
        (suite_name, test_name) = key
        LOG.debug(
            "# of artifacts for {SUITE}.{TEST}: {NUM_ARTIFACTS} [required={REQUIRED_ARTIFACTS}]".format(
                SUITE=suite_name,
                TEST=test_name,
                NUM_ARTIFACTS=artifact.get_num_results(),
                REQUIRED_ARTIFACTS=self.required_num_results))
        if artifact.get_num_results() < self.required_num_results:
            return False
    return True
def load_local_artifacts(self, latest_local_build_dir):
    """ Load artifacts when run in local mode. It reads the results from the
    local directory structure and stores them in the artifact processor.

    Parameters
    ----------
    latest_local_build_dir : str
        The path to the latest local build dir. The directories increment
        001, 002, 003, etc. This is the highest one.
    """
    LOG.debug("Processing local data repository {}".format(LOCAL_REPO_DIR))
    local_build_dirs = reversed(sorted(next(os.walk(LOCAL_REPO_DIR))[1]))
    for build_dir in local_build_dirs:
        if os.path.basename(build_dir) == latest_local_build_dir:
            LOG.debug("Skipping data dir {}".format(build_dir))
            continue
        LOG.debug("Reading results from local directory {}".format(build_dir))
        for build_file in glob(os.path.join(LOCAL_REPO_DIR, build_dir, '*.json')):
            gbench_run_results = GBenchRunResult.from_benchmark_file(build_file)
            self.add_artifact(gbench_run_results)
        # Determine if we have enough history. Stop collecting information if we do
        if self.has_min_history():
            return
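# Assumed layout of the local data repository (illustrative): LOCAL_REPO_DIR
# holds one zero-padded directory per prior run, each containing the
# per-benchmark JSON result files, e.g.
#   <LOCAL_REPO_DIR>/
#     001/data_table_benchmark.json
#     002/data_table_benchmark.json
#     003/data_table_benchmark.json   <- latest_local_build_dir (skipped above)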
type=str, help="Performance Storage Service Username") parser.add_argument("--publish-password", type=str, help="Performance Storage Service password") args = parser.parse_args() # ------------------------------------------------------- # Set config # ------------------------------------------------------- if args.debug: LOG.setLevel(logging.DEBUG) LOG.debug("args: {}".format(args)) config_args = {'publish_results_env': args.publish_results} if args.num_threads: config_args['num_threads'] = args.num_threads if args.logfile_path: config_args['logfile_path'] = args.logfile_path if args.benchmark_path: config_args['benchmark_path'] = args.benchmark_path if args.local: config_args['is_local'] = args.local if args.benchmark: config_args['benchmarks'] = sorted(args.benchmark) if args.folders: config_args['jenkins_folders'] = args.folders if args.branch:
def copy_benchmark_result(bench_name, build_dir):
    """ Copy the benchmark result file. This is used when running in local mode """
    result_file = "{}.json".format(bench_name)
    shutil.copy(result_file, build_dir)
    LOG.debug("Copying result file {FROM} into {TO}".format(FROM=result_file, TO=build_dir))
def _create_comparison_dict(self, bench_name, gbench_result, ref_type='none', lax_tolerance=None):
    """ Create the comparison dict based on the ref type. It takes a
    GBenchTestResult and compares it against the results for the same
    benchmark stored in the artifact processor.

    Parameters
    ----------
    bench_name : str
        The name of the microbenchmark.
    gbench_result : GBenchTestResult
        The new result to compare against the historical data.
    ref_type : str, optional
        The type of comparison to do against the historical reference data.
        The options are 'none', 'lax', and 'historic'. 'none' does no
        comparison, 'lax' uses a relaxed threshold, and 'historic' uses the
        normal thresholds. (The default is 'none'.)
    lax_tolerance : int, optional
        The allowed level of performance tolerance. This is only used if
        there are not enough historical results. (The default is None.)

    Returns
    -------
    dict
        An object with information about the microbenchmark results and how
        they compare to the historical results.
    """
    key = (gbench_result.suite_name, gbench_result.test_name)
    LOG.debug("Loading {REF_TYPE} history data for {SUITE_NAME}.{TEST_NAME} [{BENCH_NAME}]".format(
        REF_TYPE=ref_type,
        SUITE_NAME=gbench_result.suite_name,
        TEST_NAME=gbench_result.test_name,
        BENCH_NAME=bench_name))
    comparison = {
        "suite": gbench_result.suite_name,
        "test": gbench_result.test_name,
        "num_results": 0,
        "throughput": gbench_result.items_per_second,
        "iterations": gbench_result.iterations,
        "tolerance": 0,
        "reference_type": ref_type,
        "status": "PASS"
    }
    if ref_type != 'none':
        historical_results = self.artifacts.get(key)
        if historical_results.get_mean_throughput() <= 0 or \
                not comparison.get('throughput') or comparison.get('throughput') <= 0:
            return comparison
        comparison['num_results'] = historical_results.get_num_results()
        comparison['tolerance'] = BENCHMARKS_TO_RUN[bench_name] if ref_type == 'historic' else lax_tolerance
        comparison['ref_throughput'] = historical_results.get_mean_throughput()
        comparison['coef_var'] = 100 * historical_results.get_stdev_throughput() / comparison.get('ref_throughput')
        comparison['change'] = 100 * (gbench_result.items_per_second -
                                      comparison.get('ref_throughput')) / comparison.get('ref_throughput')
        comparison['status'] = 'PASS' if is_comparison_pass(
            comparison.get('ref_throughput'), comparison.get('throughput'),
            comparison.get('tolerance')) else 'FAIL'
    return comparison
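# Illustrative example of the dict returned above for a 'historic' comparison.
# All values are assumed for the sake of the example, not real measurements.
_example_comparison = {
    "suite": "DataTableBenchmark",   # hypothetical suite name
    "test": "SimpleInsert",          # hypothetical test name
    "num_results": 30,               # number of historical results found
    "throughput": 1050000.0,         # items/sec measured in this run
    "iterations": 10,
    "tolerance": 5,                  # allowed % regression for this benchmark
    "reference_type": "historic",
    "ref_throughput": 1000000.0,     # mean historical throughput
    "coef_var": 2.5,                 # stdev as a % of the historical mean
    "change": 5.0,                   # % change vs. the historical mean
    "status": "PASS"
}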
def __init__(self, required_num_results=None):
    # key = (suite_name, test_name)
    self.artifacts = {}
    self.required_num_results = required_num_results
    LOG.debug("min_ref_values: {}".format(required_num_results))
    return
def has_db_started(raw_db_log_line, port, pid):
    """ Check whether the DB has started by checking its log """
    log_line = raw_db_log_line.decode("utf-8").rstrip("\n")
    LOG.debug(log_line)
    check_line = f'[info] Listening on Unix domain socket with port {port} [PID={pid}]'
    return log_line.endswith(check_line)
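# Minimal usage sketch, assuming the DB emits the log line format that
# check_line above expects (port and PID values are illustrative):
# >>> has_db_started(
# ...     b'[info] Listening on Unix domain socket with port 15721 [PID=4242]\n',
# ...     port=15721, pid=4242)
# True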
def generate_perf_command(bench_name):
    """ Return the command line string to execute perf """
    perf_output_file = "{}.perf".format(bench_name)
    LOG.debug("Enabling perf data collection [output={}]".format(perf_output_file))
    return "perf record --output={}".format(perf_output_file)