  def _get_benchmark_methods(self):
    """Returns list of benchmark methods to execute."""
    filter_prefix = 'filter:'
    benchmark_methods = []
    for benchmark_method_pattern in self.config.benchmark_method_patterns:
      if filter_prefix not in benchmark_method_pattern:
        benchmark_methods.append(benchmark_method_pattern)
      else:
        index = benchmark_method_pattern.find(filter_prefix)
        benchmark_class = benchmark_method_pattern[:index - 1]
        pattern = benchmark_method_pattern[index + len(filter_prefix):]
        class_instance = utils.instantiate_benchmark_class(
            benchmark_class, '/dev/null', '', None, {})
        for benchmark_method_name in dir(class_instance):
          if re.match(pattern, benchmark_method_name):
            benchmark_methods.append(benchmark_class + '.' +
                                     benchmark_method_name)
    logging.info('The following benchmark methods will be executed: %s',
                 benchmark_methods)
    return benchmark_methods
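
# Illustrative note (not part of the original module): entries in
# config.benchmark_method_patterns are either the canonical path to a single
# benchmark method, or a benchmark class path followed by '.filter:' and a
# regular expression that selects methods by name, as parsed by
# _get_benchmark_methods above. The class and method names below are
# hypothetical:
#
#   'official.resnet.resnet_benchmark.ResnetBenchmark.benchmark_1_gpu'
#       -> run exactly this method
#   'official.resnet.resnet_benchmark.ResnetBenchmark.filter:benchmark_.*_gpu'
#       -> run every method of ResnetBenchmark whose name matches the regex
#          'benchmark_.*_gpu'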
def _run_internal(benchmark_method, harness_info, site_package_info,
                  root_output_dir, config, queue):
  """Runs a benchmark method and puts the result onto the queue.

  Args:
    benchmark_method: Canonical path to the benchmark method
    harness_info: Description of the benchmark harness used in the benchmark
    site_package_info: Description of the site-package used in the benchmark
    root_output_dir: Directory under which to put the benchmark output
    config: An instance of perfzero_config
    queue: An interprocess queue to transfer benchmark result to the caller
  """
  start_timestamp = time.time()
  execution_timestamp = start_timestamp
  method_has_exception = False
  execution_id = (config.execution_id if config.execution_id else
                  datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f'))
  output_dir = os.path.join(root_output_dir, execution_id)
  if config.scratch_gcs_url:
    model_output_dir = os.path.join(config.scratch_gcs_url, execution_id)
  else:
    model_output_dir = output_dir
  utils.make_dir_if_not_exist(output_dir)

  benchmark_class, benchmark_method_name = benchmark_method.rsplit('.', 1)
  benchmark_class_name = benchmark_class.rsplit('.', 1)[1]
  tensorflow_profiler = TensorFlowProfiler(config.profiler_enabled_time_str,
                                           output_dir)
  process_info_tracker = ProcessInfoTracker(output_dir)
  process_info = None

  # Set up per-method file logger
  filehandler = logging.FileHandler(
      filename=os.path.join(output_dir, 'perfzero.log'), mode='w')
  filehandler.setFormatter(
      logging.Formatter('%(asctime)s %(levelname)s: %(message)s'))
  logging.getLogger().addHandler(filehandler)

  try:
    if config.tpu_parameters:
      tpu = config.tpu_parameters.get('name')
    else:
      tpu = None
    if config.perfzero_constructor_args:
      constructor_args = json.loads(config.perfzero_constructor_args)
    else:
      constructor_args = {}
    class_instance = utils.instantiate_benchmark_class(
        benchmark_class=benchmark_class,
        output_dir=model_output_dir,
        root_data_dir=config.root_data_dir,
        tpu=tpu,
        constructor_args=constructor_args)

    # tf.test.Benchmark.report_benchmark() writes results to a file with
    # path benchmark_result_file_path_prefix + benchmark_method
    benchmark_result_file_path_prefix = os.path.join(output_dir, 'proto_')
    os.environ['TEST_REPORT_FILE_PREFIX'] = benchmark_result_file_path_prefix
    benchmark_result_file_path = '{}{}.{}'.format(
        benchmark_result_file_path_prefix, benchmark_class_name,
        benchmark_method_name)

    # Start background threads for profiler and system info tracker
    tensorflow_profiler.start()
    process_info_tracker.start()

    # Run benchmark method
    execution_timestamp = time.time()
    logging.info('Starting benchmark execution: %s', benchmark_method)
    getattr(class_instance, benchmark_method_name)()
    logging.info('Stopped benchmark: %s', benchmark_method)

    # Read and build benchmark results
    raw_benchmark_result = utils.read_benchmark_result(
        benchmark_result_file_path)
    # Explicitly overwrite the name to be the full path to benchmark method
    raw_benchmark_result['name'] = benchmark_method
  except Exception:  # pylint: disable=broad-except
    logging.error('Benchmark execution for %s failed due to error:\n %s',
                  benchmark_method, traceback.format_exc())
    method_has_exception = True
    raw_benchmark_result = {}
    raw_benchmark_result['name'] = benchmark_method
    raw_benchmark_result['wall_time'] = -1
    raw_benchmark_result['extras'] = {}
  finally:
    # Stop background threads for profiler and system info tracker
    process_info = process_info_tracker.stop()
    tensorflow_profiler.stop()

  upload_timestamp = time.time()
  benchmark_result = report_utils.build_benchmark_result(
      raw_benchmark_result, method_has_exception)
  execution_summary = report_utils.build_execution_summary(
      execution_timestamp, execution_id, config.ml_framework_build_label,
      config.execution_label, config.platform_name, config.system_name,
      config.output_gcs_url, benchmark_result, config.get_env_vars(),
      config.get_flags(), harness_info, site_package_info, process_info,
      method_has_exception)
  report_utils.upload_execution_summary(config.bigquery_project_name,
                                        config.bigquery_dataset_table_name,
                                        execution_summary)
  report_utils.execute_methods(config.result_upload_methods,
                               execution_summary)
  logging.info('Benchmark execution for %s completed with summary:\n %s',
               benchmark_method, json.dumps(execution_summary, indent=2))
  _set_file_contents(json.dumps(execution_summary, indent=2),
                     os.path.join(output_dir, 'perfzero_summary.json'))
  utils.maybe_upload_to_gcs(output_dir, config.output_gcs_url)
  logging.getLogger().removeHandler(filehandler)

  method_execution_time = {
      'class_initialization': execution_timestamp - start_timestamp,
      'method_execution': upload_timestamp - execution_timestamp,
      'log_upload': time.time() - upload_timestamp
  }

  if config.profiler_enabled_time_str:
    relative_output_dir = output_dir[output_dir.find('benchmark'):]
    print('\nExecute the command below to start tensorboard server using '
          'the collected profiler data:\ntensorboard --logdir={}\n\n'
          'Open localhost:6006 in your browser to access the TensorBoard '
          'GUI. Use ssh with port forwarding if tensorboard is running on '
          'a remote machine.\n'.format(relative_output_dir))

  queue.put((method_has_exception, method_execution_time,
             benchmark_result['succeeded'], output_dir))
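
# Illustrative sketch (not part of the original module): _run_internal takes a
# `queue` argument described as an interprocess queue, so a caller would
# typically run it in a child process and read the result back, roughly like
# the hypothetical driver below. `benchmark_method`, `harness_info`,
# `site_package_info`, `root_output_dir` and `config` are assumed to be built
# elsewhere.
#
#   import multiprocessing
#
#   queue = multiprocessing.Queue()
#   process = multiprocessing.Process(
#       target=_run_internal,
#       args=(benchmark_method, harness_info, site_package_info,
#             root_output_dir, config, queue))
#   process.start()
#   method_has_exception, method_execution_time, succeeded, output_dir = (
#       queue.get())
#   process.join()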