def export_results(self):
    """
    Export the collected results as CSV files.

    Creates a 'results' directory under the configured export path and
    writes three CSVs there via FileWriter: server-only metrics, model
    inference metrics, and model GPU metrics.
    """

    # All result files are written under <export_path>/results
    export_dir = os.path.join(self._config.export_path, 'results')
    os.makedirs(export_dir, exist_ok=True)

    # Server-only metrics CSV
    server_metrics_path = os.path.join(export_dir,
                                       self._config.filename_server_only)
    logger.info(
        f"Exporting server only metrics to {server_metrics_path}...")
    self._export_server_only_csv(
        writer=FileWriter(filename=server_metrics_path),
        column_separator=',')

    # Model inference and GPU metrics CSVs
    metrics_inference_path = os.path.join(
        export_dir, self._config.filename_model_inference)
    metrics_gpu_path = os.path.join(export_dir,
                                    self._config.filename_model_gpu)
    logger.info(
        f"Exporting inference metrics to {metrics_inference_path}...")
    logger.info(f"Exporting GPU metrics to {metrics_gpu_path}...")
    self._export_model_csv(
        inference_writer=FileWriter(filename=metrics_inference_path),
        gpu_metrics_writer=FileWriter(filename=metrics_gpu_path),
        column_separator=',')
def _execute_run_configs(self):
    """
    Execute every run config produced by the run config generator.

    For each run config: starts the Triton server, loads the model
    variant, profiles it with perf_analyzer via the metrics manager,
    records the resulting measurement, then stops the server.

    Returns
    -------
    list of Measurement
        The measurements obtained. Each measurement is also sent to
        the result manager.
    """

    measurements = []

    while self._run_config_generator.run_configs():
        # Remove one run config from the list
        run_config = self._run_config_generator.next_config()

        # Start server, and load model variant; skip this config if the
        # variant cannot be created/loaded.
        self._server.start()
        if not self._create_and_load_model_variant(
                original_name=run_config.model_name(),
                variant_config=run_config.model_config()):
            continue

        # Profile various batch size and concurrency values.
        # TODO: Need to sort the values for batch size and concurrency
        # for correct measurement of the GPU memory metrics.
        perf_output_writer = None if \
            not self._config.perf_output else FileWriter()

        perf_config = run_config.perf_config()
        # Fixed: was logging.info(...), which logs through the root
        # logger and bypasses this module's logger configuration; use
        # the module-level logger like the rest of the file.
        logger.info(f"Profiling model {perf_config['model-name']}...")
        gpu_data, non_gpu_data = self._metrics_manager.profile_model(
            perf_config=perf_config,
            perf_output_writer=perf_output_writer)

        # Only record a measurement when both metric sets were obtained
        if gpu_data is not None and non_gpu_data is not None:
            measurement = Measurement(gpu_data=gpu_data,
                                      non_gpu_data=non_gpu_data,
                                      perf_config=perf_config)
            self._result_manager.add_measurement(run_config, measurement)
            measurements.append(measurement)

        self._server.stop()
        if self._config.triton_output_path:
            self._server.write_server_logs(self._config.triton_output_path)

    return measurements
def write_results(self):
    """
    Write the results table to the console.
    """

    console_writer = FileWriter()
    self._write_results(writer=console_writer, column_separator=' ')
def test_write(self):
    # Write through an explicitly supplied file handle
    handle = StringIO()
    writer = FileWriter(file_handle=handle)

    # Write test using create if not exist mode
    writer.write('test')

    # read file
    self.assertEqual(handle.getvalue(), 'test')

    # redirect stdout and create writer with no filename
    handle = StringIO()
    saved_stdout = sys.stdout
    sys.stdout = handle
    writer = FileWriter()
    writer.write('test')
    sys.stdout.flush()
    sys.stdout = saved_stdout
    self.assertEqual(handle.getvalue(), 'test')
    handle.close()

    # Check for malformed calls: writing to a closed handle must raise
    err_str = "Expected TritonModelAnalyzerException on malformed input."
    writer = FileWriter(file_handle=handle)
    with self.assertRaises(Exception, msg=err_str):
        writer.write('test')
def profile_model(self, run_config):
    """
    Run monitors while perf_analyzer profiles the model described by
    the given run config.

    Parameters
    ----------
    run_config : RunConfig
        RunConfig object corresponding to the models being profiled.

    Returns
    -------
    Measurement or None
        The combined GPU and non-GPU measurement for this run (also
        added to the result manager), or None if perf_analyzer failed.
        (The previous docstring claimed a (dict, list) tuple return,
        which did not match the code.)
    """

    # TODO TMA-518
    model_run_config = run_config.model_run_configs()[0]

    # TODO: Need to sort the values for batch size and concurrency
    # for correct measurement of the GPU memory metrics.
    perf_output_writer = None if \
        not self._config.perf_output else FileWriter(
            self._config.perf_output_path)

    perf_config = model_run_config.perf_config()
    logger.info(f"Profiling model {perf_config['model-name']}...")
    cpu_only = model_run_config.model_config().cpu_only()
    # (Removed a duplicate, no-op re-assignment of perf_config here.)

    # Inform user CPU metric(s) are not being collected under CPU mode
    collect_cpu_metrics_expect = cpu_only or len(self._gpus) == 0
    collect_cpu_metrics_actual = len(self._cpu_metrics) > 0
    if collect_cpu_metrics_expect and not collect_cpu_metrics_actual:
        logger.info(
            "CPU metric(s) are not being collected, while this profiling will run on CPU(s)."
        )

    # Warn user about CPU monitor performance issue
    if collect_cpu_metrics_actual:
        logger.warning("CPU metric(s) are being collected.")
        logger.warning(
            "Collecting CPU metric(s) can affect the latency or throughput numbers reported by perf analyzer."
        )

    # Start monitors and run perf_analyzer
    self._start_monitors(cpu_only=cpu_only)
    perf_analyzer_metrics_or_status = self._get_perf_analyzer_metrics(
        perf_config,
        perf_output_writer,
        perf_analyzer_env=run_config.triton_environment())

    # Failed Status: tear down the monitors and report no measurement
    if perf_analyzer_metrics_or_status == 1:
        self._stop_monitors(cpu_only=cpu_only)
        self._destroy_monitors(cpu_only=cpu_only)
        return None
    else:
        perf_analyzer_metrics = perf_analyzer_metrics_or_status

    # Get metrics for model inference and combine metrics that do not
    # have GPU UUID
    model_gpu_metrics = {}
    if not cpu_only:
        model_gpu_metrics = self._get_gpu_inference_metrics()
    model_cpu_metrics = self._get_cpu_inference_metrics()

    self._destroy_monitors(cpu_only=cpu_only)

    model_non_gpu_metrics = list(perf_analyzer_metrics.values()) + list(
        model_cpu_metrics.values())

    measurement = None
    if model_gpu_metrics is not None and model_non_gpu_metrics is not None:
        measurement = Measurement(gpu_data=model_gpu_metrics,
                                  non_gpu_data=model_non_gpu_metrics,
                                  perf_config=perf_config)
        self._result_manager.add_measurement(model_run_config, measurement)

    return measurement
def test_write(self):
    # Create and use writer
    writer = FileWriter(filename=TEST_FILENAME)

    err_str = "Expected TritonModelAnalyzerException on malformed input."

    # Failure to open the file must surface as the analyzer exception
    self.io_mock.raise_exception_on_open()
    with self.assertRaises(TritonModelAnalyzerException, msg=err_str):
        writer.write('test')
    self.io_mock.reset()

    # Failure during the write itself must also raise
    self.io_mock.raise_exception_on_write()
    with self.assertRaises(TritonModelAnalyzerException, msg=err_str):
        writer.write('test')
    self.io_mock.reset()

    # Check mock call on successful write
    writer.write('test')
    self.io_mock.assert_write_called_with_args('test')

    # Perform checks for stdout
    writer = FileWriter()
    writer.write('test')
    self.io_mock.assert_print_called_with_args('test')