def _test_create_server(self, gpus):
    # Create a TritonServerConfig
    server_config = TritonServerConfig()
    server_config['model-repository'] = MODEL_REPOSITORY_PATH

    # Run for both types of environments
    self.server = TritonServerFactory.create_server_docker(
        image=TRITON_IMAGE, config=server_config, gpus=gpus)
    self.server = TritonServerFactory.create_server_local(
        path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)

    # Try to create a server without specifying model repository and expect
    # error
    server_config['model-repository'] = None
    with self.assertRaises(
            AssertionError,
            msg="Expected AssertionError for trying to create "
            "server without specifying model repository."):
        self.server = TritonServerFactory.create_server_docker(
            image=TRITON_IMAGE, config=server_config, gpus=gpus)
    with self.assertRaises(
            AssertionError,
            msg="Expected AssertionError for trying to create "
            "server without specifying model repository."):
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)
def test_cpu_stats(self):
    server_config = TritonServerConfig()
    server_config['model-repository'] = MODEL_REPOSITORY_PATH
    gpus = ['all']

    # Test local server cpu_stats
    self.server = TritonServerFactory.create_server_local(
        path=TRITON_LOCAL_BIN_PATH, config=server_config)
    self.server.start()
    _, _ = self.server.cpu_stats()
    self.server_local_mock.assert_cpu_stats_called()
    self.server.stop()

    # Test docker server cpu stats
    self.server = TritonServerFactory.create_server_docker(
        image=TRITON_IMAGE, config=server_config, gpus=gpus)
    self.server.start()

    # The following needs to be called as it resets exec_run return value
    self.server_docker_mock.assert_server_process_start_called_with(
        TRITON_DOCKER_BIN_PATH + ' ' + server_config.to_cli_string(),
        MODEL_REPOSITORY_PATH, TRITON_IMAGE, 8000, 8001, 8002)
    _, _ = self.server.cpu_stats()
    self.server_docker_mock.assert_cpu_stats_called()
    self.server.stop()
def _test_cpu_stats(self, gpus):
    device_requests = [device.device_id() for device in gpus]

    # Create a TritonServerConfig
    server_config = TritonServerConfig()
    server_config['model-repository'] = MODEL_REPOSITORY_PATH

    # Test local server cpu_stats
    self.server = TritonServerFactory.create_server_local(
        path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)
    self.server.start()
    _, _ = self.server.cpu_stats()
    self.server_local_mock.assert_cpu_stats_called()
    self.server.stop()

    # Test docker server cpu stats
    self.server = TritonServerFactory.create_server_docker(
        image=TRITON_IMAGE, config=server_config, gpus=gpus)
    self.server.start()

    # The following needs to be called as it resets exec_run return value
    self.server_docker_mock.assert_server_process_start_called_with(
        f'{TRITON_DOCKER_BIN_PATH} {server_config.to_cli_string()}',
        MODEL_REPOSITORY_PATH, TRITON_IMAGE, device_requests, gpus, 8000,
        8001, 8002)
    _, _ = self.server.cpu_stats()
    self.server_docker_mock.assert_cpu_stats_called()
    self.server.stop()
def test_monitor_disable(self):
    server_config = TritonServerConfig()
    server_config['model-repository'] = MODEL_REPOSITORY_PATH
    gpus = [
        GPUDevice('TEST_DEVICE_NAME', 0, "TEST_PCI_BUS_ID", "TEST_UUID")
    ]
    frequency = 1
    monitoring_time = 2
    metrics = []

    server = TritonServerFactory.create_server_local(
        path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)

    # Start triton and monitor
    server.start()
    cpu_monitor = CPUMonitor(server, frequency, metrics)
    cpu_monitor.start_recording_metrics()
    time.sleep(monitoring_time)
    records = cpu_monitor.stop_recording_metrics()

    # Assert no library calls
    self.server_local_mock.assert_cpu_stats_not_called()

    cpu_monitor.destroy()
    server.stop()
def start_stop_docker_args(self):
    device_requests, gpu_uuids = self._find_correct_gpu_settings(
        self._sys_gpus)

    # Create a TritonServerConfig
    server_config = TritonServerConfig()
    server_config['model-repository'] = MODEL_REPOSITORY_PATH

    # Create mounts and labels
    mounts = [
        '/host/path:/dest/path:ro', '/another/host/path:/some/dest/path:rw'
    ]
    labels = {'RUNNER_ID': 'TEST_RUNNER_ID'}
    environment = {'VARIABLE': 'VALUE'}

    # Create server in docker, start, wait, and stop
    self.server = TritonServerFactory.create_server_docker(
        image=TRITON_IMAGE,
        config=server_config,
        gpus=self._sys_gpus,
        mounts=mounts,
        labels=labels)

    # Start server and check that mocked api is called
    self.server.start(env=environment)
    self.server_docker_mock.assert_server_process_start_called_with(
        f"{TRITON_DOCKER_BIN_PATH} {server_config.to_cli_string()}",
        MODEL_REPOSITORY_PATH, TRITON_IMAGE, device_requests, gpu_uuids,
        8000, 8001, 8002, mounts, labels)

    # Stop container and check api calls
    self.server.stop()
    self.server_docker_mock.assert_server_process_terminate_called()
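# Note (an observation, not part of the test above): the mount strings used
# there follow Docker's bind-mount shorthand 'host_path:container_path:mode',
# e.g. '/host/path:/dest/path:ro' mounts /host/path read-only at /dest/path
# inside the container, and ':rw' mounts it read-write.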
def test_measurement_request_count_increase(self):
    server_config = TritonServerConfig()
    server_config['model-repository'] = MODEL_REPOSITORY_PATH

    # Create server, client, PerfAnalyzer, and wait for server ready
    self.server = TritonServerFactory.create_server_local(
        path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=self.gpus)
    perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                 config=self.config,
                                 max_retries=10,
                                 timeout=100,
                                 max_cpu_util=50)
    self.client = TritonClientFactory.create_grpc_client(
        server_url=TEST_GRPC_URL)
    self.server.start()

    # Test the timeout for count mode
    self.client.wait_for_server_ready(num_retries=1)
    test_both_output = "Please use a larger time window"
    self.perf_mock.set_perf_analyzer_result_string(test_both_output)
    self.perf_mock.set_perf_analyzer_return_code(1)
    perf_metrics = [PerfThroughput, PerfLatencyP99]
    perf_analyzer.run(perf_metrics)
    self.assertEqual(
        self.perf_mock.get_perf_analyzer_popen_read_call_count(), 10)
def test_measurement_interval_increase(self):
    server_config = TritonServerConfig()
    server_config['model-repository'] = MODEL_REPOSITORY_PATH

    # Create server, client, PerfAnalyzer, and wait for server ready
    self.server = TritonServerFactory.create_server_local(
        path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=self.gpus)
    perf_analyzer_config = PerfAnalyzerConfig()
    perf_analyzer_config['model-name'] = TEST_MODEL_NAME
    perf_analyzer_config['concurrency-range'] = TEST_CONCURRENCY_RANGE
    perf_analyzer_config['measurement-mode'] = 'time_windows'
    perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                 config=self.config,
                                 max_retries=10,
                                 timeout=100,
                                 max_cpu_util=50)
    self.client = TritonClientFactory.create_grpc_client(
        server_url=TEST_GRPC_URL)
    self.server.start()

    # Test failure to stabilize for measurement windows
    self.client.wait_for_server_ready(num_retries=1)
    test_stabilize_output = "Please use a larger time window"
    self.perf_mock.set_perf_analyzer_result_string(test_stabilize_output)
    self.perf_mock.set_perf_analyzer_return_code(1)
    perf_metrics = [PerfThroughput, PerfLatencyP99]
    perf_analyzer.run(perf_metrics)
    self.assertEqual(
        self.perf_mock.get_perf_analyzer_popen_read_call_count(), 10)
def test_run(self):
    server_config = TritonServerConfig()
    server_config['model-repository'] = MODEL_REPOSITORY_PATH

    # Create server, client, PerfAnalyzer, and wait for server ready
    self.server = TritonServerFactory.create_server_local(
        path=TRITON_LOCAL_BIN_PATH, config=server_config)
    perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                 config=self.config,
                                 timeout=100,
                                 max_cpu_util=50)
    self.client = TritonClientFactory.create_grpc_client(
        server_url=TEST_GRPC_URL)
    self.server.start()
    self.client.wait_for_server_ready(num_retries=1)

    # Run perf analyzer with dummy metrics to check command parsing
    perf_metrics = [id]
    test_latency_output = "p99 latency: 5000 us\n\n\n\n"
    self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
    perf_analyzer.run(perf_metrics)
    self.perf_mock.assert_perf_analyzer_run_as([
        PERF_BIN_PATH, '-m', TEST_MODEL_NAME, '--measurement-interval',
        str(self.config['measurement-interval'])
    ])

    # Test latency parsing
    test_latency_output = "p99 latency: 5000 us\n\n\n\n"
    self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
    perf_metrics = [PerfLatency]
    perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].value(), 5)

    # Test throughput parsing
    test_throughput_output = "Throughput: 46.8 infer/sec\n\n\n\n"
    self.perf_mock.set_perf_analyzer_result_string(test_throughput_output)
    perf_metrics = [PerfThroughput]
    perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].value(), 46.8)

    # Test parsing for both
    test_both_output = "Throughput: 0.001 infer/sec\np99 latency: 3600 us\n\n\n\n"
    self.perf_mock.set_perf_analyzer_result_string(test_both_output)
    perf_metrics = [PerfThroughput, PerfLatency]
    perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 2)
    self.assertEqual(records[0].value(), 0.001)
    self.assertEqual(records[1].value(), 3.6)

    # Test exception handling
    self.perf_mock.set_perf_analyzer_return_code(1)
    self.assertTrue(perf_analyzer.run(perf_metrics), 1)
    self.server.stop()
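# A note on units for the test above (an observation, not part of the
# original test): the mocked perf_analyzer output reports latency in
# microseconds ("p99 latency: 5000 us", "p99 latency: 3600 us"), while the
# parsed records are asserted in milliseconds (5 and 3.6), so the parser is
# expected to divide the reported latency values by 1000.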
def test_run(self):
    server_config = TritonServerConfig()
    server_config['model-repository'] = MODEL_REPOSITORY_PATH

    # Create server, client, PerfAnalyzer, and wait for server ready
    self.server = TritonServerFactory.create_server_local(
        path=TRITON_LOCAL_BIN_PATH, config=server_config)
    perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH, config=self.config)
    self.client = TritonClientFactory.create_grpc_client(
        server_url=TEST_GRPC_URL)
    self.server.start()
    self.client.wait_for_server_ready(num_retries=1)

    # Run perf analyzer with dummy metrics to check command parsing
    perf_metrics = [id]
    test_latency_output = "Avg latency: 5000 ms\n\n\n\n"
    self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
    perf_analyzer.run(perf_metrics)
    self.perf_mock.assert_perf_analyzer_run_as(
        [PERF_BIN_PATH, '-m', TEST_MODEL_NAME])

    # Test latency parsing
    test_latency_output = "Avg latency: 5000 ms\n\n\n\n"
    self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
    perf_metrics = [PerfLatency]
    perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].value(), 5000)

    # Test throughput parsing
    test_throughput_output = "Throughput: 46.8 infer/sec\n\n\n\n"
    self.perf_mock.set_perf_analyzer_result_string(test_throughput_output)
    perf_metrics = [PerfThroughput]
    perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].value(), 46.8)

    # Test parsing for both
    test_both_output = "Throughput: 0.001 infer/sec\nAvg latency: 3.6 ms\n\n\n\n"
    self.perf_mock.set_perf_analyzer_result_string(test_both_output)
    perf_metrics = [PerfThroughput, PerfLatency]
    perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 2)
    self.assertEqual(records[0].value(), 0.001)
    self.assertEqual(records[1].value(), 3.6)

    # Test exception handling
    with self.assertRaisesRegex(
            expected_exception=TritonModelAnalyzerException,
            expected_regex="Running perf_analyzer with",
            msg="Expected TritonModelAnalyzerException"):
        self.perf_mock.raise_exception_on_run()
        perf_analyzer.run(perf_metrics)
    self.server.stop()
def test_start_wait_stop_gpus(self):
    # Create a TritonServerConfig
    server_config = TritonServerConfig()
    server_config['model-repository'] = MODEL_REPOSITORY_PATH
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # Create server in docker, start, wait, and stop
    self.server = TritonServerFactory.create_server_docker(
        model_path=MODEL_LOCAL_PATH,
        image=TRITON_IMAGE,
        config=server_config)

    # Set mock status_code to error, and generate exception
    self._mock_server_wait_for_ready(assert_raises=True)

    # Start server and check that mocked api is called
    self.server.start()
    self.server_docker_mock.assert_server_process_start_called_with(
        TRITON_DOCKER_BIN_PATH + ' ' + server_config.to_cli_string(),
        MODEL_LOCAL_PATH, MODEL_REPOSITORY_PATH, TRITON_IMAGE, 8000, 8001,
        8002)

    # Mock status code for connected server then stop
    self._mock_server_wait_for_ready(assert_raises=False)

    # Stop container and check api calls
    self.server.stop()
    self.server_docker_mock.assert_server_process_terminate_called()

    # Create local server which runs triton as a subprocess
    self.server = TritonServerFactory.create_server_local(
        path=TRITON_LOCAL_BIN_PATH, config=server_config)
    self._mock_server_wait_for_ready(assert_raises=True)

    # Check that API functions are called
    self.server.start()
    self.server_local_mock.assert_server_process_start_called_with(cmd=[
        TRITON_LOCAL_BIN_PATH, '--model-repository', MODEL_REPOSITORY_PATH
    ])
    self._mock_server_wait_for_ready(assert_raises=False)
    self.server.stop()
    self.server_local_mock.assert_server_process_terminate_called()
def _test_start_stop_gpus(self, gpus):
    device_requests = [device.device_id() for device in gpus]

    # Create a TritonServerConfig
    server_config = TritonServerConfig()
    server_config['model-repository'] = MODEL_REPOSITORY_PATH

    # Create server in docker, start, wait, and stop
    self.server = TritonServerFactory.create_server_docker(
        image=TRITON_IMAGE, config=server_config, gpus=gpus)

    # Start server and check that mocked api is called
    self.server.start()
    self.server_docker_mock.assert_server_process_start_called_with(
        f"{TRITON_DOCKER_BIN_PATH} {server_config.to_cli_string()}",
        MODEL_REPOSITORY_PATH, TRITON_IMAGE, device_requests, gpus, 8000,
        8001, 8002)

    self.server_docker_mock.raise_exception_on_container_run()
    with self.assertRaises(TritonModelAnalyzerException):
        self.server.start()
    self.server_docker_mock.stop_raise_exception_on_container_run()

    # Stop container and check api calls
    self.server.stop()
    self.server_docker_mock.assert_server_process_terminate_called()

    # Create local server which runs triton as a subprocess
    self.server = TritonServerFactory.create_server_local(
        path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)

    # Check that API functions are called
    self.server.start()
    self.server_local_mock.assert_server_process_start_called_with(
        cmd=[
            TRITON_LOCAL_BIN_PATH, '--model-repository',
            MODEL_REPOSITORY_PATH
        ],
        gpus=gpus)
    self.server.stop()
    self.server_local_mock.assert_server_process_terminate_called()
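# Observation (not part of the test above): the docker-based server is
# asserted with a single shell-style command string
# (f"{TRITON_DOCKER_BIN_PATH} {server_config.to_cli_string()}") plus the
# device requests and GPU list, while the local server is asserted with an
# argv list ([TRITON_LOCAL_BIN_PATH, '--model-repository', ...]). The tests
# therefore expect the two server wrappers to launch tritonserver in those
# two different ways.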
def _test_get_logs(self, gpus):
    # Create a TritonServerConfig
    server_config = TritonServerConfig()
    server_config['model-repository'] = MODEL_REPOSITORY_PATH

    # Check docker server logs
    self.server = TritonServerFactory.create_server_docker(
        image=TRITON_IMAGE, config=server_config, gpus=gpus)
    self.server.start()
    self.server.stop()
    self.server_docker_mock.assert_server_process_terminate_called()
    self.assertEqual(self.server.logs(), "Triton Server Test Log")

    # Check local server logs
    self.server = TritonServerFactory.create_server_local(
        path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)
    self.server.start()
    self.server.stop()
    self.server_local_mock.assert_server_process_terminate_called()
    self.assertEqual(self.server.logs(), "Triton Server Test Log")
def setUp(self):
    # Mocks
    self.mock_server_docker = MockServerDockerMethods()
    self.tritonclient_mock = MockTritonClientMethods()

    # Create server config
    self.server_config = TritonServerConfig()
    self.server_config['model-repository'] = MODEL_REPOSITORY_PATH
    self.server_config['model-control-mode'] = 'explicit'

    # Set CUDA_VISIBLE_DEVICES
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # Create and start the server
    self.server = TritonServerFactory.create_server_docker(
        model_path=MODEL_LOCAL_PATH,
        image=TRITON_IMAGE,
        config=self.server_config)
def test_run(self, requests_mock):
    # Now create a server config
    server_config = TritonServerConfig()
    server_config['model-repository'] = MODEL_REPOSITORY_PATH

    # Create server, PerfAnalyzer, and wait for server ready
    self.server = TritonServerFactory.create_server_local(
        path=TRITON_LOCAL_BIN_PATH, config=server_config)
    perf_client = PerfAnalyzer(path=PERF_BIN_PATH, config=self.config)
    self.server.start()
    requests_mock.get.return_value.status_code = 200
    self.server.wait_for_ready(num_retries=1)

    # Run perf analyzer
    throughput_record, latency_record = perf_client.run()
    self.perf_mock.assert_perf_analyzer_run_as(
        [PERF_BIN_PATH, '-m', TEST_MODEL_NAME])
    self.server.stop()

    # Test latency parsing
    test_latency_output = "Avg latency: 5000 ms\n\n\n\n"
    self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
    _, latency_record = perf_client.run()
    self.assertEqual(latency_record.value(), 5000)

    # Test throughput parsing
    test_throughput_output = "Throughput: 46.8 infer/sec\n\n\n\n"
    self.perf_mock.set_perf_analyzer_result_string(test_throughput_output)
    throughput_record, _ = perf_client.run()
    self.assertEqual(throughput_record.value(), 46.8)

    # Test parsing for both
    test_both_output = "Throughput: 0.001 infer/sec\nAvg latency: 3.6 ms\n\n\n\n"
    self.perf_mock.set_perf_analyzer_result_string(test_both_output)
    throughput_record, latency_record = perf_client.run()
    self.assertEqual(throughput_record.value(), 0.001)
    self.assertEqual(latency_record.value(), 3.6)
def test_record_cpu_memory(self):
    server_config = TritonServerConfig()
    server_config['model-repository'] = MODEL_REPOSITORY_PATH
    gpus = [
        GPUDevice('TEST_DEVICE_NAME', 0, "TEST_PCI_BUS_ID", "TEST_UUID")
    ]
    frequency = 1
    monitoring_time = 2
    metrics = [CPUAvailableRAM, CPUUsedRAM]

    server = TritonServerFactory.create_server_local(
        path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)

    # Start triton and monitor
    server.start()
    cpu_monitor = CPUMonitor(server, frequency, metrics)
    cpu_monitor.start_recording_metrics()
    time.sleep(monitoring_time)
    records = cpu_monitor.stop_recording_metrics()

    # Assert library calls
    self.server_local_mock.assert_cpu_stats_called()

    # Assert instance types
    for record in records:
        self.assertIsInstance(record.value(), float)
        self.assertIsInstance(record.timestamp(), int)

    # The number of records should be divisible by the number of metrics
    self.assertTrue(len(records) % len(metrics) == 0)
    self.assertTrue(len(records) > 0)

    with self.assertRaises(TritonModelAnalyzerException):
        cpu_monitor.stop_recording_metrics()

    cpu_monitor.destroy()
    server.stop()
def setUp(self):
    # GPUs
    gpus = [
        GPUDevice('TEST_DEVICE_NAME', 0, "TEST_PCI_BUS_ID", "TEST_UUID")
    ]

    # Mocks
    self.server_docker_mock = MockServerDockerMethods()
    self.tritonclient_mock = MockTritonClientMethods()
    self.server_docker_mock.start()
    self.tritonclient_mock.start()

    # Create server config
    self.server_config = TritonServerConfig()
    self.server_config['model-repository'] = MODEL_REPOSITORY_PATH
    self.server_config['model-control-mode'] = 'explicit'

    # Set CUDA_VISIBLE_DEVICES
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # Create and start the server
    self.server = TritonServerFactory.create_server_docker(
        image=TRITON_IMAGE, config=self.server_config, gpus=gpus)
def test_run(self):
    server_config = TritonServerConfig()
    server_config['model-repository'] = MODEL_REPOSITORY_PATH

    # Create server, client, PerfAnalyzer, and wait for server ready
    self.server = TritonServerFactory.create_server_local(
        path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=self.gpus)
    perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                 config=self.config,
                                 max_retries=10,
                                 timeout=100,
                                 max_cpu_util=50)
    self.client = TritonClientFactory.create_grpc_client(
        server_url=TEST_GRPC_URL)
    self.server.start()
    self.client.wait_for_server_ready(num_retries=1)

    pa_csv_mock = """Concurrency,Inferences/Second,Client Send,Network+Server Send/Recv,Server Queue,Server Compute Input,Server Compute Infer,Server Compute Output,Client Recv,p50 latency,p90 latency,p95 latency,p99 latency,Avg latency,request/response,response wait\n1,46.8,2,187,18,34,65,16,1,4600,4700,4800,4900,5000,3,314"""

    # Test avg latency parsing
    perf_metrics = [PerfLatencyAvg]
    with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
               mock_open(read_data=pa_csv_mock)), patch(
                   'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
        perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].value(), 5)

    # Test p90 latency parsing
    perf_metrics = [PerfLatencyP90]
    with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
               mock_open(read_data=pa_csv_mock)), patch(
                   'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
        perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].value(), 4.7)

    # Test p95 latency parsing
    perf_metrics = [PerfLatencyP95]
    with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
               mock_open(read_data=pa_csv_mock)), patch(
                   'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
        perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].value(), 4.8)

    # Test p99 latency parsing
    perf_metrics = [PerfLatencyP99]
    with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
               mock_open(read_data=pa_csv_mock)), patch(
                   'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
        perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].value(), 4.9)

    # Test throughput parsing
    perf_metrics = [PerfThroughput]
    with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
               mock_open(read_data=pa_csv_mock)), patch(
                   'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
        perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].value(), 46.8)

    # Test client response wait
    perf_metrics = [PerfClientResponseWait]
    with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
               mock_open(read_data=pa_csv_mock)), patch(
                   'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
        perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].value(), 0.314)

    # Test server queue
    perf_metrics = [PerfServerQueue]
    with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
               mock_open(read_data=pa_csv_mock)), patch(
                   'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
        perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].value(), 0.018)

    # Test server compute infer
    perf_metrics = [PerfServerComputeInfer]
    with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
               mock_open(read_data=pa_csv_mock)), patch(
                   'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
        perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].value(), 0.065)

    # Test server compute input
    perf_metrics = [PerfServerComputeInput]
    with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
               mock_open(read_data=pa_csv_mock)), patch(
                   'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
        perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].value(), 0.034)

    # Test server compute output
    perf_metrics = [PerfServerComputeOutput]
    with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
               mock_open(read_data=pa_csv_mock)), patch(
                   'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
        perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].value(), 0.016)

    # Test parsing for a subset of metrics
    perf_metrics = [
        PerfThroughput, PerfLatencyAvg, PerfLatencyP90, PerfLatencyP95,
        PerfLatencyP99
    ]
    with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
               mock_open(read_data=pa_csv_mock)), patch(
                   'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
        perf_analyzer.run(perf_metrics)
    records = perf_analyzer.get_records()
    self.assertEqual(len(records), 5)

    # Test no exceptions are raised when nothing can be parsed
    pa_csv_empty = ""
    perf_metrics = [
        PerfThroughput, PerfClientSendRecv, PerfClientResponseWait,
        PerfServerQueue, PerfServerComputeInfer, PerfServerComputeInput,
        PerfServerComputeOutput
    ]
    with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
               mock_open(read_data=pa_csv_empty)), patch(
                   'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
        self.assertFalse(perf_analyzer.run(perf_metrics))

    # Test exception handling
    self.perf_mock.set_perf_analyzer_return_code(1)
    with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
               mock_open(read_data=pa_csv_mock)), patch(
                   'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
        self.assertTrue(perf_analyzer.run(perf_metrics))
    self.server.stop()
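# Illustrative sketch only (not part of the test suite above): the mocked
# perf_analyzer CSV reports latency, queue, and compute columns in
# microseconds, and the assertions above expect values converted to
# milliseconds (e.g. "p90 latency" 4700 -> 4.7, "Server Queue" 18 -> 0.018).
# The snippet below re-reads the same mocked row with the standard csv
# module to make that mapping explicit; it makes no claim about how
# model_analyzer's own parser is implemented.
import csv
import io

_PA_CSV_ROW = (
    "Concurrency,Inferences/Second,Client Send,Network+Server Send/Recv,"
    "Server Queue,Server Compute Input,Server Compute Infer,"
    "Server Compute Output,Client Recv,p50 latency,p90 latency,p95 latency,"
    "p99 latency,Avg latency,request/response,response wait\n"
    "1,46.8,2,187,18,34,65,16,1,4600,4700,4800,4900,5000,3,314")

_row = next(csv.DictReader(io.StringIO(_PA_CSV_ROW)))
assert float(_row["p90 latency"]) / 1000 == 4.7      # usec -> msec
assert float(_row["Server Queue"]) / 1000 == 0.018   # usec -> msec
assert float(_row["Inferences/Second"]) == 46.8      # already infer/sec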