Example 1
    def _test_create_server(self, gpus):
        # Create a TritonServerConfig
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Run for both types of environments
        self.server = TritonServerFactory.create_server_docker(
            image=TRITON_IMAGE, config=server_config, gpus=gpus)

        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)

        # Try to create a server without specifying a model repository and
        # expect an error
        server_config['model-repository'] = None
        with self.assertRaises(
                AssertionError,
                msg="Expected AssertionError for trying to create"
                "server without specifying model repository."):
            self.server = TritonServerFactory.create_server_docker(
                image=TRITON_IMAGE, config=server_config, gpus=gpus)
        with self.assertRaises(
                AssertionError,
                msg="Expected AssertionError for trying to create"
                "server without specifying model repository."):
            self.server = TritonServerFactory.create_server_local(
                path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)
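A minimal sketch of how this helper might be driven from concrete test cases. The wrapper method names are hypothetical; the GPUDevice constructor arguments and the ['all'] shorthand mirror the other examples in this listing.

    # Hypothetical wrappers (names are illustrative only).
    def test_create_server_single_gpu(self):
        gpus = [GPUDevice('TEST_DEVICE_NAME', 0, 'TEST_PCI_BUS_ID', 'TEST_UUID')]
        self._test_create_server(gpus=gpus)

    def test_create_server_all_gpus(self):
        self._test_create_server(gpus=['all'])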
Example 2
    def test_cpu_stats(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH
        gpus = ['all']

        # Test local server cpu_stats
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config)
        self.server.start()
        _, _ = self.server.cpu_stats()
        self.server_local_mock.assert_cpu_stats_called()
        self.server.stop()

        # Test docker server cpu stats
        self.server = TritonServerFactory.create_server_docker(
            image=TRITON_IMAGE, config=server_config, gpus=gpus)
        self.server.start()

        # The following needs to be called as it resets the exec_run return value
        self.server_docker_mock.assert_server_process_start_called_with(
            TRITON_DOCKER_BIN_PATH + ' ' + server_config.to_cli_string(),
            MODEL_REPOSITORY_PATH, TRITON_IMAGE, 8000, 8001, 8002)
        _, _ = self.server.cpu_stats()
        self.server_docker_mock.assert_cpu_stats_called()
        self.server.stop()
Example 3
    def _test_cpu_stats(self, gpus):
        device_requests = [device.device_id() for device in gpus]

        # Create a TritonServerConfig
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Test local server cpu_stats
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)
        self.server.start()
        _, _ = self.server.cpu_stats()
        self.server_local_mock.assert_cpu_stats_called()
        self.server.stop()

        # Test docker server cpu stats
        self.server = TritonServerFactory.create_server_docker(
            image=TRITON_IMAGE, config=server_config, gpus=gpus)
        self.server.start()

        # The following needs to be called as it resets the exec_run return value
        self.server_docker_mock.assert_server_process_start_called_with(
            f'{TRITON_DOCKER_BIN_PATH} {server_config.to_cli_string()}',
            MODEL_REPOSITORY_PATH, TRITON_IMAGE, device_requests, gpus, 8000,
            8001, 8002)
        _, _ = self.server.cpu_stats()
        self.server_docker_mock.assert_cpu_stats_called()
        self.server.stop()
Example 4
    def test_monitor_disable(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH
        gpus = [
            GPUDevice('TEST_DEVICE_NAME', 0, "TEST_PCI_BUS_ID", "TEST_UUID")
        ]

        frequency = 1
        monitoring_time = 2
        metrics = []

        server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)

        # Start triton and monitor
        server.start()
        cpu_monitor = CPUMonitor(server, frequency, metrics)
        cpu_monitor.start_recording_metrics()
        time.sleep(monitoring_time)
        records = cpu_monitor.stop_recording_metrics()

        # Assert no library calls
        self.server_local_mock.assert_cpu_stats_not_called()

        cpu_monitor.destroy()
        server.stop()
Example 5
    def start_stop_docker_args(self):
        device_requests, gpu_uuids = self._find_correct_gpu_settings(
            self._sys_gpus)

        # Create a TritonServerConfig
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create mounts and labels
        mounts = [
            '/host/path:/dest/path:ro', '/another/host/path:/some/dest/path:rw'
        ]
        labels = {'RUNNER_ID': 'TEST_RUNNER_ID'}

        environment = {'VARIABLE': 'VALUE'}
        # Create server in docker, start, wait, and stop
        self.server = TritonServerFactory.create_server_docker(
            image=TRITON_IMAGE,
            config=server_config,
            gpus=self._sys_gpus,
            mounts=mounts,
            labels=labels)

        # Start the server and check that the mocked API is called
        self.server.start(env=environment)
        self.server_docker_mock.assert_server_process_start_called_with(
            f"{TRITON_DOCKER_BIN_PATH} {server_config.to_cli_string()}",
            MODEL_REPOSITORY_PATH, TRITON_IMAGE, device_requests, gpu_uuids,
            8000, 8001, 8002, mounts, labels)

        # Stop container and check api calls
        self.server.stop()
        self.server_docker_mock.assert_server_process_terminate_called()
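For orientation, a rough sketch of the docker-py call that the mocked start is presumably standing in for. The helper name and argument plumbing are assumptions, not taken from TritonServerFactory; only the docker-py API itself is standard.

import docker

# Sketch only: launching a Triton container with the mounts, labels and
# environment used above.
def start_triton_container(image, command, mounts, labels, environment, gpu_uuids):
    client = docker.from_env()
    return client.containers.run(
        image,
        command,
        detach=True,
        volumes=mounts,            # 'host/path:dest/path:mode' strings
        labels=labels,             # e.g. {'RUNNER_ID': 'TEST_RUNNER_ID'}
        environment=environment,   # e.g. {'VARIABLE': 'VALUE'}
        device_requests=[docker.types.DeviceRequest(device_ids=gpu_uuids,
                                                    capabilities=[['gpu']])],
        ports={8000: 8000, 8001: 8001, 8002: 8002})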
Example 6
    def test_measurement_request_count_increase(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, client, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=self.gpus)
        perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                     config=self.config,
                                     max_retries=10,
                                     timeout=100,
                                     max_cpu_util=50)
        self.client = TritonClientFactory.create_grpc_client(
            server_url=TEST_GRPC_URL)
        self.server.start()

        # Test the timeout for count mode
        self.client.wait_for_server_ready(num_retries=1)
        test_timeout_output = "Please use a larger time window"
        self.perf_mock.set_perf_analyzer_result_string(test_timeout_output)
        self.perf_mock.set_perf_analyzer_return_code(1)
        perf_metrics = [PerfThroughput, PerfLatencyP99]
        perf_analyzer.run(perf_metrics)
        self.assertEqual(
            self.perf_mock.get_perf_analyzer_popen_read_call_count(), 10)
Example 7
    def test_measurement_interval_increase(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, client, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=self.gpus)
        perf_analyzer_config = PerfAnalyzerConfig()
        perf_analyzer_config['model-name'] = TEST_MODEL_NAME
        perf_analyzer_config['concurrency-range'] = TEST_CONCURRENCY_RANGE
        perf_analyzer_config['measurement-mode'] = 'time_windows'
        perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                     config=self.config,
                                     max_retries=10,
                                     timeout=100,
                                     max_cpu_util=50)
        self.client = TritonClientFactory.create_grpc_client(
            server_url=TEST_GRPC_URL)
        self.server.start()

        # Test failure to stabilize for measurement windows
        self.client.wait_for_server_ready(num_retries=1)
        test_stabilize_output = "Please use a larger time window"
        self.perf_mock.set_perf_analyzer_result_string(test_stabilize_output)
        self.perf_mock.set_perf_analyzer_return_code(1)
        perf_metrics = [PerfThroughput, PerfLatencyP99]
        perf_analyzer.run(perf_metrics)
        self.assertEqual(
            self.perf_mock.get_perf_analyzer_popen_read_call_count(), 10)
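Both of these tests push the retry logic to its max_retries bound, which is why the popen read call count ends up at 10. A rough standalone sketch of that kind of loop, with hypothetical names; it is not the PerfAnalyzer implementation.

import subprocess

# Illustrative retry loop: when perf_analyzer exits non-zero and asks for a
# larger time window, the output is re-read (once per attempt) up to
# max_retries times. The callback name is hypothetical.
def run_with_retries(cmd, max_retries, on_window_too_small):
    for _ in range(max_retries):
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                   universal_newlines=True)
        output = process.stdout.read()   # this read is what the mock counts
        process.wait()
        if process.returncode == 0:
            return output
        if 'Please use a larger time window' in output:
            on_window_too_small()        # e.g. enlarge the measurement window
            continue
        break
    return None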
Example 8
    def test_run(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, client, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config)
        perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                     config=self.config,
                                     timeout=100,
                                     max_cpu_util=50)
        self.client = TritonClientFactory.create_grpc_client(
            server_url=TEST_GRPC_URL)
        self.server.start()
        self.client.wait_for_server_ready(num_retries=1)

        # Run perf analyzer with dummy metrics to check command parsing
        perf_metrics = [id]
        test_latency_output = "p99 latency: 5000 us\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
        perf_analyzer.run(perf_metrics)
        self.perf_mock.assert_perf_analyzer_run_as([
            PERF_BIN_PATH, '-m', TEST_MODEL_NAME, '--measurement-interval',
            str(self.config['measurement-interval'])
        ])

        # Test latency parsing
        test_latency_output = "p99 latency: 5000 us\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
        perf_metrics = [PerfLatency]
        perf_analyzer.run(perf_metrics)
        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 5)

        # Test throughput parsing
        test_throughput_output = "Throughput: 46.8 infer/sec\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_throughput_output)
        perf_metrics = [PerfThroughput]
        perf_analyzer.run(perf_metrics)
        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 46.8)

        # Test parsing for both
        test_both_output = "Throughput: 0.001 infer/sec\np99 latency: 3600 us\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_both_output)
        perf_metrics = [PerfThroughput, PerfLatency]
        perf_analyzer.run(perf_metrics)
        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 2)
        self.assertEqual(records[0].value(), 0.001)
        self.assertEqual(records[1].value(), 3.6)

        # Test exception handling
        self.perf_mock.set_perf_analyzer_return_code(1)
        self.assertEqual(perf_analyzer.run(perf_metrics), 1)
        self.server.stop()
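The assertions above imply that latencies reported in microseconds are converted to milliseconds (5000 us becomes 5, 3600 us becomes 3.6) while throughput is taken as-is. A small illustrative parser that reproduces those numbers; it is not the PerfAnalyzer implementation.

import re

# Illustrative only: 'p99 latency: 5000 us' -> 5.0 (ms),
# 'Throughput: 46.8 infer/sec' -> 46.8.
def parse_perf_output(output):
    metrics = {}
    latency = re.search(r'p99 latency:\s*(\d+(?:\.\d+)?)\s*us', output)
    if latency:
        metrics['p99_latency_ms'] = float(latency.group(1)) / 1000.0
    throughput = re.search(r'Throughput:\s*(\d+(?:\.\d+)?)\s*infer/sec', output)
    if throughput:
        metrics['throughput_infer_per_sec'] = float(throughput.group(1))
    return metrics

# parse_perf_output("Throughput: 0.001 infer/sec\np99 latency: 3600 us")
# -> {'throughput_infer_per_sec': 0.001, 'p99_latency_ms': 3.6}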
Example 9
    def test_run(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, client, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config)
        perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH, config=self.config)
        self.client = TritonClientFactory.create_grpc_client(
            server_url=TEST_GRPC_URL)
        self.server.start()
        self.client.wait_for_server_ready(num_retries=1)

        # Run perf analyzer with dummy metrics to check command parsing
        perf_metrics = [id]
        test_latency_output = "Avg latency: 5000 ms\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
        perf_analyzer.run(perf_metrics)
        self.perf_mock.assert_perf_analyzer_run_as(
            [PERF_BIN_PATH, '-m', TEST_MODEL_NAME])

        # Test latency parsing
        test_latency_output = "Avg latency: 5000 ms\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
        perf_metrics = [PerfLatency]
        perf_analyzer.run(perf_metrics)
        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 5000)

        # Test throughput parsing
        test_throughput_output = "Throughput: 46.8 infer/sec\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_throughput_output)
        perf_metrics = [PerfThroughput]
        perf_analyzer.run(perf_metrics)
        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 46.8)

        # Test parsing for both
        test_both_output = "Throughput: 0.001 infer/sec\nAvg latency: 3.6 ms\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_both_output)
        perf_metrics = [PerfThroughput, PerfLatency]
        perf_analyzer.run(perf_metrics)
        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 2)
        self.assertEqual(records[0].value(), 0.001)
        self.assertEqual(records[1].value(), 3.6)

        # Test exception handling
        with self.assertRaisesRegex(
                expected_exception=TritonModelAnalyzerException,
                expected_regex="Running perf_analyzer with",
                msg="Expected TritonModelAnalyzerException"):
            self.perf_mock.raise_exception_on_run()
            perf_analyzer.run(perf_metrics)

        self.server.stop()
Example 10
    def test_start_wait_stop_gpus(self):
        # Create a TritonServerConfig
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

        # Create server in docker, start, wait, and stop
        self.server = TritonServerFactory.create_server_docker(
            model_path=MODEL_LOCAL_PATH,
            image=TRITON_IMAGE,
            config=server_config)

        # Set mock status_code to error, and generate exception
        self._mock_server_wait_for_ready(assert_raises=True)

        # Start the server and check that the mocked API is called
        self.server.start()
        self.server_docker_mock.assert_server_process_start_called_with(
            TRITON_DOCKER_BIN_PATH + ' ' + server_config.to_cli_string(),
            MODEL_LOCAL_PATH, MODEL_REPOSITORY_PATH, TRITON_IMAGE, 8000, 8001,
            8002)

        # Mock status code for connected server then stop
        self._mock_server_wait_for_ready(assert_raises=False)

        # Stop container and check api calls
        self.server.stop()
        self.server_docker_mock.assert_server_process_terminate_called()

        # Create local server which runs triton as a subprocess
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config)

        self._mock_server_wait_for_ready(assert_raises=True)

        # Check that API functions are called
        self.server.start()

        self.server_local_mock.assert_server_process_start_called_with(cmd=[
            TRITON_LOCAL_BIN_PATH, '--model-repository', MODEL_REPOSITORY_PATH
        ])

        self._mock_server_wait_for_ready(assert_raises=False)
        self.server.stop()
        self.server_local_mock.assert_server_process_terminate_called()
Example 11
    def _test_start_stop_gpus(self, gpus):
        device_requests = [device.device_id() for device in gpus]

        # Create a TritonServerConfig
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server in docker, start, wait, and stop
        self.server = TritonServerFactory.create_server_docker(
            image=TRITON_IMAGE, config=server_config, gpus=gpus)

        # Start the server and check that the mocked API is called
        self.server.start()
        self.server_docker_mock.assert_server_process_start_called_with(
            f"{TRITON_DOCKER_BIN_PATH} {server_config.to_cli_string()}",
            MODEL_REPOSITORY_PATH, TRITON_IMAGE, device_requests, gpus, 8000,
            8001, 8002)

        self.server_docker_mock.raise_exception_on_container_run()
        with self.assertRaises(TritonModelAnalyzerException):
            self.server.start()
        self.server_docker_mock.stop_raise_exception_on_container_run()

        # Stop container and check api calls
        self.server.stop()
        self.server_docker_mock.assert_server_process_terminate_called()

        # Create local server which runs triton as a subprocess
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)

        # Check that API functions are called
        self.server.start()

        self.server_local_mock.assert_server_process_start_called_with(
            cmd=[
                TRITON_LOCAL_BIN_PATH, '--model-repository',
                MODEL_REPOSITORY_PATH
            ],
            gpus=gpus)

        self.server.stop()
        self.server_local_mock.assert_server_process_terminate_called()
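Both assertions above derive the Triton command line from the same config. A sketch of the assumed relationship follows; the '--key=value' rendering of to_cli_string() and the placeholder binary paths are assumptions, not taken from this listing.

# Assumed rendering of TritonServerConfig.to_cli_string(): one '--key=value'
# pair per populated option. Binary paths below are placeholders.
def to_cli_string(config):
    return ' '.join(f'--{key}={value}'
                    for key, value in config.items() if value is not None)

config = {'model-repository': '/path/to/model/repository'}

# Docker server: one command string, as asserted against the docker mock.
docker_cmd = f"/opt/tritonserver/bin/tritonserver {to_cli_string(config)}"

# Local server: an argv list, as asserted against the local mock.
local_cmd = ['/usr/local/bin/tritonserver',
             '--model-repository', config['model-repository']]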
Example 12
    def _test_get_logs(self, gpus):
        # Create a TritonServerConfig
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Check docker server logs
        self.server = TritonServerFactory.create_server_docker(
            image=TRITON_IMAGE, config=server_config, gpus=gpus)
        self.server.start()
        self.server.stop()
        self.server_docker_mock.assert_server_process_terminate_called()
        self.assertEqual(self.server.logs(), "Triton Server Test Log")

        # Check local server logs
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)
        self.server.start()
        self.server.stop()
        self.server_local_mock.assert_server_process_terminate_called()
        self.assertEqual(self.server.logs(), "Triton Server Test Log")
Example 13
    def setUp(self):
        # Mocks
        self.mock_server_docker = MockServerDockerMethods()
        self.tritonclient_mock = MockTritonClientMethods()

        # Create server config
        self.server_config = TritonServerConfig()
        self.server_config['model-repository'] = MODEL_REPOSITORY_PATH
        self.server_config['model-control-mode'] = 'explicit'

        # Set CUDA_VISIBLE_DEVICES
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

        # Create the server
        self.server = TritonServerFactory.create_server_docker(
            model_path=MODEL_LOCAL_PATH,
            image=TRITON_IMAGE,
            config=self.server_config)
Example 14
    def test_run(self, requests_mock):
        # Now create a server config
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config)
        perf_client = PerfAnalyzer(path=PERF_BIN_PATH, config=self.config)

        self.server.start()
        requests_mock.get.return_value.status_code = 200
        self.server.wait_for_ready(num_retries=1)

        # Run perf analyzer
        throughput_record, latency_record = perf_client.run()
        self.perf_mock.assert_perf_analyzer_run_as(
            [PERF_BIN_PATH, '-m', TEST_MODEL_NAME])
        self.server.stop()

        # Test latency parsing
        test_latency_output = "Avg latency: 5000 ms\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
        _, latency_record = perf_client.run()
        self.assertEqual(latency_record.value(), 5000)

        # Test throughput parsing
        test_throughput_output = "Throughput: 46.8 infer/sec\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_throughput_output)
        throughput_record, _ = perf_client.run()
        self.assertEqual(throughput_record.value(), 46.8)

        # Test parsing for both
        test_both_output = "Throughput: 0.001 infer/sec\nAvg latency: 3.6 ms\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_both_output)
        throughput_record, latency_record = perf_client.run()
        self.assertEqual(throughput_record.value(), 0.001)
        self.assertEqual(latency_record.value(), 3.6)
Example 15
    def test_record_cpu_memory(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH
        gpus = [
            GPUDevice('TEST_DEVICE_NAME', 0, "TEST_PCI_BUS_ID", "TEST_UUID")
        ]

        frequency = 1
        monitoring_time = 2
        metrics = [CPUAvailableRAM, CPUUsedRAM]

        server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)

        # Start triton and monitor
        server.start()
        cpu_monitor = CPUMonitor(server, frequency, metrics)
        cpu_monitor.start_recording_metrics()
        time.sleep(monitoring_time)
        records = cpu_monitor.stop_recording_metrics()

        # Assert library calls
        self.server_local_mock.assert_cpu_stats_called()

        # Assert instance types
        for record in records:
            self.assertIsInstance(record.value(), float)
            self.assertIsInstance(record.timestamp(), int)

        # The number of records should be divisible by the number of metrics
        self.assertTrue(len(records) % len(metrics) == 0)
        self.assertTrue(len(records) > 0)

        with self.assertRaises(TritonModelAnalyzerException):
            cpu_monitor.stop_recording_metrics()

        cpu_monitor.destroy()
        server.stop()
Example 16
    def setUp(self):

        # GPUs
        gpus = [
            GPUDevice('TEST_DEVICE_NAME', 0, "TEST_PCI_BUS_ID", "TEST_UUID")
        ]

        # Mocks
        self.server_docker_mock = MockServerDockerMethods()
        self.tritonclient_mock = MockTritonClientMethods()
        self.server_docker_mock.start()
        self.tritonclient_mock.start()

        # Create server config
        self.server_config = TritonServerConfig()
        self.server_config['model-repository'] = MODEL_REPOSITORY_PATH
        self.server_config['model-control-mode'] = 'explicit'

        # Set CUDA_VISIBLE_DEVICES
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

        # Create the server
        self.server = TritonServerFactory.create_server_docker(
            image=TRITON_IMAGE, config=self.server_config, gpus=gpus)
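Since this setUp starts both mock patchers explicitly, a matching tearDown is presumably needed; a minimal sketch, assuming the mock helpers expose a stop() to match the start() used above.

    def tearDown(self):
        # Assumed counterpart to setUp: undo the patchers and the
        # CUDA_VISIBLE_DEVICES override.
        self.server_docker_mock.stop()
        self.tritonclient_mock.stop()
        os.environ.pop('CUDA_VISIBLE_DEVICES', None)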
Example 17
    def test_run(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, client, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=self.gpus)
        perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                     config=self.config,
                                     max_retries=10,
                                     timeout=100,
                                     max_cpu_util=50)
        self.client = TritonClientFactory.create_grpc_client(
            server_url=TEST_GRPC_URL)
        self.server.start()
        self.client.wait_for_server_ready(num_retries=1)

        pa_csv_mock = """Concurrency,Inferences/Second,Client Send,Network+Server Send/Recv,Server Queue,Server Compute Input,Server Compute Infer,Server Compute Output,Client Recv,p50 latency,p90 latency,p95 latency,p99 latency,Avg latency,request/response,response wait\n1,46.8,2,187,18,34,65,16,1,4600,4700,4800,4900,5000,3,314"""

        # Test avg latency parsing
        perf_metrics = [PerfLatencyAvg]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 5)

        # Test p90 latency parsing
        perf_metrics = [PerfLatencyP90]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 4.7)

        # Test p95 latency parsing
        perf_metrics = [PerfLatencyP95]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 4.8)

        # Test p99 latency parsing
        perf_metrics = [PerfLatencyP99]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 4.9)

        # Test throughput parsing
        perf_metrics = [PerfThroughput]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 46.8)

        # Test client response wait
        perf_metrics = [PerfClientResponseWait]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 0.314)

        # Test server queue
        perf_metrics = [PerfServerQueue]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 0.018)

        # Test server compute infer
        perf_metrics = [PerfServerComputeInfer]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 0.065)

        # Test server compute input
        perf_metrics = [PerfServerComputeInput]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 0.034)

        # Test server compute output
        perf_metrics = [PerfServerComputeOutput]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 0.016)

        # Test parsing for a subset of metrics
        perf_metrics = [
            PerfThroughput, PerfLatencyAvg, PerfLatencyP90, PerfLatencyP95,
            PerfLatencyP99
        ]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 5)

        # Test no exceptions are raised when nothing can be parsed
        pa_csv_empty = ""
        perf_metrics = [
            PerfThroughput, PerfClientSendRecv, PerfClientResponseWait,
            PerfServerQueue, PerfServerComputeInfer, PerfServerComputeInput,
            PerfServerComputeOutput
        ]
        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_empty)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            self.assertFalse(perf_analyzer.run(perf_metrics))

        # Test exception handling
        self.perf_mock.set_perf_analyzer_return_code(1)
        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            self.assertTrue(perf_analyzer.run(perf_metrics))
        self.server.stop()
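The expected record values line up with the pa_csv_mock columns once the microsecond fields are divided by 1000 (p90 latency 4700 us -> 4.7, Server Queue 18 us -> 0.018, response wait 314 us -> 0.314). A small illustrative reader reproducing that arithmetic; it is not the PerfAnalyzer code itself.

import csv
import io

# Illustrative only: read the perf_analyzer CSV used above and convert the
# requested microsecond columns to milliseconds.
def parse_pa_csv(csv_text, columns):
    row = next(csv.DictReader(io.StringIO(csv_text)))
    return {name: float(row[name]) / 1000.0 for name in columns}

pa_csv_mock = (
    "Concurrency,Inferences/Second,Client Send,Network+Server Send/Recv,"
    "Server Queue,Server Compute Input,Server Compute Infer,"
    "Server Compute Output,Client Recv,p50 latency,p90 latency,p95 latency,"
    "p99 latency,Avg latency,request/response,response wait\n"
    "1,46.8,2,187,18,34,65,16,1,4600,4700,4800,4900,5000,3,314")

# {'p90 latency': 4.7, 'Server Queue': 0.018, 'response wait': 0.314}
print(parse_pa_csv(pa_csv_mock, ['p90 latency', 'Server Queue', 'response wait']))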