def _infer_exact_helper(tester,
                         pf,
                         tensor_shape,
                         batch_size,
                         input_dtype,
                         output0_dtype,
                         output1_dtype,
                         output0_raw=True,
                         output1_raw=True,
                         model_version=None,
                         swap=False,
                         outputs=("OUTPUT0", "OUTPUT1"),
                         use_http=True,
                         use_grpc=True,
                         skip_request_id_check=False,
                         use_streaming=True,
                         correlation_id=0):
     for bs in (1, batch_size):
         # model that does not support batching
         if bs == 1:
             iu.infer_exact(
                 tester,
                 pf + "_nobatch",
                 tensor_shape,
                 bs,
                 input_dtype,
                 output0_dtype,
                 output1_dtype,
                 output0_raw=output0_raw,
                 output1_raw=output1_raw,
                 model_version=model_version,
                 swap=swap,
                 outputs=outputs,
                 use_http=use_http,
                 use_grpc=use_grpc,
                 skip_request_id_check=skip_request_id_check,
                 use_streaming=use_streaming,
                 correlation_id=correlation_id,
                 use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                 use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
         # model that supports batching
         iu.infer_exact(
             tester,
             pf, (bs,) + tensor_shape,
             bs,
             input_dtype,
             output0_dtype,
             output1_dtype,
             output0_raw=output0_raw,
             output1_raw=output1_raw,
             model_version=model_version,
             swap=swap,
             outputs=outputs,
             use_http=use_http,
             use_grpc=use_grpc,
             skip_request_id_check=skip_request_id_check,
             use_streaming=use_streaming,
             correlation_id=correlation_id,
             use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
             use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
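A minimal sketch of how the helper above might be driven. The TEST_SYSTEM_SHARED_MEMORY / TEST_CUDA_SHARED_MEMORY globals it references are not defined in these snippets, so the environment-variable defaults below, like the 'graphdef'/float32 model choice, are assumptions:

import os
import numpy as np

# Assumed module-level flags; the real test driver may configure these differently.
TEST_SYSTEM_SHARED_MEMORY = bool(int(os.environ.get('TEST_SYSTEM_SHARED_MEMORY', 0)))
TEST_CUDA_SHARED_MEMORY = bool(int(os.environ.get('TEST_CUDA_SHARED_MEMORY', 0)))

# Hypothetical call from inside a test method (so 'self' is the TestCase):
# exercise a float32 graphdef model with and without batching, up to batch size 8.
_infer_exact_helper(self, 'graphdef', (16,), 8,
                    np.float32, np.float32, np.float32,
                    model_version=1, swap=False)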
 def test_ensemble_add_sub(self):
     for bs in (1, 8):
         iu.infer_exact(self, "ensemble_add_sub", (bs, 16), bs,
                             np.int32, np.int32, np.int32)
     
     infer_count = self._get_infer_count_per_version("simple")
     # The two 'simple' versions should have the same infer count
     if (infer_count[0] != infer_count[1]):
         self.assertTrue(False, "unexpected difference in infer count between 'simple' versions")
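The _get_infer_count_per_version helper used here (and again in test_ensemble_add_sub_one_output below) is not shown in these snippets. A rough sketch of what it presumably does, reusing the ServerStatusContext API that appears later on this page; the URL, protocol, and the model_inference_count field name are assumptions:

    def _get_infer_count_per_version(self, model_name):
        # Assumed helper: return the per-version inference counts for the two
        # versions of 'model_name', read from server status. The URL, protocol
        # and the model_inference_count field name are guesses.
        ctx = ServerStatusContext("localhost:8000", ProtocolType.HTTP, model_name, True)
        ss = ctx.get_server_status()
        infer_count = [0, 0]
        for (version, vs) in ss.model_status[model_name].version_status.items():
            infer_count[int(version) - 1] = vs.model_inference_count
        return infer_count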
 def test_select_optimization_profile(self):
     # Different profile has different optimized input shape
     batch_size = 4
     tensor_shape = (16, )
     try:
         iu.infer_exact(self, self.model_name_, tensor_shape, batch_size,
                        self.dtype_, self.dtype_, self.dtype_)
     except InferenceServerException as ex:
         self.assertTrue(False, "unexpected error {}".format(ex))
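The optimization-profile tests here and further down rely on self.model_name_ and self.dtype_ being set on the test class; a plausible setUp is sketched below, with the 'plan' platform name and float32 dtype as assumptions:

    def setUp(self):
        # Assumed fixture for the TensorRT optimization-profile tests.
        self.model_name_ = 'plan'
        self.dtype_ = np.float32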
 def test_ensemble_mix_platform(self):
     # Skip on CPU only machine as TensorRT model is used in this ensemble
     if CPU_ONLY:
         return
     for bs in (1, 8):
         iu.infer_exact(self, "mix_platform", (bs, 16), bs,
             np.float32, np.float32, np.float32,
             use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
             use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
Example #5
    def check_response(self,
                       trial,
                       bs,
                       less_than,
                       threshold_ms,
                       requested_outputs=("OUTPUT0", "OUTPUT1")):
        global _check_exception
        try:
            input_size = 16

            start_ms = int(round(time.time() * 1000))

            if trial == "graphdef" or trial == "netdef":
                tensor_shape = (input_size, )
                iu.infer_exact(self,
                               trial,
                               tensor_shape,
                               bs,
                               True,
                               np.float32,
                               np.float32,
                               np.float32,
                               swap=True,
                               outputs=requested_outputs,
                               use_grpc=False,
                               skip_request_id_check=True)
            elif trial == "plan":
                tensor_shape = (input_size, 1, 1)
                iu.infer_exact(self,
                               trial,
                               tensor_shape,
                               bs,
                               True,
                               np.float32,
                               np.float32,
                               np.float32,
                               swap=True,
                               outputs=requested_outputs,
                               use_grpc=False,
                               skip_request_id_check=True)
            else:
                self.assertFalse(True, "unknown trial type: " + trial)

            end_ms = int(round(time.time() * 1000))
            if less_than:
                self.assertTrue(
                    (end_ms - start_ms) < threshold_ms,
                    "expected less than " + str(threshold_ms) +
                    "ms response time, got " + str(end_ms - start_ms) + " ms")
            else:
                self.assertTrue(
                    (end_ms - start_ms) > threshold_ms,
                    "expected greater than " + str(threshold_ms) +
                    "ms response time, got " + str(end_ms - start_ms) + " ms")
        except Exception as ex:
            _check_exception = ex
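check_response records failures in the module-level _check_exception rather than raising, which suggests it is meant to run on a worker thread with the result checked afterwards; a sketch of that pattern (the thread wiring is an assumption, not taken from the original tests):

import threading

_check_exception = None  # set by check_response when the timed inference fails

# Hypothetical driver, called from a test method: time a batch-8 graphdef
# inference off the main thread and require it to finish within 5 seconds.
t = threading.Thread(target=self.check_response, args=("graphdef", 8, True, 5000))
t.start()
t.join()
self.assertIsNone(_check_exception,
                  "check_response failed: {}".format(_check_exception))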
Example #6
 def test_ensemble_mix_ensemble(self):
     for bs in (1, 8):
         iu.infer_exact(self,
                        "mix_ensemble", (16, ),
                        bs,
                        np.int32,
                        np.float32,
                        np.float32,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
 def test_ensemble_add_sub_one_output(self):
     for bs in (1, 8):
         iu.infer_exact(self, "ensemble_add_sub", (bs, 16), bs,
                             np.int32, np.int32, np.int32,
                             outputs=("OUTPUT0",))
     
     infer_count = self._get_infer_count_per_version("simple")
     # Only 'simple' version 2 should have non-zero infer count
     # as it is in charge of producing OUTPUT0
     if (infer_count[0] != 0):
         self.assertTrue(False, "unexpected non-zero infer count for 'simple' version 1")
     elif (infer_count[1] == 0):
         self.assertTrue(False, "unexpected zero infer count for 'simple' version 2")
Example #8
 def _check_infer(self, tensor_shape, batch_size=1):
     try:
         iu.infer_exact(self,
                        self.model_name_,
                        tensor_shape,
                        batch_size,
                        self.dtype_,
                        self.dtype_,
                        self.dtype_,
                        model_version=1,
                        use_grpc=False,
                        use_streaming=False)
     except InferenceServerException as ex:
         self.assertTrue(False, "unexpected error {}".format(ex))
            def test_ensemble_label_lookup(self):
                if all(x in BACKENDS for x in ['graphdef', 'netdef', 'savedmodel']):
                    # Ensemble needs to look up label from the actual model
                    for bs in (1, 8):
                        iu.infer_exact(self, "mix_platform", (bs, 16), bs,
                            np.float32, np.float32, np.float32, output0_raw=False, output1_raw=False,
                            use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                            use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

                if all(x in BACKENDS for x in ['graphdef', 'netdef', 'savedmodel']):
                    # Label from the actual model will be passed along the nested ensemble
                    for bs in (1, 8):
                        iu.infer_exact(self, "mix_ensemble", (bs, 16), bs,
                            np.int32, np.float32, np.float32, output0_raw=False, output1_raw=False,
                            use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                            use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

                if "graphdef" in BACKENDS:
                    # If label file is provided, it will use the provided label file directly
                    try:
                        iu.infer_exact(self, "wrong_label", (1, 16), 1,
                            np.int32, np.float32, np.float32, output0_raw=False, output1_raw=False,
                            use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                            use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                    except AssertionError:
                        # Sanity check that infer_exact failed since this ensemble is provided
                        # with unexpected labels
                        pass

                if "graphdef" in BACKENDS:
                    for bs in (1, 8):
                        iu.infer_exact(self, "label_override", (bs, 16), bs,
                            np.int32, np.float32, np.float32, output0_raw=False, output1_raw=False,
                            use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                            use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
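BACKENDS, which gates the label-lookup cases above, is not defined in these snippets; one plausible definition, overridable from the environment (the default list is a guess):

import os

# Assumed: names of the backends under test, e.g. set by the test runner.
BACKENDS = os.environ.get('BACKENDS', "graphdef savedmodel netdef plan").split()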
Example #10
    def test_load_specific_optimization_profile(self):
        # Only OP 5 should be available, which only accepts batch sizes 6 through 8
        tensor_shape = (1,)
        try:
            iu.infer_exact(self, self.model_name_, (1,) + tensor_shape, 1,
                            self.dtype_, self.dtype_, self.dtype_)
        except InferenceServerException as ex:
            self.assertTrue(
              "model expected the shape of dimension 0 to be between 6 and 8 but received 1" in ex.message())

        try:
            iu.infer_exact(self, self.model_name_, (8,) + tensor_shape, 8,
                            self.dtype_, self.dtype_, self.dtype_)
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
    def test_parse_error_modelfail(self):
        # --strict-readiness=true so server is live but not ready
        input_size = 16
        tensor_shape = (input_size, )

        # Server was started but with a model that fails to load
        try:
            for pair in [("localhost:8000", ProtocolType.HTTP),
                         ("localhost:8001", ProtocolType.GRPC)]:
                model_name = tu.get_model_name('graphdef', np.float32,
                                               np.float32, np.float32)
                ctx = ServerStatusContext(pair[0], pair[1], model_name, True)
                ss = ctx.get_server_status()
                self.assertEqual(os.environ["TENSORRT_SERVER_VERSION"],
                                 ss.version)
                self.assertEqual("inference:0", ss.id)
                self.assertEqual(server_status.SERVER_READY, ss.ready_state)
                uptime = ss.uptime_ns
                self.assertGreater(uptime, 0)

                self.assertEqual(len(ss.model_status), 1)
                self.assertTrue(model_name in ss.model_status,
                                "expected status for model " + model_name)
                for (k, v) in iteritems(
                        ss.model_status[model_name].version_status):
                    self.assertEqual(v.ready_state,
                                     server_status.MODEL_UNAVAILABLE)

                hctx = ServerHealthContext(pair[0], pair[1], True)
                self.assertFalse(hctx.is_ready())
                self.assertTrue(hctx.is_live())

        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        try:
            iu.infer_exact(self, 'graphdef', tensor_shape, 1, True, np.float32,
                           np.float32, np.float32)
            self.assertTrue(
                False, "expected error for unavailable model " + model_name)
        except InferenceServerException as ex:
            self.assertEqual("inference:0", ex.server_id())
            self.assertGreater(ex.request_id(), 0)
            self.assertTrue(ex.message().startswith(
                "Inference request for unknown model 'graphdef_float32_float32_float32'"
            ))
    def test_load_specific_optimization_profile(self):
        # Only OP 5 should be available, which only accepts batch sizes 6 through 8
        tensor_shape = (1, )
        try:
            iu.infer_exact(self, self.model_name_, tensor_shape, 1,
                           self.dtype_, self.dtype_, self.dtype_)
        except InferenceServerException as ex:
            self.assertEqual("inference:0", ex.server_id())
            self.assertTrue(
                "The shape of dimension 0 is expected to be in range from 6 to 8, Got: 1"
                in ex.message())

        try:
            iu.infer_exact(self, self.model_name_, tensor_shape, 8,
                           self.dtype_, self.dtype_, self.dtype_)
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
Example #13
    def test_load_default_optimization_profile(self):
        # Only default OP (OP 0) has max tensor shape 33
        tensor_shape = (33,)

        try:
            iu.infer_exact(self, self.model_name_, (8,) + tensor_shape, 8,
                            self.dtype_, self.dtype_, self.dtype_)
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        over_tensor_shape = (34,)
        try:
            iu.infer_exact(self, self.model_name_, (8,) + over_tensor_shape, 8,
                            self.dtype_, self.dtype_, self.dtype_)
        except InferenceServerException as ex:
            self.assertTrue(
                    "model expected the shape of dimension 1 to be between 1 and 33 but received 34" in ex.message())
    def test_load_default_optimization_profile(self):
        # Only default OP (OP 0) has max tensor shape 33
        tensor_shape = (33, )

        try:
            iu.infer_exact(self, self.model_name_, tensor_shape, 8,
                           self.dtype_, self.dtype_, self.dtype_)
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        over_tensor_shape = (34, )
        try:
            iu.infer_exact(self, self.model_name_, over_tensor_shape, 8,
                           self.dtype_, self.dtype_, self.dtype_)
        except InferenceServerException as ex:
            self.assertEqual("inference:0", ex.server_id())
            self.assertTrue(
                "The shape of dimension 1 is expected to be in range from 1 to 33, Got: 34"
                in ex.message())
    def test_raw_version_specific_1_3(self):
        input_size = 16

        # There are 3 versions of *_float32_float32_float32 but only
        # versions 1 and 3 should be available.
        for platform in ('graphdef', 'savedmodel', 'netdef', 'plan'):
            if platform == 'plan' and CPU_ONLY:
                continue
            if platform not in BACKENDS:
                continue
            tensor_shape = (1, input_size)
            iu.infer_exact(self, platform, tensor_shape, 1,
                           np.float32, np.float32, np.float32,
                           model_version=1, swap=False,
                           use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                           use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

            try:
                iu.infer_exact(self, platform, tensor_shape, 1,
                               np.float32, np.float32, np.float32,
                               model_version=2, swap=True,
                               use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                               use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            except InferenceServerException as ex:
                self.assertTrue(
                    ex.message().startswith("Request for unknown model"))

            iu.infer_exact(self, platform, tensor_shape, 1,
                           np.float32, np.float32, np.float32,
                           model_version=3, swap=True,
                           use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                           use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
    def test_raw_version_latest_1(self):
        input_size = 16
        tensor_shape = (1, input_size)

        # There are 3 versions of *_int8_int8_int8 but
        # only version 3 should be available
        for platform in ('graphdef', 'savedmodel'):
            if platform not in BACKENDS:
                continue
            try:
                iu.infer_exact(self, platform, tensor_shape, 1,
                               np.int8, np.int8, np.int8,
                               model_version=1, swap=False,
                               use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                               use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            except InferenceServerException as ex:
                self.assertTrue(
                    ex.message().startswith("Request for unknown model"))

            try:
                iu.infer_exact(self, platform, tensor_shape, 1,
                               np.int8, np.int8, np.int8,
                               model_version=2, swap=True,
                               use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                               use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            except InferenceServerException as ex:
                self.assertTrue(
                    ex.message().startswith("Request for unknown model"))

            iu.infer_exact(self, platform, tensor_shape, 1,
                           np.int8, np.int8, np.int8,
                           model_version=3, swap=True,
                           use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                           use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            def test_ensemble_mix_batch_nobatch(self):
                base_names = ["batch_to_nobatch", "nobatch_to_batch"]
                for name in base_names:
                    for bs in (1, 8):
                        iu.infer_exact(
                            self,
                            name, (bs, 16),
                            bs,
                            np.float32,
                            np.float32,
                            np.float32,
                            use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                            use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                    iu.infer_exact(
                        self,
                        name + "_nobatch", (8, 16),
                        1,
                        np.float32,
                        np.float32,
                        np.float32,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

                # batch -> nobatch -> batch
                for bs in (1, 8):
                    iu.infer_exact(
                        self,
                        "mix_nobatch_batch", (bs, 16),
                        bs,
                        np.float32,
                        np.float32,
                        np.float32,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
    def test_raw_version_all(self):
        input_size = 16
        tensor_shape = (input_size, )

        # There are 3 versions of *_int32_int32_int32 and all should
        # be available.
        for platform in ('graphdef', 'savedmodel', 'netdef'):
            iu.infer_exact(self,
                           platform,
                           tensor_shape,
                           1,
                           np.int32,
                           np.int32,
                           np.int32,
                           model_version=1,
                           swap=False)
            iu.infer_exact(self,
                           platform,
                           tensor_shape,
                           1,
                           np.int32,
                           np.int32,
                           np.int32,
                           model_version=2,
                           swap=True)
            iu.infer_exact(self,
                           platform,
                           tensor_shape,
                           1,
                           np.int32,
                           np.int32,
                           np.int32,
                           model_version=3,
                           swap=True)
    def check_response(self, trial, bs, thresholds,
                       requested_outputs=("OUTPUT0", "OUTPUT1"), input_size=16):
        global _check_exception
        try:
            start_ms = int(round(time.time() * 1000))

            if trial == "savedmodel" or trial == "graphdef" or trial == "netdef" \
                    or trial == "custom" or trial == "libtorch" or trial == "onnx":
                tensor_shape = (input_size,)
                iu.infer_exact(self, trial, tensor_shape, bs,
                               np.float32, np.float32, np.float32, swap=False,
                               model_version=1, outputs=requested_outputs,
                               use_grpc=False, skip_request_id_check=True,
                               use_streaming=False)
            elif trial == "plan":
                tensor_shape = (input_size,1,1)
                iu.infer_exact(self, trial, tensor_shape, bs,
                               np.float32, np.float32, np.float32, swap=False,
                               model_version=1, outputs=requested_outputs,
                               use_grpc=False, skip_request_id_check=True,
                               use_streaming=False)
            else:
                self.assertFalse(True, "unknown trial type: " + trial)

            end_ms = int(round(time.time() * 1000))

            lt_ms = thresholds[0]
            gt_ms = thresholds[1]
            if lt_ms is not None:
                self.assertTrue((end_ms - start_ms) < lt_ms,
                                "expected less than " + str(lt_ms) +
                                "ms response time, got " + str(end_ms - start_ms) + " ms")
            if gt_ms is not None:
                self.assertTrue((end_ms - start_ms) > gt_ms,
                                "expected greater than " + str(gt_ms) +
                                "ms response time, got " + str(end_ms - start_ms) + " ms")
        except Exception as ex:
            _check_exception = ex
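In this variant thresholds is a (max_ms, min_ms) pair and either bound may be None to skip that check; illustrative calls (the values are made up):

# From inside a test method: require the batch-8 graphdef request to complete
# in under 3 seconds, and a deliberately delayed plan request to take at
# least 2 seconds (None disables the other bound in each case).
self.check_response("graphdef", 8, (3000, None))
self.check_response("plan", 1, (None, 2000))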
        def test_ensemble_label_lookup(self):
            # Ensemble needs to look up label from the actual model
            for bs in (1, 8):
                iu.infer_exact(self,
                               "mix_platform", (16, ),
                               bs,
                               np.float32,
                               np.float32,
                               np.float32,
                               output0_raw=False,
                               output1_raw=False)

            # Label from the actual model will be passed along the nested ensemble
            for bs in (1, 8):
                iu.infer_exact(self,
                               "mix_ensemble", (16, ),
                               bs,
                               np.int32,
                               np.float32,
                               np.float32,
                               output0_raw=False,
                               output1_raw=False)

            # If label file is provided, it will use the provided label file directly
            try:
                iu.infer_exact(self,
                               "wrong_label", (16, ),
                               1,
                               np.int32,
                               np.float32,
                               np.float32,
                               output0_raw=False,
                               output1_raw=False)
            except AssertionError:
                # Sanity check that infer_exact failed since this ensemble is provided
                # with unexpected labels
                pass

            for bs in (1, 8):
                iu.infer_exact(self,
                               "label_override", (16, ),
                               bs,
                               np.int32,
                               np.float32,
                               np.float32,
                               output0_raw=False,
                               output1_raw=False)
Example #21
    def test_raw_version_specific_1(self):
        input_size = 16
        tensor_shape = (input_size, )

        # There are 3 versions of *_float16_float16_float16 but only
        # version 1 should be available.
        for platform in ('graphdef', 'savedmodel'):
            iu.infer_exact(self,
                           platform,
                           tensor_shape,
                           1,
                           np.float16,
                           np.float16,
                           np.float16,
                           model_version=1,
                           swap=False,
                           use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                           use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

            try:
                iu.infer_exact(
                    self,
                    platform,
                    tensor_shape,
                    1,
                    np.float16,
                    np.float16,
                    np.float16,
                    model_version=2,
                    swap=True,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            except InferenceServerException as ex:
                self.assertEqual("inference:0", ex.server_id())
                self.assertTrue(ex.message().startswith(
                    "Inference request for unknown model"))

            try:
                iu.infer_exact(
                    self,
                    platform,
                    tensor_shape,
                    1,
                    np.float16,
                    np.float16,
                    np.float16,
                    model_version=3,
                    swap=True,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            except InferenceServerException as ex:
                self.assertEqual("inference:0", ex.server_id())
                self.assertTrue(ex.message().startswith(
                    "Inference request for unknown model"))
Example #22
    def test_ensemble_mix_batch_nobatch(self):
        base_names = ["batch_to_nobatch", "nobatch_to_batch"]
        for name in base_names:
            for bs in (1, 8):
                iu.infer_exact(self, name, (16,), bs,
                    np.float32, np.float32, np.float32)
            iu.infer_exact(self, name + "_nobatch", (8, 16,), 1,
                np.float32, np.float32, np.float32)

        # batch -> nobatch -> batch
        for bs in (1, 8):
            iu.infer_exact(self, "mix_nobatch_batch", (16,), bs,
                np.float32, np.float32, np.float32)