Code example #1
0
def create_plan_model(models_dir, max_batch, model_version, input_shape,
                      output0_shape, output1_shape, input_dtype, output0_dtype,
                      output1_dtype, input_memory_format,
                      output_memory_format):
    """Create a TensorRT plan model: its model config plus the engine file.

    Does nothing when the dtype/shape combination is not supported for
    TRT models.
    """
    # Bail out early on unsupported dtype/shape combinations.
    supported = tu.validate_for_trt_model(input_dtype, output0_dtype,
                                          output1_dtype, input_shape,
                                          output0_shape, output1_shape)
    if not supported:
        return

    create_plan_modelconfig(models_dir, max_batch, model_version, input_shape,
                            output0_shape, output1_shape, input_dtype,
                            output0_dtype, output1_dtype, input_memory_format,
                            output_memory_format, None)

    # A dynamic engine is required whenever any of the shapes contains a
    # variable (non-fixed) dimension; otherwise build a fixed-shape engine.
    all_shapes_fixed = (tu.shape_is_fixed(input_shape)
                        and tu.shape_is_fixed(output0_shape)
                        and tu.shape_is_fixed(output1_shape))
    build_modelfile = (create_plan_fixed_modelfile
                       if all_shapes_fixed else create_plan_dynamic_modelfile)
    build_modelfile(models_dir, max_batch, model_version, input_shape,
                    output0_shape, output1_shape, input_dtype, output0_dtype,
                    output1_dtype, input_memory_format, output_memory_format)
    def _trt_reshape(self,
                     dtype,
                     input_shapes,
                     output_shapes=None,
                     no_batch=True):
        """Run zero-data inference against the TRT reshape models.

        'input_shapes' is a list of shapes, one per input; when
        'output_shapes' is omitted the outputs mirror the input shapes.
        """
        output_shapes = input_shapes if output_shapes is None else output_shapes

        shape0 = input_shapes[0]
        if not tu.validate_for_trt_model(dtype, dtype, dtype, shape0, shape0,
                                         shape0):
            return

        # Batching model at a couple of batch sizes, then (optionally) the
        # variant built without batching support.
        cases = [('plan', bs) for bs in (1, 8)]
        if no_batch:
            cases.append(('plan_nobatch', 1))
        for model_name, batch_size in cases:
            iu.infer_zero(
                self,
                model_name,
                batch_size,
                dtype,
                input_shapes,
                output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
Code example #3
0
    def _trt_reshape(self, dtype, input_shapes, output_shapes=None, no_batch=True):
        """Exercise the TRT reshape models with zero-data inference.

        'input_shapes' holds one shape per input; outputs default to
        mirroring the input shapes when 'output_shapes' is None.
        """
        if output_shapes is None:
            output_shapes = input_shapes

        first_shape = input_shapes[0]
        if tu.validate_for_trt_model(dtype, dtype, dtype, first_shape,
                                     first_shape, first_shape):
            # batching model, at batch size 1 and a larger batch
            for batch_size in (1, 8):
                iu.infer_zero(self, 'plan', batch_size, dtype, input_shapes,
                              output_shapes)
            # variant built without batching support
            if no_batch:
                iu.infer_zero(self, 'plan_nobatch', 1, dtype, input_shapes,
                              output_shapes)
Code example #4
0
    def _full_exact(self, req_raw, input_dtype, output0_dtype, output1_dtype, swap):
        """Run exact-result inference on every backend that supports the
        requested dtype combination (graphdef/savedmodel, netdef, plan,
        custom), covering both the batching and non-batching model of each.
        """
        input_size = 16
        flat_shape = (input_size,)

        # One entry per backend family: (validator, model names, tensor
        # shape, swap flag). The custom model is src/custom/addsub... which
        # never swaps its inputs, so its swap flag is pinned to False.
        backend_cases = [
            (tu.validate_for_tf_model, ['graphdef', 'savedmodel'],
             flat_shape, swap),
            (tu.validate_for_c2_model, ['netdef'], flat_shape, swap),
            (tu.validate_for_trt_model, ['plan'], (input_size, 1, 1), swap),
            (tu.validate_for_custom_model, ['custom'], flat_shape, False),
        ]

        for validator, models, shape, do_swap in backend_cases:
            if not validator(input_dtype, output0_dtype, output1_dtype):
                continue
            # models that support batching
            for bs in (1, 8):
                for model in models:
                    iu.infer_exact(self, model, shape, bs, req_raw,
                                   input_dtype, output0_dtype, output1_dtype,
                                   swap=do_swap)
            # models that do not support batching
            for model in models:
                iu.infer_exact(self, model + '_nobatch', shape, 1, req_raw,
                               input_dtype, output0_dtype, output1_dtype,
                               swap=do_swap)
Code example #5
0
File: infer_test.py  Project: maxdml/server
    def _full_exact(self, input_dtype, output0_dtype, output1_dtype,
                    output0_raw, output1_raw, swap):
        """Run exact-result inference for the given dtype combination on
        every enabled backend, and on the ensemble wrappers of each backend
        that supports the combination."""

        def _infer_exact_helper(tester,
                                pf,
                                tensor_shape,
                                batch_size,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=True,
                                output1_raw=True,
                                model_version=None,
                                swap=False,
                                outputs=("OUTPUT0", "OUTPUT1"),
                                use_http=USE_HTTP,
                                use_grpc=USE_GRPC,
                                use_http_json_tensors=True,
                                skip_request_id_check=True,
                                use_streaming=True,
                                correlation_id=0):
            """Infer on the '<pf>_nobatch' model (at bs==1 only) and on the
            batching '<pf>' model at batch sizes 1 and batch_size; the
            batching model's shape carries the batch dim in front."""
            for bs in (1, batch_size):
                # model that does not support batching
                if bs == 1:
                    iu.infer_exact(
                        tester,
                        pf + "_nobatch",
                        tensor_shape,
                        bs,
                        input_dtype,
                        output0_dtype,
                        output1_dtype,
                        output0_raw=output0_raw,
                        output1_raw=output1_raw,
                        model_version=model_version,
                        swap=swap,
                        outputs=outputs,
                        use_http=use_http,
                        use_grpc=use_grpc,
                        use_http_json_tensors=use_http_json_tensors,
                        skip_request_id_check=skip_request_id_check,
                        use_streaming=use_streaming,
                        correlation_id=correlation_id,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                # model that supports batching
                iu.infer_exact(
                    tester,
                    pf, (bs, ) + tensor_shape,
                    bs,
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    output0_raw=output0_raw,
                    output1_raw=output1_raw,
                    model_version=model_version,
                    swap=swap,
                    outputs=outputs,
                    use_http=use_http,
                    use_grpc=use_grpc,
                    use_http_json_tensors=use_http_json_tensors,
                    skip_request_id_check=skip_request_id_check,
                    use_streaming=use_streaming,
                    correlation_id=correlation_id,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

        input_size = 16

        # Collect ensemble model-name prefixes whose ensembles support this
        # dtype combination; "" (no prefix, i.e. the plain model) is always
        # included.
        # NOTE(review): gating ensemble coverage on OS_WINDOWS is unusual —
        # confirm whether this flag should instead exclude Windows.
        all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
        ensemble_prefix = [""]
        if ENSEMBLES and OS_WINDOWS:
            for prefix in all_ensemble_prefix:
                if tu.validate_for_ensemble_model(prefix, input_dtype,
                                                  output0_dtype, output1_dtype,
                                                  (input_size, ),
                                                  (input_size, ),
                                                  (input_size, )):
                    ensemble_prefix.append(prefix)

        # TensorFlow backends (graphdef, savedmodel).
        if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                    (input_size, ), (input_size, ),
                                    (input_size, )):
            for prefix in ensemble_prefix:
                for pf in ["graphdef", "savedmodel"]:
                    if pf in BACKENDS:
                        _infer_exact_helper(self,
                                            prefix + pf, (input_size, ),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)

        # TensorRT plan backend; skipped on CPU-only runs. int8 plan models
        # use 3-dim (input_size, 1, 1) tensors, other dtypes are flat.
        if not CPU_ONLY and tu.validate_for_trt_model(
                input_dtype, output0_dtype, output1_dtype, (input_size, 1, 1),
            (input_size, 1, 1), (input_size, 1, 1)):
            for prefix in ensemble_prefix:
                if 'plan' in BACKENDS:
                    if input_dtype == np.int8:
                        _infer_exact_helper(self,
                                            prefix + 'plan',
                                            (input_size, 1, 1),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)
                    else:
                        _infer_exact_helper(self,
                                            prefix + 'plan', (input_size, ),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)

        # ONNX Runtime backend.
        if tu.validate_for_onnx_model(input_dtype, output0_dtype,
                                      output1_dtype, (input_size, ),
                                      (input_size, ), (input_size, )):
            for prefix in ensemble_prefix:
                if 'onnx' in BACKENDS:
                    _infer_exact_helper(self,
                                        prefix + 'onnx', (input_size, ),
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        # LibTorch backend.
        if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                          output1_dtype, (input_size, ),
                                          (input_size, ), (input_size, )):
            for prefix in ensemble_prefix:
                if 'libtorch' in BACKENDS:
                    _infer_exact_helper(self,
                                        prefix + 'libtorch', (input_size, ),
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        # NOTE(review): 'prefix' here is the value leaked from whichever
        # `for prefix in ...` loop ran last (NameError if none ran). This
        # block is likely meant to be inside a loop or use a literal ""
        # check against ensemble_prefix — confirm against upstream.
        if prefix == "":
            if 'python' in BACKENDS:
                _infer_exact_helper(self,
                                    prefix + 'python', (input_size, ),
                                    8,
                                    input_dtype,
                                    output0_dtype,
                                    output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw,
                                    swap=swap)
Code example #6
0
    def _full_exact(self, input_dtype, output0_dtype, output1_dtype,
                    output0_raw, output1_raw, swap):
        """Drive exact-result inference on every framework backend that
        supports the requested input/output dtype combination."""

        def _infer_exact_helper(tester,
                                pf,
                                tensor_shape,
                                batch_size,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=True,
                                output1_raw=True,
                                model_version=None,
                                swap=False,
                                outputs=("OUTPUT0", "OUTPUT1"),
                                use_http=True,
                                use_grpc=True,
                                skip_request_id_check=False,
                                use_streaming=True,
                                correlation_id=0):
            """Infer on the batching '<pf>' model at batch sizes 1 and
            batch_size; the batch dimension is prepended to the shape."""
            for bs in (1, batch_size):
                iu.infer_exact(tester, pf, (bs, ) + tensor_shape, bs,
                               input_dtype, output0_dtype, output1_dtype,
                               output0_raw, output1_raw, model_version, swap,
                               outputs, use_http, use_grpc,
                               skip_request_id_check, use_streaming,
                               correlation_id)

        input_size = 16
        flat = (input_size, )
        trt_shape = (input_size, 1, 1)

        # TensorFlow backends.
        if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                    flat, flat, flat):
            for pf in ("graphdef", "savedmodel"):
                _infer_exact_helper(self, pf, flat, 8, input_dtype,
                                    output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw,
                                    swap=swap)

        # Caffe2 netdef backend.
        if tu.validate_for_c2_model(input_dtype, output0_dtype, output1_dtype,
                                    flat, flat, flat):
            _infer_exact_helper(self, 'netdef', flat, 8, input_dtype,
                                output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap)

        # TensorRT plan backend; int8 plan models use 3-dim tensors,
        # every other dtype uses the flat shape.
        if tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype,
                                     trt_shape, trt_shape, trt_shape):
            plan_shape = trt_shape if input_dtype == np.int8 else flat
            _infer_exact_helper(self, 'plan', plan_shape, 8, input_dtype,
                                output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap)

        # ONNX Runtime backend.
        if tu.validate_for_onnx_model(input_dtype, output0_dtype,
                                      output1_dtype, flat, flat, flat):
            _infer_exact_helper(self, 'onnx', flat, 8, input_dtype,
                                output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap)

        # LibTorch backend.
        if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                          output1_dtype, flat, flat, flat):
            _infer_exact_helper(self, 'libtorch', flat, 8, input_dtype,
                                output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap)
Code example #7
0
    def _full_exact(self,
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    input_shape,
                    output0_shape,
                    output1_shape,
                    output0_raw=True,
                    output1_raw=True,
                    swap=False):
        """Run exact-result inference for the given dtype/shape combination
        on every supporting backend, plus the ensemble wrappers of the
        backends that have basic ensembles (tf, plan, netdef)."""

        def _infer_exact_helper(tester,
                                pf,
                                tensor_shape,
                                batch_size,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=True,
                                output1_raw=True,
                                model_version=None,
                                swap=False,
                                outputs=("OUTPUT0", "OUTPUT1"),
                                use_http=True,
                                use_grpc=True,
                                skip_request_id_check=False,
                                use_streaming=True,
                                correlation_id=0):
            """Infer on the '<pf>_nobatch' model (bs==1 only) and on the
            batching '<pf>' model at batch sizes 1 and batch_size."""
            for bs in (1, batch_size):
                # model that does not support batching
                if bs == 1:
                    iu.infer_exact(tester, pf + "_nobatch", tensor_shape, bs,
                                   input_dtype, output0_dtype, output1_dtype,
                                   output0_raw, output1_raw, model_version,
                                   swap, outputs, use_http, use_grpc,
                                   skip_request_id_check, use_streaming,
                                   correlation_id)
                # model that supports batching
                iu.infer_exact(tester, pf, tensor_shape, bs, input_dtype,
                               output0_dtype, output1_dtype, output0_raw,
                               output1_raw, model_version, swap, outputs,
                               use_http, use_grpc, skip_request_id_check,
                               use_streaming, correlation_id)

        # Collect ensemble model-name prefixes whose ensembles support this
        # dtype/shape combination; "" (the plain model) is always included.
        all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
        ensemble_prefix = [""]
        for prefix in all_ensemble_prefix:
            if tu.validate_for_ensemble_model(prefix, input_dtype,
                                              output0_dtype, output1_dtype,
                                              input_shape, input_shape,
                                              input_shape):
                ensemble_prefix.append(prefix)

        # TensorFlow backends (graphdef, savedmodel).
        if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                    input_shape, output0_shape, output1_shape):
            for prefix in ensemble_prefix:
                for pf in ["graphdef", "savedmodel"]:
                    _infer_exact_helper(self,
                                        prefix + pf,
                                        input_shape,
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        # TensorRT plan backend. int8 plan models append two unit dims to
        # the input shape; other dtypes use the shape unchanged.
        if tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype,
                                     input_shape, output0_shape,
                                     output1_shape):
            for prefix in ensemble_prefix:
                if input_dtype == np.int8:
                    _infer_exact_helper(self,
                                        prefix + 'plan',
                                        input_shape + (1, 1),
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)
                else:
                    _infer_exact_helper(self,
                                        prefix + 'plan',
                                        input_shape,
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        # Caffe2 netdef backend.
        if tu.validate_for_c2_model(input_dtype, output0_dtype, output1_dtype,
                                    input_shape, output0_shape, output1_shape):
            for prefix in ensemble_prefix:
                _infer_exact_helper(self,
                                    prefix + 'netdef',
                                    input_shape,
                                    8,
                                    input_dtype,
                                    output0_dtype,
                                    output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw,
                                    swap=swap)

        # the custom model is src/custom/addsub... it does not swap
        # the inputs so always set to False
        if tu.validate_for_custom_model(input_dtype, output0_dtype,
                                        output1_dtype, input_shape,
                                        output0_shape, output1_shape):
            # No basic ensemble models are created against custom models
            _infer_exact_helper(self,
                                'custom',
                                input_shape,
                                8,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=False)

        # ONNX Runtime backend.
        if tu.validate_for_onnx_model(input_dtype, output0_dtype,
                                      output1_dtype, input_shape,
                                      output0_shape, output1_shape):
            # No basic ensemble models are created against custom models [TODO]
            _infer_exact_helper(self,
                                'onnx',
                                input_shape,
                                8,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap)

        # LibTorch backend.
        if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                          output1_dtype, input_shape,
                                          output0_shape, output1_shape):
            # No basic ensemble models are created against custom models [TODO]
            _infer_exact_helper(self,
                                'libtorch',
                                input_shape,
                                8,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap)
Code example #8
0
    def _full_exact(self, input_dtype, output0_dtype, output1_dtype,
                    output0_raw, output1_raw, swap):
        def _infer_exact_helper(tester,
                                pf,
                                tensor_shape,
                                batch_size,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=True,
                                output1_raw=True,
                                model_version=None,
                                swap=False,
                                outputs=("OUTPUT0", "OUTPUT1"),
                                use_http=True,
                                use_grpc=True,
                                use_http_json_tensors=True,
                                skip_request_id_check=True,
                                use_streaming=True,
                                correlation_id=0):
            for bs in (1, batch_size):
                # model that does not support batching
                if bs == 1:
                    iu.infer_exact(
                        tester,
                        pf + "_nobatch",
                        tensor_shape,
                        bs,
                        input_dtype,
                        output0_dtype,
                        output1_dtype,
                        output0_raw,
                        output1_raw,
                        model_version,
                        swap,
                        outputs,
                        use_http,
                        use_grpc,
                        use_http_json_tensors,
                        skip_request_id_check,
                        use_streaming,
                        correlation_id,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                # model that supports batching
                iu.infer_exact(
                    tester,
                    pf, (bs, ) + tensor_shape,
                    bs,
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    output0_raw,
                    output1_raw,
                    model_version,
                    swap,
                    outputs,
                    use_http,
                    use_grpc,
                    use_http_json_tensors,
                    skip_request_id_check,
                    use_streaming,
                    correlation_id,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

        input_size = 16

        all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
        ensemble_prefix = [""]
        if ENSEMBLES and ("custom" in BACKENDS):
            for prefix in all_ensemble_prefix:
                if tu.validate_for_ensemble_model(prefix, input_dtype,
                                                  output0_dtype, output1_dtype,
                                                  (input_size, ),
                                                  (input_size, ),
                                                  (input_size, )):
                    ensemble_prefix.append(prefix)

        if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                    (input_size, ), (input_size, ),
                                    (input_size, )):
            for prefix in ensemble_prefix:
                for pf in ["graphdef", "savedmodel"]:
                    if pf in BACKENDS:
                        _infer_exact_helper(self,
                                            prefix + pf, (input_size, ),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)

        if tu.validate_for_c2_model(input_dtype, output0_dtype, output1_dtype,
                                    (input_size, ), (input_size, ),
                                    (input_size, )):
            for prefix in ensemble_prefix:
                if 'netdef' in BACKENDS:
                    _infer_exact_helper(self,
                                        prefix + 'netdef', (input_size, ),
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        if not CPU_ONLY and tu.validate_for_trt_model(
                input_dtype, output0_dtype, output1_dtype, (input_size, 1, 1),
            (input_size, 1, 1), (input_size, 1, 1)):
            for prefix in ensemble_prefix:
                if 'plan' in BACKENDS:
                    if input_dtype == np.int8:
                        _infer_exact_helper(self,
                                            prefix + 'plan',
                                            (input_size, 1, 1),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)
                    else:
                        _infer_exact_helper(self,
                                            prefix + 'plan', (input_size, ),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)

        # the custom model is src/custom/addsub... it does not swap
        # the inputs so always set to False
        if tu.validate_for_custom_model(input_dtype, output0_dtype,
                                        output1_dtype, (input_size, ),
                                        (input_size, ), (input_size, )):
            # No basic ensemble models are created against custom models
            if 'custom' in BACKENDS:
                _infer_exact_helper(self,
                                    'custom', (input_size, ),
                                    8,
                                    input_dtype,
                                    output0_dtype,
                                    output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw,
                                    swap=False)

        if tu.validate_for_onnx_model(input_dtype, output0_dtype,
                                      output1_dtype, (input_size, ),
                                      (input_size, ), (input_size, )):
            for prefix in ensemble_prefix:
                if 'onnx' in BACKENDS:
                    _infer_exact_helper(self,
                                        prefix + 'onnx', (input_size, ),
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                          output1_dtype, (input_size, ),
                                          (input_size, ), (input_size, )):
            for prefix in ensemble_prefix:
                if 'libtorch' in BACKENDS:
                    _infer_exact_helper(self,
                                        prefix + 'libtorch', (input_size, ),
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)
コード例 #9
0
    def _full_exact(self,
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    input_shape,
                    output0_shape,
                    output1_shape,
                    output0_raw=True,
                    output1_raw=True,
                    swap=False):
        """Run exact-result inference against every backend whose validator
        accepts this dtype/shape combination.

        TF (graphdef/savedmodel) and TRT plan models are also exercised
        under each valid ensemble prefix; onnx and libtorch run without
        ensemble prefixes (see TODO comments below).
        """

        def _infer_exact_helper(tester,
                                pf,
                                tensor_shape,
                                batch_size,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=True,
                                output1_raw=True,
                                model_version=None,
                                swap=False,
                                outputs=("OUTPUT0", "OUTPUT1"),
                                use_http=True,
                                use_grpc=True,
                                skip_request_id_check=False,
                                use_streaming=True,
                                correlation_id=0):
            # Keyword arguments shared by every infer_exact invocation below.
            common_kwargs = dict(
                output0_raw=output0_raw,
                output1_raw=output1_raw,
                model_version=model_version,
                swap=swap,
                outputs=outputs,
                use_http=use_http,
                use_grpc=use_grpc,
                skip_request_id_check=skip_request_id_check,
                use_streaming=use_streaming,
                correlation_id=correlation_id,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            for bs in (1, batch_size):
                if bs == 1:
                    # The variant that does not support batching is only
                    # checked at batch size one.
                    iu.infer_exact(tester, pf + "_nobatch", tensor_shape, bs,
                                   input_dtype, output0_dtype, output1_dtype,
                                   **common_kwargs)
                # Batching variant: prepend the batch dimension to the shape.
                iu.infer_exact(tester, pf, (bs,) + tensor_shape, bs,
                               input_dtype, output0_dtype, output1_dtype,
                               **common_kwargs)

        # "" runs the plain (non-ensemble) models; every prefix whose
        # ensemble models are valid for these dtypes/shapes is added.
        ensemble_prefix = [""]
        for candidate in ("simple_", "sequence_", "fan_"):
            if tu.validate_for_ensemble_model(candidate, input_dtype,
                                              output0_dtype, output1_dtype,
                                              input_shape, input_shape,
                                              input_shape):
                ensemble_prefix.append(candidate)

        if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                    input_shape, output0_shape, output1_shape):
            for prefix in ensemble_prefix:
                for pf in ("graphdef", "savedmodel"):
                    _infer_exact_helper(self,
                                        prefix + pf,
                                        input_shape,
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        if tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype,
                                     input_shape, output0_shape, output1_shape):
            # int8 plan models carry trailing (1, 1) dims.
            trt_shape = (input_shape + (1, 1)
                         if input_dtype == np.int8 else input_shape)
            for prefix in ensemble_prefix:
                _infer_exact_helper(self,
                                    prefix + 'plan',
                                    trt_shape,
                                    8,
                                    input_dtype,
                                    output0_dtype,
                                    output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw,
                                    swap=swap)

        if tu.validate_for_onnx_model(input_dtype, output0_dtype, output1_dtype,
                                      input_shape, output0_shape,
                                      output1_shape):
            # No basic ensemble models are created against custom models [TODO]
            _infer_exact_helper(self,
                                'onnx',
                                input_shape,
                                8,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap)

        if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                          output1_dtype, input_shape,
                                          output0_shape, output1_shape):
            # No basic ensemble models are created against custom models [TODO]
            _infer_exact_helper(self,
                                'libtorch',
                                input_shape,
                                8,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap)
コード例 #10
0
def create_plan_modelconfig(models_dir, max_batch, model_version, input_shape,
                            output0_shape, output1_shape, input_dtype,
                            output0_dtype, output1_dtype, input_memory_format,
                            output_memory_format, version_policy):
    """Write the config.pbtxt for a TensorRT plan model into models_dir.

    The model name encodes batching support, the I/O memory formats and
    the three dtypes. `version_policy` is either None (latest, 1 version)
    or a (kind, value) pair where kind is 'latest', 'specific', or
    anything else for an "all" policy. `model_version` is unused here;
    presumably kept for signature parity with the modelfile creators —
    TODO confirm. Does nothing when the dtype/shape combination is not
    valid for a TRT model.
    """
    if not tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype,
                                     input_shape, output0_shape,
                                     output1_shape):
        return

    # Unpack version policy. Named policy_type/policy_val to avoid
    # shadowing the builtin `type`.
    version_policy_str = "{ latest { num_versions: 1 }}"
    if version_policy is not None:
        policy_type, policy_val = version_policy
        if policy_type == 'latest':
            version_policy_str = "{{ latest {{ num_versions: {} }}}}".format(
                policy_val)
        elif policy_type == 'specific':
            version_policy_str = "{{ specific {{ versions: {} }}}}".format(
                policy_val)
        else:
            version_policy_str = "{ all { }}"

    # Use a different model name for different kinds of models
    base_name = "plan_nobatch" if max_batch == 0 else "plan"
    base_name += "_" + trt_format_to_string(
        input_memory_format) + "_" + trt_format_to_string(output_memory_format)
    model_name = tu.get_model_name(base_name, input_dtype, output0_dtype,
                                   output1_dtype)

    config_dir = models_dir + "/" + model_name
    # A -1 dim marks a dynamic shape: that config additionally pins the
    # instance group to an optimization profile.
    if -1 in input_shape:
        profile_index = 0
        config = '''
name: "{}"
platform: "tensorrt_plan"
max_batch_size: {}
version_policy: {}
input [
  {{
    name: "INPUT0"
    data_type: {}
    dims: [ {} ]
  }},
  {{
    name: "INPUT1"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT0"
    data_type: {}
    dims: [ {} ]
   }},
  {{
    name: "OUTPUT1"
    data_type: {}
    dims: [ {} ]
  }}
]
instance_group [
  {{
      profile:"{}"
  }}
]
'''.format(model_name, max_batch, version_policy_str,
           np_to_model_dtype(input_dtype), tu.shape_to_dims_str(input_shape),
           np_to_model_dtype(input_dtype), tu.shape_to_dims_str(input_shape),
           np_to_model_dtype(output0_dtype),
           tu.shape_to_dims_str(output0_shape),
           np_to_model_dtype(output1_dtype),
           tu.shape_to_dims_str(output1_shape), profile_index)
    else:
        config = '''
name: "{}"
platform: "tensorrt_plan"
max_batch_size: {}
version_policy: {}
input [
  {{
    name: "INPUT0"
    data_type: {}
    dims: [ {} ]
  }},
  {{
    name: "INPUT1"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT0"
    data_type: {}
    dims: [ {} ]
   }},
  {{
    name: "OUTPUT1"
    data_type: {}
    dims: [ {} ]
  }}
]
'''.format(model_name, max_batch, version_policy_str,
           np_to_model_dtype(input_dtype), tu.shape_to_dims_str(input_shape),
           np_to_model_dtype(input_dtype), tu.shape_to_dims_str(input_shape),
           np_to_model_dtype(output0_dtype),
           tu.shape_to_dims_str(output0_shape),
           np_to_model_dtype(output1_dtype),
           tu.shape_to_dims_str(output1_shape))

    # exist_ok replaces the old blanket `except OSError: pass`; a dir
    # that already exists is fine, any other failure should surface.
    os.makedirs(config_dir, exist_ok=True)

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)
コード例 #11
0
    def _full_exact(self, req_raw, input_dtype, output0_dtype, output1_dtype,
                    swap):
        """Run exact-result inference for each backend whose validator
        accepts this dtype combination: TF (graphdef/savedmodel), Caffe2
        netdef, and TRT plan models, each in batching and non-batching
        variants.
        """
        tensor_len = 16

        def _run_models(model_names, shape):
            # Batching variants first, at batch sizes 1 and 8, interleaved
            # across the given model names ...
            for bs in (1, 8):
                for name in model_names:
                    iu.infer_exact(self,
                                   name,
                                   shape,
                                   bs,
                                   req_raw,
                                   input_dtype,
                                   output0_dtype,
                                   output1_dtype,
                                   swap=swap)
            # ... then the variants that do not support batching.
            for name in model_names:
                iu.infer_exact(self,
                               name + '_nobatch',
                               shape,
                               1,
                               req_raw,
                               input_dtype,
                               output0_dtype,
                               output1_dtype,
                               swap=swap)

        if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype):
            _run_models(('graphdef', 'savedmodel'), (tensor_len, ))

        if tu.validate_for_c2_model(input_dtype, output0_dtype, output1_dtype):
            _run_models(('netdef', ), (tensor_len, ))

        if tu.validate_for_trt_model(input_dtype, output0_dtype,
                                     output1_dtype):
            # plan models carry trailing (1, 1) dims.
            _run_models(('plan', ), (tensor_len, 1, 1))
コード例 #12
0
    def _full_exact(self, input_dtype, output0_dtype, output1_dtype,
                    output0_raw, output1_raw, swap):
        """Run exact-result inference for every backend in BACKENDS that
        supports this dtype combination, including ensemble variants when
        ENSEMBLES is enabled. python/python_dlpack models run only without
        an ensemble prefix.
        """

        def _infer_exact_helper(tester,
                                pf,
                                tensor_shape,
                                batch_size,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=True,
                                output1_raw=True,
                                model_version=None,
                                swap=False,
                                outputs=("OUTPUT0", "OUTPUT1"),
                                use_http=USE_HTTP,
                                use_grpc=USE_GRPC,
                                use_http_json_tensors=True,
                                # NOTE: request-id checking is skipped by
                                # default in this helper.
                                skip_request_id_check=True,
                                use_streaming=True,
                                correlation_id=0):
            # Check both the non-batching and batching variants of the
            # model; the non-batching variant only at batch size one.
            for bs in (1, batch_size):
                # model that does not support batching
                if bs == 1:
                    iu.infer_exact(
                        tester,
                        pf + "_nobatch",
                        tensor_shape,
                        bs,
                        input_dtype,
                        output0_dtype,
                        output1_dtype,
                        output0_raw=output0_raw,
                        output1_raw=output1_raw,
                        model_version=model_version,
                        swap=swap,
                        outputs=outputs,
                        use_http=use_http,
                        use_grpc=use_grpc,
                        use_http_json_tensors=use_http_json_tensors,
                        skip_request_id_check=skip_request_id_check,
                        use_streaming=use_streaming,
                        correlation_id=correlation_id,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

                # model that supports batching. The batch dimension is
                # prepended to the tensor shape.
                iu.infer_exact(
                    tester,
                    pf, (bs, ) + tensor_shape,
                    bs,
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    output0_raw=output0_raw,
                    output1_raw=output1_raw,
                    model_version=model_version,
                    swap=swap,
                    outputs=outputs,
                    use_http=use_http,
                    use_grpc=use_grpc,
                    use_http_json_tensors=use_http_json_tensors,
                    skip_request_id_check=skip_request_id_check,
                    use_streaming=use_streaming,
                    correlation_id=correlation_id,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

        # Element count used for every test tensor shape in this method.
        input_size = 16

        # "" runs the plain (non-ensemble) models; when ENSEMBLES is set,
        # each prefix whose ensemble models are valid for these dtypes is
        # exercised as well.
        all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
        ensemble_prefix = [""]
        if ENSEMBLES:
            for prefix in all_ensemble_prefix:
                if tu.validate_for_ensemble_model(prefix, input_dtype,
                                                  output0_dtype, output1_dtype,
                                                  (input_size, ),
                                                  (input_size, ),
                                                  (input_size, )):
                    ensemble_prefix.append(prefix)

        if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                    (input_size, ), (input_size, ),
                                    (input_size, )):
            for prefix in ensemble_prefix:
                for pf in ["graphdef", "savedmodel"]:
                    if pf in BACKENDS:
                        _infer_exact_helper(self,
                                            prefix + pf, (input_size, ),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)

        # TRT plan models are skipped entirely on CPU-only runs; int8 plan
        # models carry trailing (1, 1) dims.
        if not CPU_ONLY and tu.validate_for_trt_model(
                input_dtype, output0_dtype, output1_dtype, (input_size, 1, 1),
            (input_size, 1, 1), (input_size, 1, 1)):
            for prefix in ensemble_prefix:
                if 'plan' in BACKENDS:
                    if input_dtype == np.int8:
                        _infer_exact_helper(self,
                                            prefix + 'plan',
                                            (input_size, 1, 1),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)
                    else:
                        _infer_exact_helper(self,
                                            prefix + 'plan', (input_size, ),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)

        if tu.validate_for_onnx_model(input_dtype, output0_dtype,
                                      output1_dtype, (input_size, ),
                                      (input_size, ), (input_size, )):
            for prefix in ensemble_prefix:
                if 'onnx' in BACKENDS:
                    _infer_exact_helper(self,
                                        prefix + 'onnx', (input_size, ),
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                          output1_dtype, (input_size, ),
                                          (input_size, ), (input_size, )):
            # Due to PyTorch bug
            # https://github.com/pytorch/pytorch/issues/66930 we can't
            # run this test with int8 input and int32 outputs.
            if ((input_dtype == np.int8) and (output0_dtype == np.int32)
                    and (output1_dtype == np.int32)):
                print('skipping pytorch test for int8_int32_int32')
            else:
                for prefix in ensemble_prefix:
                    if 'libtorch' in BACKENDS:
                        # Skip batching for PyTorch String I/O: only the
                        # non-batching model is run, directly via
                        # iu.infer_exact instead of the helper.
                        if ((input_dtype == np_dtype_string)
                                or (output0_dtype == np_dtype_string)
                                or (output1_dtype == np_dtype_string)):
                            iu.infer_exact(
                                self,
                                prefix + 'libtorch_nobatch',
                                (input_size, ),
                                1,  # batch_size
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap,
                                use_http=USE_HTTP,
                                use_grpc=USE_GRPC,
                                use_system_shared_memory=
                                TEST_SYSTEM_SHARED_MEMORY,
                                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                        else:
                            _infer_exact_helper(self,
                                                prefix + 'libtorch',
                                                (input_size, ),
                                                8,
                                                input_dtype,
                                                output0_dtype,
                                                output1_dtype,
                                                output0_raw=output0_raw,
                                                output1_raw=output1_raw,
                                                swap=swap)

        # Python backends: only the non-ensemble (empty prefix) case is
        # run; python_dlpack takes precedence over python when both are in
        # BACKENDS.
        for prefix in ensemble_prefix:
            if prefix != "":
                continue

            if 'python_dlpack' in BACKENDS:
                _infer_exact_helper(self,
                                    prefix + 'python_dlpack', (input_size, ),
                                    8,
                                    input_dtype,
                                    output0_dtype,
                                    output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw,
                                    swap=swap)
            elif 'python' in BACKENDS:
                _infer_exact_helper(self,
                                    prefix + 'python', (input_size, ),
                                    8,
                                    input_dtype,
                                    output0_dtype,
                                    output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw,
                                    swap=swap)