Example #1
    def _full_zero(self, dtype, shapes):
        # 'shapes' is a list of shapes, one for each input.

        # For validation assume any shape can be used...
        if tu.validate_for_tf_model(dtype, dtype, dtype, shapes[0], shapes[0],
                                    shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, 'graphdef', bs, dtype, shapes, shapes)
                iu.infer_zero(self, 'savedmodel', bs, dtype, shapes, shapes)
            # model that does not support batching
            iu.infer_zero(self, 'graphdef_nobatch', 1, dtype, shapes, shapes)
            iu.infer_zero(self, 'savedmodel_nobatch', 1, dtype, shapes, shapes)

        if tu.validate_for_c2_model(dtype, dtype, dtype, shapes[0], shapes[0],
                                    shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, 'netdef', bs, dtype, shapes, shapes)
            # model that does not support batching
            iu.infer_zero(self, 'netdef_nobatch', 1, dtype, shapes, shapes)

        if tu.validate_for_onnx_model(dtype, dtype, dtype, shapes[0],
                                      shapes[0], shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, 'onnx', bs, dtype, shapes, shapes)
            # model that does not support batching
            iu.infer_zero(self, 'onnx_nobatch', 1, dtype, shapes, shapes)

        for name in ["simple_zero", "sequence_zero", "fan_zero"]:
            if tu.validate_for_ensemble_model(name, dtype, dtype, dtype,
                                              shapes[0], shapes[0], shapes[0]):
                # model that supports batching
                for bs in (1, 8):
                    iu.infer_zero(self, name, bs, dtype, shapes, shapes)
                # model that does not support batching
                iu.infer_zero(self, name + '_nobatch', 1, dtype, shapes,
                              shapes)
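
A minimal sketch of how test cases might drive the _full_zero helper above, assuming it lives on a unittest.TestCase alongside the 'tu' (test util) and 'iu' (infer util) modules the suite imports. The method names, dtypes, and shapes below are illustrative assumptions, not taken from the original suite.

import unittest

import numpy as np

class InferZeroTest(unittest.TestCase):
    # _full_zero as defined above would be a method of this class.

    def test_ff1(self):
        # one float32 input of shape [1]
        self._full_zero(np.float32, ([1],))

    def test_hh3(self):
        # three float16 inputs, all of shape [2, 2]
        self._full_zero(np.float16, ([2, 2], [2, 2], [2, 2]))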
Example #2
    def _full_exact(self, input_dtype, output0_dtype, output1_dtype,
                    output0_raw, output1_raw, swap):
        def _infer_exact_helper(tester,
                                pf,
                                tensor_shape,
                                batch_size,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=True,
                                output1_raw=True,
                                model_version=None,
                                swap=False,
                                outputs=("OUTPUT0", "OUTPUT1"),
                                use_http=USE_HTTP,
                                use_grpc=USE_GRPC,
                                use_http_json_tensors=True,
                                skip_request_id_check=True,
                                use_streaming=True,
                                correlation_id=0):
            for bs in (1, batch_size):
                # model that does not support batching
                if bs == 1:
                    iu.infer_exact(
                        tester,
                        pf + "_nobatch",
                        tensor_shape,
                        bs,
                        input_dtype,
                        output0_dtype,
                        output1_dtype,
                        output0_raw=output0_raw,
                        output1_raw=output1_raw,
                        model_version=model_version,
                        swap=swap,
                        outputs=outputs,
                        use_http=use_http,
                        use_grpc=use_grpc,
                        use_http_json_tensors=use_http_json_tensors,
                        skip_request_id_check=skip_request_id_check,
                        use_streaming=use_streaming,
                        correlation_id=correlation_id,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                # model that supports batching
                iu.infer_exact(
                    tester,
                    pf, (bs, ) + tensor_shape,
                    bs,
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    output0_raw=output0_raw,
                    output1_raw=output1_raw,
                    model_version=model_version,
                    swap=swap,
                    outputs=outputs,
                    use_http=use_http,
                    use_grpc=use_grpc,
                    use_http_json_tensors=use_http_json_tensors,
                    skip_request_id_check=skip_request_id_check,
                    use_streaming=use_streaming,
                    correlation_id=correlation_id,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

        input_size = 16

        all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
        ensemble_prefix = [""]
        if ENSEMBLES and OS_WINDOWS:
            for prefix in all_ensemble_prefix:
                if tu.validate_for_ensemble_model(prefix, input_dtype,
                                                  output0_dtype, output1_dtype,
                                                  (input_size, ),
                                                  (input_size, ),
                                                  (input_size, )):
                    ensemble_prefix.append(prefix)

        if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                    (input_size, ), (input_size, ),
                                    (input_size, )):
            for prefix in ensemble_prefix:
                for pf in ["graphdef", "savedmodel"]:
                    if pf in BACKENDS:
                        _infer_exact_helper(self,
                                            prefix + pf, (input_size, ),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)

        if not CPU_ONLY and tu.validate_for_trt_model(
                input_dtype, output0_dtype, output1_dtype, (input_size, 1, 1),
                (input_size, 1, 1), (input_size, 1, 1)):
            for prefix in ensemble_prefix:
                if 'plan' in BACKENDS:
                    if input_dtype == np.int8:
                        _infer_exact_helper(self,
                                            prefix + 'plan',
                                            (input_size, 1, 1),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)
                    else:
                        _infer_exact_helper(self,
                                            prefix + 'plan', (input_size, ),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)

        if tu.validate_for_onnx_model(input_dtype, output0_dtype,
                                      output1_dtype, (input_size, ),
                                      (input_size, ), (input_size, )):
            for prefix in ensemble_prefix:
                if 'onnx' in BACKENDS:
                    _infer_exact_helper(self,
                                        prefix + 'onnx', (input_size, ),
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                          output1_dtype, (input_size, ),
                                          (input_size, ), (input_size, )):
            for prefix in ensemble_prefix:
                if 'libtorch' in BACKENDS:
                    _infer_exact_helper(self,
                                        prefix + 'libtorch', (input_size, ),
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        # No basic ensemble models are created against the python backend,
        # so run it without an ensemble prefix. (The original check reused
        # 'prefix' leaked from the loop above, which is unreliable.)
        if 'python' in BACKENDS:
            _infer_exact_helper(self,
                                'python', (input_size, ),
                                8,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap)
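
Example #2 is gated on module-level flags (BACKENDS, ENSEMBLES, OS_WINDOWS, CPU_ONLY, USE_HTTP, USE_GRPC, and the shared-memory toggles). A hedged sketch of how such flags are typically derived from the environment follows; the names match the code above, but the defaults and the CLIENT_TYPE variable are assumptions for illustration.

import os

# Hedged sketch: the surrounding suite reads these from the environment;
# the defaults shown are assumptions, not the originals.
TEST_SYSTEM_SHARED_MEMORY = bool(int(os.environ.get('TEST_SYSTEM_SHARED_MEMORY', 0)))
TEST_CUDA_SHARED_MEMORY = bool(int(os.environ.get('TEST_CUDA_SHARED_MEMORY', 0)))
CPU_ONLY = bool(int(os.environ.get('TRITON_SERVER_CPU_ONLY', 0)))
ENSEMBLES = bool(int(os.environ.get('ENSEMBLES', 1)))
OS_WINDOWS = bool(int(os.environ.get('OS_WINDOWS', 0)))
# 'pf in BACKENDS' above is a substring test, so a space-separated
# string works as the container.
BACKENDS = os.environ.get('BACKENDS', 'graphdef savedmodel onnx libtorch plan python')
# CLIENT_TYPE is hypothetical, used here only to derive the protocol toggles.
USE_HTTP = 'http' in os.environ.get('CLIENT_TYPE', 'http grpc')
USE_GRPC = 'grpc' in os.environ.get('CLIENT_TYPE', 'http grpc')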
Example #3
    def _full_exact(self,
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    input_shape,
                    output0_shape,
                    output1_shape,
                    output0_raw=True,
                    output1_raw=True,
                    swap=False):
        def _infer_exact_helper(tester,
                                pf,
                                tensor_shape,
                                batch_size,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=True,
                                output1_raw=True,
                                model_version=None,
                                swap=False,
                                outputs=("OUTPUT0", "OUTPUT1"),
                                use_http=True,
                                use_grpc=True,
                                skip_request_id_check=False,
                                use_streaming=True,
                                correlation_id=0):
            for bs in (1, batch_size):
                # model that does not support batching
                if bs == 1:
                    iu.infer_exact(tester, pf + "_nobatch", tensor_shape, bs,
                                   input_dtype, output0_dtype, output1_dtype,
                                   output0_raw, output1_raw, model_version,
                                   swap, outputs, use_http, use_grpc,
                                   skip_request_id_check, use_streaming,
                                   correlation_id)
                # model that supports batching
                iu.infer_exact(tester, pf, tensor_shape, bs, input_dtype,
                               output0_dtype, output1_dtype, output0_raw,
                               output1_raw, model_version, swap, outputs,
                               use_http, use_grpc, skip_request_id_check,
                               use_streaming, correlation_id)

        all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
        ensemble_prefix = [""]
        for prefix in all_ensemble_prefix:
            if tu.validate_for_ensemble_model(prefix, input_dtype,
                                              output0_dtype, output1_dtype,
                                              input_shape, input_shape,
                                              input_shape):
                ensemble_prefix.append(prefix)

        if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                    input_shape, output0_shape, output1_shape):
            for prefix in ensemble_prefix:
                for pf in ["graphdef", "savedmodel"]:
                    _infer_exact_helper(self,
                                        prefix + pf,
                                        input_shape,
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        if tu.validate_for_c2_model(input_dtype, output0_dtype, output1_dtype,
                                    input_shape, output0_shape, output1_shape):
            for prefix in ensemble_prefix:
                _infer_exact_helper(self,
                                    prefix + 'netdef',
                                    input_shape,
                                    8,
                                    input_dtype,
                                    output0_dtype,
                                    output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw,
                                    swap=swap)

        # The custom model is src/custom/addsub... it does not swap
        # the inputs, so 'swap' is always set to False.
        if tu.validate_for_custom_model(input_dtype, output0_dtype,
                                        output1_dtype, input_shape,
                                        output0_shape, output1_shape):
            # No basic ensemble models are created against custom models
            _infer_exact_helper(self,
                                'custom',
                                input_shape,
                                8,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=False)

        if tu.validate_for_onnx_model(input_dtype, output0_dtype,
                                      output1_dtype, input_shape,
                                      output0_shape, output1_shape):
            # No basic ensemble models are created against onnx models [TODO]
            _infer_exact_helper(self,
                                'onnx',
                                input_shape,
                                8,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap)
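
Because this variant takes explicit input and output shapes, the same helper covers both identity-shaped and reshaped outputs. The methods below are hypothetical invocations on the same class (dtypes, shapes, and names are illustrative):

    # Hypothetical test methods on the same class as _full_exact above.
    def test_raw_iii(self):
        # matching shapes, raw tensor outputs, swapped results
        self._full_exact(np.int32, np.int32, np.int32,
                         (16,), (16,), (16,),
                         output0_raw=True, output1_raw=True, swap=True)

    def test_class_fff(self):
        # output*_raw=False requests classification-style output
        # rather than the raw tensor
        self._full_exact(np.float32, np.float32, np.float32,
                         (16,), (16,), (16,),
                         output0_raw=False, output1_raw=False, swap=False)
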
    def _full_reshape(self,
                      dtype,
                      input_shapes,
                      output_shapes=None,
                      no_batch=True):
        # 'input_shapes' is a list of shapes, one for each input.
        if output_shapes is None:
            output_shapes = input_shapes

        # For validation assume any shape can be used...
        if tu.validate_for_tf_model(dtype, dtype, dtype, input_shapes[0],
                                    input_shapes[0], input_shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                full_shapes = [[bs] + input_shape for input_shape in input_shapes]
                full_output_shapes = [[bs] + output_shape for output_shape in output_shapes]
                iu.infer_zero(
                    self,
                    'graphdef',
                    bs,
                    dtype,
                    full_shapes,
                    full_output_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                iu.infer_zero(
                    self,
                    'savedmodel',
                    bs,
                    dtype,
                    full_shapes,
                    full_output_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(
                    self,
                    'graphdef_nobatch',
                    1,
                    dtype,
                    input_shapes,
                    output_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                iu.infer_zero(
                    self,
                    'savedmodel_nobatch',
                    1,
                    dtype,
                    input_shapes,
                    output_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

        if tu.validate_for_onnx_model(dtype, dtype, dtype, input_shapes[0],
                                      input_shapes[0], input_shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                full_shapes = [[bs] + input_shape for input_shape in input_shapes]
                full_output_shapes = [[bs] + output_shape for output_shape in output_shapes]
                iu.infer_zero(
                    self,
                    'onnx',
                    bs,
                    dtype,
                    full_shapes,
                    full_output_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(
                    self,
                    'onnx_nobatch',
                    1,
                    dtype,
                    input_shapes,
                    output_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

        # Skip for libtorch string I/O
        if tu.validate_for_libtorch_model(dtype, dtype, dtype, input_shapes[0],
                                          input_shapes[0], input_shapes[0]) and \
                                              (dtype != np_dtype_string):
            # Skip variable size reshape on libtorch for now; see
            # "gen_qa_reshape_model.py" for details.
            if dtype != np.int32:
                # model that does not support batching
                if no_batch:
                    iu.infer_zero(
                        self,
                        'libtorch_nobatch',
                        1,
                        dtype,
                        input_shapes,
                        output_shapes,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

                # model that supports batching
                for bs in (1, 8):
                    full_shapes = [[bs] + input_shape for input_shape in input_shapes]
                    full_output_shapes = [[bs] + output_shape for output_shape in output_shapes]
                    iu.infer_zero(
                        self,
                        'libtorch',
                        bs,
                        dtype,
                        full_shapes,
                        full_output_shapes,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

        for name in ["simple_reshape", "sequence_reshape", "fan_reshape"]:
            # [TODO] Skip variable size reshape on ensemble for now.
            # Needs rework on how ensembles for reshape are generated.
            if dtype == np.int32:
                break
            if tu.validate_for_ensemble_model(name, dtype, dtype, dtype,
                                              input_shapes[0], input_shapes[0],
                                              input_shapes[0]):
                # model that supports batching
                for bs in (1, 8):
                    full_shapes = [[bs] + input_shape for input_shape in input_shapes]
                    full_output_shapes = [[bs] + output_shape for output_shape in output_shapes]
                    iu.infer_zero(
                        self,
                        name,
                        bs,
                        dtype,
                        full_shapes,
                        full_output_shapes,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                # model that does not support batching
                if no_batch:
                    iu.infer_zero(
                        self,
                        name + '_nobatch',
                        1,
                        dtype,
                        input_shapes,
                        output_shapes,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
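
The full_shapes/full_output_shapes construction above simply prepends the batch dimension to every per-input shape. For example:

input_shapes = [[4, 4], [2], [2, 2, 3]]
bs = 8
full_shapes = [[bs] + shape for shape in input_shapes]
# full_shapes == [[8, 4, 4], [8, 2], [8, 2, 2, 3]]
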
    def _full_exact(self, input_dtype, output0_dtype, output1_dtype,
                    output0_raw, output1_raw, swap):
        def _infer_exact_helper(tester,
                                pf,
                                tensor_shape,
                                batch_size,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=True,
                                output1_raw=True,
                                model_version=None,
                                swap=False,
                                outputs=("OUTPUT0", "OUTPUT1"),
                                use_http=True,
                                use_grpc=True,
                                skip_request_id_check=False,
                                use_streaming=True,
                                correlation_id=0):
            for bs in (1, batch_size):
                iu.infer_exact(tester, pf, (bs, ) + tensor_shape, bs,
                               input_dtype, output0_dtype, output1_dtype,
                               output0_raw, output1_raw, model_version, swap,
                               outputs, use_http, use_grpc,
                               skip_request_id_check, use_streaming,
                               correlation_id)

        input_size = 16

        if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                    (input_size, ), (input_size, ),
                                    (input_size, )):
            for pf in ["graphdef", "savedmodel"]:
                _infer_exact_helper(self,
                                    pf, (input_size, ),
                                    8,
                                    input_dtype,
                                    output0_dtype,
                                    output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw,
                                    swap=swap)

        if tu.validate_for_c2_model(input_dtype, output0_dtype, output1_dtype,
                                    (input_size, ), (input_size, ),
                                    (input_size, )):
            _infer_exact_helper(self,
                                'netdef', (input_size, ),
                                8,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap)

        if tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype,
                                     (input_size, 1, 1), (input_size, 1, 1),
                                     (input_size, 1, 1)):
            if input_dtype == np.int8:
                _infer_exact_helper(self,
                                    'plan', (input_size, 1, 1),
                                    8,
                                    input_dtype,
                                    output0_dtype,
                                    output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw,
                                    swap=swap)
            else:
                _infer_exact_helper(self,
                                    'plan', (input_size, ),
                                    8,
                                    input_dtype,
                                    output0_dtype,
                                    output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw,
                                    swap=swap)

        if tu.validate_for_onnx_model(input_dtype, output0_dtype,
                                      output1_dtype, (input_size, ),
                                      (input_size, ), (input_size, )):
            _infer_exact_helper(self,
                                'onnx', (input_size, ),
                                8,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap)

        if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                          output1_dtype, (input_size, ),
                                          (input_size, ), (input_size, )):
            _infer_exact_helper(self,
                                'libtorch', (input_size, ),
                                8,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap)
Example #6
    def _full_zero(self, dtype, shapes):
        # 'shapes' is a list of shapes, one for each input.

        # For validation assume any shape can be used...
        if tu.validate_for_tf_model(dtype, dtype, dtype, shapes[0], shapes[0],
                                    shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                batch_shapes = [[bs] + shape for shape in shapes]
                iu.infer_zero(
                    self,
                    'graphdef',
                    bs,
                    dtype,
                    batch_shapes,
                    batch_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                iu.infer_zero(
                    self,
                    'savedmodel',
                    bs,
                    dtype,
                    batch_shapes,
                    batch_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            # model that does not support batching
            iu.infer_zero(self,
                          'graphdef_nobatch',
                          1,
                          dtype,
                          shapes,
                          shapes,
                          use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                          use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            iu.infer_zero(self,
                          'savedmodel_nobatch',
                          1,
                          dtype,
                          shapes,
                          shapes,
                          use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                          use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

        if tu.validate_for_onnx_model(dtype, dtype, dtype, shapes[0],
                                      shapes[0], shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                batch_shapes = [[bs] + shape for shape in shapes]
                iu.infer_zero(
                    self,
                    'onnx',
                    bs,
                    dtype,
                    batch_shapes,
                    batch_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            # model that does not support batching
            iu.infer_zero(self,
                          'onnx_nobatch',
                          1,
                          dtype,
                          shapes,
                          shapes,
                          use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                          use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

        for name in ["simple_zero", "sequence_zero", "fan_zero"]:
            if tu.validate_for_ensemble_model(name, dtype, dtype, dtype,
                                              shapes[0], shapes[0], shapes[0]):
                # model that supports batching
                for bs in (1, 8):
                    batch_shapes = [[bs] + shape for shape in shapes]
                    iu.infer_zero(
                        self,
                        name,
                        bs,
                        dtype,
                        batch_shapes,
                        batch_shapes,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                # model that does not support batching
                iu.infer_zero(
                    self,
                    name + '_nobatch',
                    1,
                    dtype,
                    shapes,
                    shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    def _full_exact(self, input_dtype, output0_dtype, output1_dtype,
                    output0_raw, output1_raw, swap):
        def _infer_exact_helper(tester,
                                pf,
                                tensor_shape,
                                batch_size,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=True,
                                output1_raw=True,
                                model_version=None,
                                swap=False,
                                outputs=("OUTPUT0", "OUTPUT1"),
                                use_http=True,
                                use_grpc=True,
                                use_http_json_tensors=True,
                                skip_request_id_check=True,
                                use_streaming=True,
                                correlation_id=0):
            for bs in (1, batch_size):
                # model that does not support batching
                if bs == 1:
                    iu.infer_exact(
                        tester,
                        pf + "_nobatch",
                        tensor_shape,
                        bs,
                        input_dtype,
                        output0_dtype,
                        output1_dtype,
                        output0_raw,
                        output1_raw,
                        model_version,
                        swap,
                        outputs,
                        use_http,
                        use_grpc,
                        use_http_json_tensors,
                        skip_request_id_check,
                        use_streaming,
                        correlation_id,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                # model that supports batching
                iu.infer_exact(
                    tester,
                    pf, (bs, ) + tensor_shape,
                    bs,
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    output0_raw,
                    output1_raw,
                    model_version,
                    swap,
                    outputs,
                    use_http,
                    use_grpc,
                    use_http_json_tensors,
                    skip_request_id_check,
                    use_streaming,
                    correlation_id,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

        input_size = 16

        all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
        ensemble_prefix = [""]
        if ENSEMBLES and ("custom" in BACKENDS):
            for prefix in all_ensemble_prefix:
                if tu.validate_for_ensemble_model(prefix, input_dtype,
                                                  output0_dtype, output1_dtype,
                                                  (input_size, ),
                                                  (input_size, ),
                                                  (input_size, )):
                    ensemble_prefix.append(prefix)

        if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                    (input_size, ), (input_size, ),
                                    (input_size, )):
            for prefix in ensemble_prefix:
                for pf in ["graphdef", "savedmodel"]:
                    if pf in BACKENDS:
                        _infer_exact_helper(self,
                                            prefix + pf, (input_size, ),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)

        if tu.validate_for_c2_model(input_dtype, output0_dtype, output1_dtype,
                                    (input_size, ), (input_size, ),
                                    (input_size, )):
            for prefix in ensemble_prefix:
                if 'netdef' in BACKENDS:
                    _infer_exact_helper(self,
                                        prefix + 'netdef', (input_size, ),
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        if not CPU_ONLY and tu.validate_for_trt_model(
                input_dtype, output0_dtype, output1_dtype, (input_size, 1, 1),
                (input_size, 1, 1), (input_size, 1, 1)):
            for prefix in ensemble_prefix:
                if 'plan' in BACKENDS:
                    if input_dtype == np.int8:
                        _infer_exact_helper(self,
                                            prefix + 'plan',
                                            (input_size, 1, 1),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)
                    else:
                        _infer_exact_helper(self,
                                            prefix + 'plan', (input_size, ),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)

        # The custom model is src/custom/addsub... it does not swap
        # the inputs, so 'swap' is always set to False.
        if tu.validate_for_custom_model(input_dtype, output0_dtype,
                                        output1_dtype, (input_size, ),
                                        (input_size, ), (input_size, )):
            # No basic ensemble models are created against custom models
            if 'custom' in BACKENDS:
                _infer_exact_helper(self,
                                    'custom', (input_size, ),
                                    8,
                                    input_dtype,
                                    output0_dtype,
                                    output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw,
                                    swap=False)

        if tu.validate_for_onnx_model(input_dtype, output0_dtype,
                                      output1_dtype, (input_size, ),
                                      (input_size, ), (input_size, )):
            for prefix in ensemble_prefix:
                if 'onnx' in BACKENDS:
                    _infer_exact_helper(self,
                                        prefix + 'onnx', (input_size, ),
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                          output1_dtype, (input_size, ),
                                          (input_size, ), (input_size, )):
            for prefix in ensemble_prefix:
                if 'libtorch' in BACKENDS:
                    _infer_exact_helper(self,
                                        prefix + 'libtorch', (input_size, ),
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)
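
In this fixed-shape variant only the dtype triple and the raw/swap flags vary, so concrete test cases reduce to one-liners. The methods below are hypothetical examples on the same class; the dtype-initial naming is an assumption about the suite's convention.

    # Hypothetical test methods; positional arguments are
    # (input_dtype, output0_dtype, output1_dtype, output0_raw, output1_raw, swap).
    def test_raw_fff(self):
        self._full_exact(np.float32, np.float32, np.float32, True, True, True)

    def test_raw_fii(self):
        self._full_exact(np.float32, np.int32, np.int32, True, True, False)
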
    def _full_exact(self,
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    input_shape,
                    output0_shape,
                    output1_shape,
                    output0_raw=True,
                    output1_raw=True,
                    swap=False):

        def _infer_exact_helper(tester,
                                pf,
                                tensor_shape,
                                batch_size,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=True,
                                output1_raw=True,
                                model_version=None,
                                swap=False,
                                outputs=("OUTPUT0", "OUTPUT1"),
                                use_http=True,
                                use_grpc=True,
                                skip_request_id_check=False,
                                use_streaming=True,
                                correlation_id=0):
            for bs in (1, batch_size):
                # model that does not support batching
                if bs == 1:
                    iu.infer_exact(
                        tester,
                        pf + "_nobatch",
                        tensor_shape,
                        bs,
                        input_dtype,
                        output0_dtype,
                        output1_dtype,
                        output0_raw=output0_raw,
                        output1_raw=output1_raw,
                        model_version=model_version,
                        swap=swap,
                        outputs=outputs,
                        use_http=use_http,
                        use_grpc=use_grpc,
                        skip_request_id_check=skip_request_id_check,
                        use_streaming=use_streaming,
                        correlation_id=correlation_id,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                # model that supports batching
                iu.infer_exact(
                    tester,
                    pf, (bs,) + tensor_shape,
                    bs,
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    output0_raw=output0_raw,
                    output1_raw=output1_raw,
                    model_version=model_version,
                    swap=swap,
                    outputs=outputs,
                    use_http=use_http,
                    use_grpc=use_grpc,
                    skip_request_id_check=skip_request_id_check,
                    use_streaming=use_streaming,
                    correlation_id=correlation_id,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

        all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
        ensemble_prefix = [""]
        for prefix in all_ensemble_prefix:
            if tu.validate_for_ensemble_model(prefix, input_dtype,
                                              output0_dtype, output1_dtype,
                                              input_shape, input_shape,
                                              input_shape):
                ensemble_prefix.append(prefix)

        if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                    input_shape, output0_shape, output1_shape):
            for prefix in ensemble_prefix:
                for pf in ["graphdef", "savedmodel"]:
                    _infer_exact_helper(self,
                                        prefix + pf,
                                        input_shape,
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        if tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype,
                                     input_shape, output0_shape, output1_shape):
            for prefix in ensemble_prefix:
                if input_dtype == np.int8:
                    _infer_exact_helper(self,
                                        prefix + 'plan',
                                        input_shape + (1, 1),
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)
                else:
                    _infer_exact_helper(self,
                                        prefix + 'plan',
                                        input_shape,
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        if tu.validate_for_onnx_model(input_dtype, output0_dtype, output1_dtype,
                                      input_shape, output0_shape,
                                      output1_shape):
            # No basic ensemble models are created against onnx models [TODO]
            _infer_exact_helper(self,
                                'onnx',
                                input_shape,
                                8,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap)

        if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                          output1_dtype, input_shape,
                                          output0_shape, output1_shape):
            # No basic ensemble models are created against libtorch models [TODO]
            _infer_exact_helper(self,
                                'libtorch',
                                input_shape,
                                8,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap)

    def _full_reshape(self,
                      dtype,
                      input_shapes,
                      output_shapes=None,
                      no_batch=True):
        # 'input_shapes' is a list of shapes, one for each input.
        if output_shapes is None:
            output_shapes = input_shapes

        # For validation assume any shape can be used...
        if tu.validate_for_tf_model(dtype, dtype, dtype, input_shapes[0],
                                    input_shapes[0], input_shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, 'graphdef', bs, dtype, input_shapes,
                              output_shapes)
                iu.infer_zero(self, 'savedmodel', bs, dtype, input_shapes,
                              output_shapes)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(self, 'graphdef_nobatch', 1, dtype, input_shapes,
                              output_shapes)
                iu.infer_zero(self, 'savedmodel_nobatch', 1, dtype,
                              input_shapes, output_shapes)

        if tu.validate_for_c2_model(dtype, dtype, dtype, input_shapes[0],
                                    input_shapes[0], input_shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, 'netdef', bs, dtype, input_shapes,
                              output_shapes)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(self, 'netdef_nobatch', 1, dtype, input_shapes,
                              output_shapes)

        if tu.validate_for_custom_model(dtype, dtype, dtype, input_shapes[0],
                                        input_shapes[0], input_shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, 'custom', bs, dtype, input_shapes,
                              output_shapes)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(self, 'custom_nobatch', 1, dtype, input_shapes,
                              output_shapes)

        if tu.validate_for_onnx_model(dtype, dtype, dtype, input_shapes[0],
                                      input_shapes[0], input_shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, 'onnx', bs, dtype, input_shapes,
                              output_shapes)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(self, 'onnx_nobatch', 1, dtype, input_shapes,
                              output_shapes)

        if tu.validate_for_libtorch_model(dtype, dtype, dtype, input_shapes[0],
                                          input_shapes[0], input_shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, 'libtorch', bs, dtype, input_shapes,
                              output_shapes)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(self, 'libtorch_nobatch', 1, dtype, input_shapes,
                              output_shapes)

        for name in ["simple_reshape", "sequence_reshape", "fan_reshape"]:
            if tu.validate_for_ensemble_model(name, dtype, dtype, dtype,
                                              input_shapes[0], input_shapes[0],
                                              input_shapes[0]):
                if len(input_shapes) <= 2:
                    # Skip cases that reshape to zero-sized tensors
                    # (known from qa/common/gen_qa_reshape_model.py)
                    return
                # model that supports batching
                for bs in (1, 8):
                    iu.infer_zero(self, name, bs, dtype, input_shapes,
                                  output_shapes)
                # model that does not support batching
                if no_batch:
                    iu.infer_zero(self, name + '_nobatch', 1, dtype,
                                  input_shapes, output_shapes)
Example #10
    def _full_reshape(self, dtype, input_shapes, output_shapes=None, no_batch=True):
        # 'input_shapes' is a list of shapes, one for each input.
        if output_shapes is None:
            output_shapes = input_shapes

        # For validation assume any shape can be used...
        if tu.validate_for_tf_model(dtype, dtype, dtype,
                                    input_shapes[0], input_shapes[0], input_shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, 'graphdef', bs, dtype, input_shapes, output_shapes)
                iu.infer_zero(self, 'savedmodel', bs, dtype, input_shapes, output_shapes)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(self, 'graphdef_nobatch', 1, dtype, input_shapes, output_shapes)
                iu.infer_zero(self, 'savedmodel_nobatch', 1, dtype, input_shapes, output_shapes)

        if tu.validate_for_c2_model(dtype, dtype, dtype,
                                    input_shapes[0], input_shapes[0], input_shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, 'netdef', bs, dtype, input_shapes, output_shapes)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(self, 'netdef_nobatch', 1, dtype, input_shapes, output_shapes)

        if tu.validate_for_custom_model(dtype, dtype, dtype,
                                    input_shapes[0], input_shapes[0], input_shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, 'custom', bs, dtype, input_shapes, output_shapes)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(self, 'custom_nobatch', 1, dtype, input_shapes, output_shapes)

        if tu.validate_for_onnx_model(dtype, dtype, dtype,
                                    input_shapes[0], input_shapes[0], input_shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, 'onnx', bs, dtype, input_shapes, output_shapes)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(self, 'onnx_nobatch', 1, dtype, input_shapes, output_shapes)

        if tu.validate_for_libtorch_model(dtype, dtype, dtype,
                                    input_shapes[0], input_shapes[0], input_shapes[0]):
            # Skip variable size reshape on libtorch for now; see
            # "gen_qa_reshape_model.py" for details.
            if dtype != np.int32:
                # model that supports batching
                for bs in (1, 8):
                    iu.infer_zero(self, 'libtorch', bs, dtype, input_shapes, output_shapes)
                # model that does not support batching
                if no_batch:
                    iu.infer_zero(self, 'libtorch_nobatch', 1, dtype, input_shapes, output_shapes)

        for name in ["simple_reshape", "sequence_reshape", "fan_reshape"]:
            # [TODO] Skip variable size reshape on ensemble for now.
            # Needs rework on how ensembles for reshape are generated.
            if dtype == np.int32:
                break
            if tu.validate_for_ensemble_model(name, dtype, dtype, dtype,
                                        input_shapes[0], input_shapes[0], input_shapes[0]):
                # model that supports batching
                for bs in (1, 8):
                    iu.infer_zero(self, name, bs, dtype, input_shapes, output_shapes)
                # model that does not support batching
                if no_batch:
                    iu.infer_zero(self, name + '_nobatch', 1, dtype, input_shapes, output_shapes)
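
A hedged sketch of how _full_reshape might be invoked, pairing every input shape with the shape it is expected to reshape to; the shape pairs and method names are illustrative assumptions.

    # Hypothetical test methods on the same class as _full_reshape above.
    def test_ff1(self):
        self._full_reshape(np.float32,
                           input_shapes=([1],),
                           output_shapes=([1],))

    def test_ff3(self):
        # element counts must match pairwise: 16 -> 16, 2 -> 2, 12 -> 12
        self._full_reshape(np.float32,
                           input_shapes=([4, 4], [2], [2, 2, 3]),
                           output_shapes=([16], [1, 2], [3, 2, 2]))
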
    def _full_exact(self, input_dtype, output0_dtype, output1_dtype,
                    output0_raw, output1_raw, swap):
        def _infer_exact_helper(tester,
                                pf,
                                tensor_shape,
                                batch_size,
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=True,
                                output1_raw=True,
                                model_version=None,
                                swap=False,
                                outputs=("OUTPUT0", "OUTPUT1"),
                                use_http=USE_HTTP,
                                use_grpc=USE_GRPC,
                                use_http_json_tensors=True,
                                skip_request_id_check=True,
                                use_streaming=True,
                                correlation_id=0):
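            # For each batch size in (1, batch_size), run the model that
            # supports batching (with the batch dimension prepended to the
            # tensor shape); the non-batching variant runs only when bs == 1.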
            for bs in (1, batch_size):
                # model that does not support batching
                if bs == 1:
                    iu.infer_exact(
                        tester,
                        pf + "_nobatch",
                        tensor_shape,
                        bs,
                        input_dtype,
                        output0_dtype,
                        output1_dtype,
                        output0_raw=output0_raw,
                        output1_raw=output1_raw,
                        model_version=model_version,
                        swap=swap,
                        outputs=outputs,
                        use_http=use_http,
                        use_grpc=use_grpc,
                        use_http_json_tensors=use_http_json_tensors,
                        skip_request_id_check=skip_request_id_check,
                        use_streaming=use_streaming,
                        correlation_id=correlation_id,
                        use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                        use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

                # model that supports batching
                iu.infer_exact(
                    tester,
                    pf, (bs, ) + tensor_shape,
                    bs,
                    input_dtype,
                    output0_dtype,
                    output1_dtype,
                    output0_raw=output0_raw,
                    output1_raw=output1_raw,
                    model_version=model_version,
                    swap=swap,
                    outputs=outputs,
                    use_http=use_http,
                    use_grpc=use_grpc,
                    use_http_json_tensors=use_http_json_tensors,
                    skip_request_id_check=skip_request_id_check,
                    use_streaming=use_streaming,
                    correlation_id=correlation_id,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

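        # All models under test use 1-D tensors of 16 elements; the PLAN
        # models with int8 I/O use a (16, 1, 1) shape instead (see below).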
        input_size = 16

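        # Build the list of model-name prefixes to test: always the bare
        # backend models, plus any ensemble variants that support these
        # datatypes.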
        all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
        ensemble_prefix = [""]
        if ENSEMBLES:
            for prefix in all_ensemble_prefix:
                if tu.validate_for_ensemble_model(prefix, input_dtype,
                                                  output0_dtype, output1_dtype,
                                                  (input_size, ),
                                                  (input_size, ),
                                                  (input_size, )):
                    ensemble_prefix.append(prefix)

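        # TensorFlow backends: run both the graphdef and savedmodel model
        # variants (plus any supported ensembles) when present in BACKENDS.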
        if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                    (input_size, ), (input_size, ),
                                    (input_size, )):
            for prefix in ensemble_prefix:
                for pf in ["graphdef", "savedmodel"]:
                    if pf in BACKENDS:
                        _infer_exact_helper(self,
                                            prefix + pf, (input_size, ),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)

        if not CPU_ONLY and tu.validate_for_trt_model(
                input_dtype, output0_dtype, output1_dtype, (input_size, 1, 1),
                (input_size, 1, 1), (input_size, 1, 1)):
            for prefix in ensemble_prefix:
                if 'plan' in BACKENDS:
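                    # PLAN models with int8 I/O are presumably generated
                    # with a (N, 1, 1) tensor shape (matching the validation
                    # call above), so use that shape here; all other
                    # datatypes use the flat (N,) shape.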
                    if input_dtype == np.int8:
                        _infer_exact_helper(self,
                                            prefix + 'plan',
                                            (input_size, 1, 1),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)
                    else:
                        _infer_exact_helper(self,
                                            prefix + 'plan', (input_size, ),
                                            8,
                                            input_dtype,
                                            output0_dtype,
                                            output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)

        if tu.validate_for_onnx_model(input_dtype, output0_dtype,
                                      output1_dtype, (input_size, ),
                                      (input_size, ), (input_size, )):
            for prefix in ensemble_prefix:
                if 'onnx' in BACKENDS:
                    _infer_exact_helper(self,
                                        prefix + 'onnx', (input_size, ),
                                        8,
                                        input_dtype,
                                        output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw,
                                        swap=swap)

        if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                          output1_dtype, (input_size, ),
                                          (input_size, ), (input_size, )):
            # Due to PyTorch bug
            # https://github.com/pytorch/pytorch/issues/66930 we can't
            # run this test with int8 input and int32 outputs.
            if ((input_dtype == np.int8) and (output0_dtype == np.int32)
                    and (output1_dtype == np.int32)):
                print('skipping pytorch test for int8_int32_int32')
            else:
                for prefix in ensemble_prefix:
                    if 'libtorch' in BACKENDS:
                        # Batching is skipped for PyTorch string I/O, so
                        # run only the non-batching model with batch size 1.
                        if ((input_dtype == np_dtype_string)
                                or (output0_dtype == np_dtype_string)
                                or (output1_dtype == np_dtype_string)):
                            iu.infer_exact(
                                self,
                                prefix + 'libtorch_nobatch',
                                (input_size, ),
                                1,  # batch_size
                                input_dtype,
                                output0_dtype,
                                output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw,
                                swap=swap,
                                use_http=USE_HTTP,
                                use_grpc=USE_GRPC,
                                use_system_shared_memory=
                                TEST_SYSTEM_SHARED_MEMORY,
                                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                        else:
                            _infer_exact_helper(self,
                                                prefix + 'libtorch',
                                                (input_size, ),
                                                8,
                                                input_dtype,
                                                output0_dtype,
                                                output1_dtype,
                                                output0_raw=output0_raw,
                                                output1_raw=output1_raw,
                                                swap=swap)

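        # Only the bare (un-prefixed) python models are tested below,
        # presumably because ensembles are not generated for the python
        # backends; 'python_dlpack' takes precedence over 'python' when
        # both are listed in BACKENDS.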
        for prefix in ensemble_prefix:
            if prefix != "":
                continue

            if 'python_dlpack' in BACKENDS:
                _infer_exact_helper(self,
                                    prefix + 'python_dlpack', (input_size, ),
                                    8,
                                    input_dtype,
                                    output0_dtype,
                                    output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw,
                                    swap=swap)
            elif 'python' in BACKENDS:
                _infer_exact_helper(self,
                                    prefix + 'python', (input_size, ),
                                    8,
                                    input_dtype,
                                    output0_dtype,
                                    output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw,
                                    swap=swap)