Ejemplos de create_shared_memory_region en Python, ejemplos de tensorrtserver.shared_memory.create_shared_memory_region en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: shm_util.py Proyecto: intellisyscorp/fitzme-triton-inference-server

def create_register_set_either_shm_region(shm_region_names, input_list, input_byte_size,
                                        output_byte_size, shared_memory_ctx,
                                        use_system_shared_memory, use_cuda_shared_memory):
    """Create, register and populate one input and one output shared memory region.

    Depending on the flags, the regions are created either as system shared
    memory (``shm``) or as CUDA shared memory (``cudashm``), registered through
    ``shared_memory_ctx``, and the input region is filled with ``input_list``.

    Args:
        shm_region_names: Sequence whose first two entries are the base names
            of the input and output regions. ``"_data"`` is appended to form
            the region name and, for system shared memory, ``"/"`` is
            prepended to form the shared memory key.
        input_list: Data copied into the input region.
        input_byte_size: Size in bytes of the input region.
        output_byte_size: Size in bytes of the output region.
        shared_memory_ctx: Control context used to register the regions
            (``register`` for system, ``cuda_register`` for CUDA).
        use_system_shared_memory: Truthy to use system shared memory.
        use_cuda_shared_memory: Truthy to use CUDA shared memory.

    Returns:
        ``[input_handle, output_handle]``, or ``[]`` when neither flag is set.

    Raises:
        ValueError: If both flags are set.
    """
    if use_cuda_shared_memory and use_system_shared_memory:
        raise ValueError("Cannot set both System and CUDA shared memory flags to 1")

    if not (use_system_shared_memory or use_cuda_shared_memory):
        return []

    ip_region_name = shm_region_names[0] + "_data"
    op_region_name = shm_region_names[1] + "_data"

    if use_cuda_shared_memory:
        # CUDA regions go through cudashm and must be registered with
        # cuda_register. The trailing 0 is presumably the CUDA device id --
        # TODO confirm against the cuda_shared_memory API.
        shm_ip_handle = cudashm.create_shared_memory_region(ip_region_name,
                                                            input_byte_size, 0)
        shm_op_handle = cudashm.create_shared_memory_region(op_region_name,
                                                            output_byte_size, 0)
        shared_memory_ctx.cuda_register(shm_ip_handle)
        shared_memory_ctx.cuda_register(shm_op_handle)
        # copy data into shared memory region for input values
        cudashm.set_shared_memory_region(shm_ip_handle, input_list)
    else:
        # System regions go through shm, take an explicit "/<name>" key and
        # are registered with the plain register call.
        shm_ip_handle = shm.create_shared_memory_region(
            ip_region_name, "/" + shm_region_names[0], input_byte_size)
        shm_op_handle = shm.create_shared_memory_region(
            op_region_name, "/" + shm_region_names[1], output_byte_size)
        shared_memory_ctx.register(shm_ip_handle)
        shared_memory_ctx.register(shm_op_handle)
        # copy data into shared memory region for input values
        shm.set_shared_memory_region(shm_ip_handle, input_list)

    return [shm_ip_handle, shm_op_handle]

Ejemplo n.º 2

0

Mostrar archivo

Archivo: shared_memory_test.py Proyecto: intellisyscorp/fitzme-triton-inference-server

 def _configure_sever(self):
     """Create, fill and register the four 64-byte system shared memory regions.

     Two output and two input regions are created, the inputs are filled
     with ``arange(16)`` and ``ones(16)`` (int32), and every region is
     registered with the server.

     Returns:
         ``[input0, input1, output0, output1]`` region handles.
     """
     # Creation order is outputs first, then inputs.
     region_specs = (
         ("output0_data", "/output0_data"),
         ("output1_data", "/output1_data"),
         ("input0_data", "/input0_data"),
         ("input1_data", "/input1_data"),
     )
     out0, out1, in0, in1 = [
         shm.create_shared_memory_region(region_name, region_key, 64)
         for region_name, region_key in region_specs
     ]

     ascending = np.arange(start=0, stop=16, dtype=np.int32)
     all_ones = np.ones(shape=16, dtype=np.int32)
     shm.set_shared_memory_region(in0, [ascending])
     shm.set_shared_memory_region(in1, [all_ones])

     control_ctx = SharedMemoryControlContext(_url,  _protocol, verbose=True)
     # Registration (and the returned list) is ordered inputs first.
     handles = [in0, in1, out0, out1]
     for handle in handles:
         control_ctx.register(handle)
     return handles

Ejemplo n.º 3

0

Mostrar archivo

Archivo: shared_memory_test.py Proyecto: intellisyscorp/fitzme-triton-inference-server

 def test_invalid_create_shm(self):
     """Creating a system shared memory region with a negative size must raise.

     The test fails if creation unexpectedly succeeds; previously that case
     passed without asserting anything.
     """
     try:
         shm_op0_handle = shm.create_shared_memory_region("dummy_data", "/dummy_data", -1)
     except Exception as ex:
         self.assertEqual(str(ex), "unable to initialize the size")
     else:
         # Creation went through: release the region before reporting the failure.
         shm.destroy_shared_memory_region(shm_op0_handle)
         self.fail("creating a shared memory region of size -1 did not raise")

Ejemplo n.º 4

0

Mostrar archivo

Archivo: shared_memory_test.py Proyecto: intellisyscorp/fitzme-triton-inference-server

 def test_unregister_before_register(self):
     """Unregistering a region that was never registered leaves no regions in the status."""
     control_ctx = SharedMemoryControlContext(_url,  _protocol, verbose=True)
     # A valid 8-byte system shared memory region that is deliberately not registered.
     region = shm.create_shared_memory_region("dummy_data", "/dummy_data", 8)
     control_ctx.unregister(region)
     registered_regions = control_ctx.get_shared_memory_status().shared_memory_region
     self.assertTrue(len(registered_regions) == 0)
     shm.destroy_shared_memory_region(region)

Ejemplo n.º 5

0

Mostrar archivo

Archivo: shared_memory_test.py Proyecto: intellisyscorp/fitzme-triton-inference-server

 def test_valid_create_set_register(self):
     """Create an 8-byte system region, write two float32 values, register it, expect one region."""
     control_ctx = SharedMemoryControlContext(_url,  _protocol, verbose=True)
     region = shm.create_shared_memory_region("dummy_data", "/dummy_data", 8)
     # Two float32 values occupy exactly the 8 bytes of the region.
     payload = [np.array([1,2], dtype=np.float32)]
     shm.set_shared_memory_region(region, payload)
     control_ctx.register(region)
     registered_regions = control_ctx.get_shared_memory_status().shared_memory_region
     self.assertTrue(len(registered_regions) == 1)
     shm.destroy_shared_memory_region(region)

Ejemplo n.º 6

0

Mostrar archivo

Archivo: sequence_util.py Proyecto: zhangxuann/tensorrt-inference-server

    def precreate_register_regions(self, value_list, dtype, i, batch_size=1):
        """Pre-create and register an input/output shared memory pair per value.

        Does nothing unless one of the module flags
        ``_test_system_shared_memory`` / ``_test_cuda_shared_memory`` is set.

        Args:
            value_list: Values; each becomes ``batch_size`` one-element tensors.
            dtype: Tensor dtype. For ``np.object`` the value is converted to
                its string form and the tensors are passed through
                ``iu._prepend_string_size`` before being copied.
            i: Index embedded in the region names (``ip{i}{j}`` / ``op{i}{j}``).
            batch_size: Number of tensors generated per value.

        Returns:
            Flat list ``[ip_handle, op_handle, ip_handle, op_handle, ...]`` in
            ``value_list`` order, or ``[]`` when shared memory is not in use.
        """
        if not (_test_system_shared_memory or _test_cuda_shared_memory):
            return []

        control_ctx = SharedMemoryControlContext("localhost:8000",  ProtocolType.HTTP, verbose=True)

        def _single_tensor(value):
            # One-element tensor; string tensors are derived from the int32 form.
            if dtype == np.object:
                numeric = np.full((1,), value, dtype=np.int32)
                as_text = np.array([str(x) for x in numeric.reshape(numeric.size)], dtype=object)
                return as_text.reshape((1,))
            return np.full((1,), value, dtype=dtype)

        handles = []
        for j, value in enumerate(value_list):
            tensors = [_single_tensor(value) for _ in range(batch_size)]

            if dtype == np.object:
                payload = iu._prepend_string_size(tensors)
            else:
                payload = tensors
            input_byte_size = sum(t.nbytes for t in payload)
            output_byte_size = np.dtype(dtype).itemsize + 2

            ip_region_name = 'ip{}{}_data'.format(i,j)
            op_region_name = 'op{}{}_data'.format(i,j)

            # Create both regions, copy the input data in, then register both.
            if _test_system_shared_memory:
                ip_handle = shm.create_shared_memory_region(
                    ip_region_name, '/ip{}{}'.format(i,j), input_byte_size)
                op_handle = shm.create_shared_memory_region(
                    op_region_name, '/op{}{}'.format(i,j), output_byte_size)
                shm.set_shared_memory_region(ip_handle, payload)
                control_ctx.register(ip_handle)
                control_ctx.register(op_handle)
            elif _test_cuda_shared_memory:
                ip_handle = cudashm.create_shared_memory_region(
                    ip_region_name, input_byte_size, 0)
                op_handle = cudashm.create_shared_memory_region(
                    op_region_name, output_byte_size, 0)
                cudashm.set_shared_memory_region(ip_handle, payload)
                control_ctx.cuda_register(ip_handle)
                control_ctx.cuda_register(op_handle)

            handles.append(ip_handle)
            handles.append(op_handle)
        return handles

Ejemplo n.º 7

0

Mostrar archivo

Archivo: shared_memory_test.py Proyecto: intellisyscorp/fitzme-triton-inference-server

 def test_reregister_after_register(self):
     """Registering the same system region twice still leaves exactly one region registered."""
     control_ctx = SharedMemoryControlContext(_url,  _protocol, verbose=True)
     region = shm.create_shared_memory_region("dummy_data", "/dummy_data", 8)
     control_ctx.register(region)
     try:
         # Second registration of the very same handle.
         control_ctx.register(region)
     except Exception as ex:
         duplicate_text = "shared memory region 'dummy_data' already in manager"
         self.assertTrue(duplicate_text in str(ex))
     registered_regions = control_ctx.get_shared_memory_status().shared_memory_region
     self.assertTrue(len(registered_regions) == 1)
     shm.destroy_shared_memory_region(region)

Ejemplo n.º 8

0

Mostrar archivo

Archivo: shared_memory_test.py Proyecto: intellisyscorp/fitzme-triton-inference-server

 def test_too_big_shm(self):
     """Inference with an input region larger than the expected size reports an error.

     A 128-byte region is registered and used as the second input in place
     of the 64-byte region from ``_configure_sever``. Any error collected by
     ``_basic_inference`` must be the size-mismatch message for INPUT1.
     """
     error_msg = []
     shm_handles = self._configure_sever()
     shm_ip2_handle = shm.create_shared_memory_region("input2_data", "/input2_data", 128)
     shared_memory_ctx = SharedMemoryControlContext(_url,  _protocol, verbose=True)
     shared_memory_ctx.register(shm_ip2_handle)
     # Pass the oversized region as the second input; using shm_handles[1]
     # here would leave the 128-byte region unused by the inference.
     self._basic_inference(shm_handles[0], shm_ip2_handle, shm_handles[2], shm_handles[3], error_msg)
     if len(error_msg) > 0:
         self.assertTrue(error_msg[-1] == "The input 'INPUT1' has shared memory of size 128 bytes"\
                                 " while the expected size is 1 * 64 = 64 bytes")
     # Hand the extra region to the cleanup helper together with the others.
     shm_handles.append(shm_ip2_handle)
     self._cleanup_server(shm_handles)

Ejemplo n.º 9

0

Mostrar archivo

Archivo: shared_memory_test.py Proyecto: intellisyscorp/fitzme-triton-inference-server

 def test_register_after_inference(self):
     """A region registered after a successful inference brings the registered count to five."""
     errors = []
     handles = self._configure_sever()
     control_ctx = SharedMemoryControlContext(_url,  _protocol, verbose=True)

     in0_handle, in1_handle = handles[0], handles[1]
     out0_handle, out1_handle = handles[2], handles[3]
     self._basic_inference(in0_handle, in1_handle, out0_handle, out1_handle, errors)
     if errors:
         # Any error collected during inference aborts the test.
         raise Exception(str(errors))

     extra_input = shm.create_shared_memory_region("input2_data", "/input2_data", 64)
     control_ctx.register(extra_input)
     registered_regions = control_ctx.get_shared_memory_status().shared_memory_region
     self.assertTrue(len(registered_regions) == 5)

     handles.append(extra_input)
     self._cleanup_server(handles)

Ejemplo n.º 10

0

Mostrar archivo

Archivo: shared_memory_test.py Proyecto: ai-learn-use/tensorrt-inference-server

# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import tensorrtserver.shared_memory as shm
from tensorrtserver.api import *
import numpy as np
import threading

# A size of -1 is not a valid region size, so creation is expected to raise.
try:
    shm_op0_handle = shm.create_shared_memory_region(
        "dummy_data", "/dummy_data", -1)
except Exception as ex:
    assert str(ex) == "unable to initialize the size"

shared_memory_ctx = SharedMemoryControlContext(
    "localhost:8000", ProtocolType.HTTP, verbose=False)

# Now create a well-formed 8-byte region under the same name and key.
shm_op0_handle = shm.create_shared_memory_region(
    "dummy_data", "/dummy_data", 8)
# Copy a two-element array into the region.
shm.set_shared_memory_region(shm_op0_handle, [np.array([1, 2])])
# Unregistering a region that was never registered is expected to do
# nothing rather than fail.
shared_memory_ctx.unregister(shm_op0_handle)
# Test if register is working

Ejemplo n.º 11

0

Mostrar archivo

Archivo: simple_shm_string_client.py Proyecto: wzq918/tensorrt-inference-server

    # Convert the second input to its string (object) form, keeping its shape.
    in1n = np.array([str(x) for x in in1.reshape(in1.size)], dtype=object)
    input1_data = in1n.reshape(in1.shape)

    # serialize the string tensors
    input0_data_serialized = serialize_string_tensor(input0_data)
    input1_data_serialized = serialize_string_tensor(input1_data)

    # Use the size of the serialized tensors to create the shared memory regions
    input0_byte_size = input0_data_serialized.size * input0_data_serialized.itemsize
    input1_byte_size = input1_data_serialized.size * input1_data_serialized.itemsize
    # Single output size: the larger serialized input plus one byte
    # (the identical assignment was previously written twice).
    output_byte_size = max(input0_byte_size, input1_byte_size) + 1

    # Create Output0 and Output1 in Shared Memory and store shared memory handles
    shm_op0_handle = shm.create_shared_memory_region("output0_data",
                                                     "/output0_simple",
                                                     output_byte_size)
    shm_op1_handle = shm.create_shared_memory_region("output1_data",
                                                     "/output1_simple",
                                                     output_byte_size)

    # Register Output0 and Output1 shared memory with TRTIS
    shared_memory_ctx.register(shm_op0_handle)
    shared_memory_ctx.register(shm_op1_handle)

    # Create Input0 and Input1 in Shared Memory and store shared memory handles
    shm_ip0_handle = shm.create_shared_memory_region("input0_data",
                                                     "/input0_simple",
                                                     input0_byte_size)
    shm_ip1_handle = shm.create_shared_memory_region("input1_data",
                                                     "/input1_simple",

Ejemplo n.º 12

0

Mostrar archivo

def infer_exact(tester,
                pf,
                tensor_shape,
                batch_size,
                input_dtype,
                output0_dtype,
                output1_dtype,
                output0_raw=True,
                output1_raw=True,
                model_version=None,
                swap=False,
                outputs=("OUTPUT0", "OUTPUT1"),
                use_http=True,
                use_grpc=True,
                skip_request_id_check=False,
                use_streaming=True,
                correlation_id=0,
                shm_region_names=None):
    tester.assertTrue(use_http or use_grpc or use_streaming)
    configs = []
    if use_http:
        if TEST_SHARED_MEMORY:
            configs.append(("localhost:8000", ProtocolType.HTTP, False, True))
        else:
            configs.append(("localhost:8000", ProtocolType.HTTP, False, False))
    if use_grpc:
        if TEST_SHARED_MEMORY:
            configs.append(("localhost:8001", ProtocolType.GRPC, False, True))
        else:
            configs.append(("localhost:8001", ProtocolType.GRPC, False, False))
    if use_streaming:
        if TEST_SHARED_MEMORY:
            configs.append(("localhost:8001", ProtocolType.GRPC, True, True))
        else:
            configs.append(("localhost:8001", ProtocolType.GRPC, True, False))

    for config in configs:
        model_name = tu.get_model_name(pf, input_dtype, output0_dtype,
                                       output1_dtype)

        # outputs are sum and difference of inputs so set max input
        # values so that they will not overflow the output. This
        # allows us to do an exact match. For float types use 8, 16,
        # 32 int range for fp 16, 32, 64 respectively. When getting
        # class outputs the result value/probability is returned as a
        # float so must use fp32 range in that case.
        rinput_dtype = _range_repr_dtype(input_dtype)
        routput0_dtype = _range_repr_dtype(
            output0_dtype if output0_raw else np.float32)
        routput1_dtype = _range_repr_dtype(
            output1_dtype if output1_raw else np.float32)
        val_min = max(
            np.iinfo(rinput_dtype).min,
            np.iinfo(routput0_dtype).min,
            np.iinfo(routput1_dtype).min) / 2
        val_max = min(
            np.iinfo(rinput_dtype).max,
            np.iinfo(routput0_dtype).max,
            np.iinfo(routput1_dtype).max) / 2

        num_classes = 3

        input0_list = list()
        input1_list = list()
        expected0_list = list()
        expected1_list = list()
        expected0_val_list = list()
        expected1_val_list = list()
        for b in range(batch_size):
            in0 = np.random.randint(low=val_min,
                                    high=val_max,
                                    size=tensor_shape,
                                    dtype=rinput_dtype)
            in1 = np.random.randint(low=val_min,
                                    high=val_max,
                                    size=tensor_shape,
                                    dtype=rinput_dtype)
            if input_dtype != np.object:
                in0 = in0.astype(input_dtype)
                in1 = in1.astype(input_dtype)

            if not swap:
                op0 = in0 + in1
                op1 = in0 - in1
            else:
                op0 = in0 - in1
                op1 = in0 + in1

            expected0_val_list.append(op0)
            expected1_val_list.append(op1)
            if output0_dtype == np.object:
                expected0_list.append(
                    np.array([
                        unicode(str(x), encoding='utf-8')
                        for x in (op0.flatten())
                    ],
                             dtype=object).reshape(op0.shape))
            else:
                expected0_list.append(op0.astype(output0_dtype))
            if output1_dtype == np.object:
                expected1_list.append(
                    np.array([
                        unicode(str(x), encoding='utf-8')
                        for x in (op1.flatten())
                    ],
                             dtype=object).reshape(op1.shape))
            else:
                expected1_list.append(op1.astype(output1_dtype))

            if input_dtype == np.object:
                in0n = np.array([str(x) for x in in0.reshape(in0.size)],
                                dtype=object)
                in0 = in0n.reshape(in0.shape)
                in1n = np.array([str(x) for x in in1.reshape(in1.size)],
                                dtype=object)
                in1 = in1n.reshape(in1.shape)

            input0_list.append(in0)
            input1_list.append(in1)

        if config[3]:
            input0_byte_size = input0_list[0].size * input0_list[
                0].itemsize * batch_size
            output0_byte_size = expected0_list[0].size * expected0_list[
                0].itemsize * batch_size
            output1_byte_size = expected1_list[0].size * expected1_list[
                0].itemsize * batch_size

            # create and register shared memory region for inputs and outputs
            if shm_region_names is None:
                shm_ip0_handle = shm.create_shared_memory_region(
                    "input0_data", "/input0", input0_byte_size)
                shm_ip1_handle = shm.create_shared_memory_region(
                    "input1_data", "/input1", input0_byte_size)
                if "OUTPUT0" in outputs:
                    shm_op0_handle = shm.create_shared_memory_region(
                        "output0_data", "/output0", output0_byte_size)
                if "OUTPUT1" in outputs:
                    shm_op1_handle = shm.create_shared_memory_region(
                        "output1_data", "/output1", output1_byte_size)
            else:
                shm_ip0_handle = shm.create_shared_memory_region(
                    shm_region_names[0] + '_data', '/' + shm_region_names[0],
                    input0_byte_size)
                shm_ip1_handle = shm.create_shared_memory_region(
                    shm_region_names[1] + '_data', '/' + shm_region_names[1],
                    input0_byte_size)
                i = 0
                if "OUTPUT0" in outputs:
                    shm_op0_handle = shm.create_shared_memory_region(
                        shm_region_names[2] + '_data',
                        '/' + shm_region_names[2], output0_byte_size)
                    i += 1
                if "OUTPUT1" in outputs:
                    shm_op1_handle = shm.create_shared_memory_region(
                        shm_region_names[2 + i] + '_data',
                        '/' + shm_region_names[2 + i], output1_byte_size)

            # copy data into shared memory region for input values
            shm.set_shared_memory_region(shm_ip0_handle, input0_list)
            shm.set_shared_memory_region(shm_ip1_handle, input1_list)

            shared_memory_ctx = SharedMemoryControlContext(config[0],
                                                           config[1],
                                                           verbose=True)
            shared_memory_ctx.register(shm_ip0_handle)
            shared_memory_ctx.register(shm_ip1_handle)
            if "OUTPUT0" in outputs:
                shared_memory_ctx.register(shm_op0_handle)
            if "OUTPUT1" in outputs:
                shared_memory_ctx.register(shm_op1_handle)

        expected0_sort_idx = [
            np.flip(np.argsort(x.flatten()), 0) for x in expected0_val_list
        ]
        expected1_sort_idx = [
            np.flip(np.argsort(x.flatten()), 0) for x in expected1_val_list
        ]

        output_req = {}
        OUTPUT0 = "OUTPUT0"
        OUTPUT1 = "OUTPUT1"
        INPUT0 = "INPUT0"
        INPUT1 = "INPUT1"
        if pf == "libtorch" or pf == "libtorch_nobatch":
            OUTPUT0 = "OUTPUT__0"
            OUTPUT1 = "OUTPUT__1"
            INPUT0 = "INPUT__0"
            INPUT1 = "INPUT__1"
        if "OUTPUT0" in outputs:
            if config[3]:
                output_req[OUTPUT0] = (InferContext.ResultFormat.RAW,
                                       shm_op0_handle)
            else:
                if output0_raw:
                    output_req[OUTPUT0] = InferContext.ResultFormat.RAW
                else:
                    output_req[OUTPUT0] = (InferContext.ResultFormat.CLASS,
                                           num_classes)
        if "OUTPUT1" in outputs:
            if config[3]:
                output_req[OUTPUT1] = (InferContext.ResultFormat.RAW,
                                       shm_op1_handle)
            else:
                if output1_raw:
                    output_req[OUTPUT1] = InferContext.ResultFormat.RAW
                else:
                    output_req[OUTPUT1] = (InferContext.ResultFormat.CLASS,
                                           num_classes)

        ctx = InferContext(config[0],
                           config[1],
                           model_name,
                           model_version,
                           correlation_id=correlation_id,
                           streaming=config[2],
                           verbose=True)
        if config[3]:
            results = ctx.run({
                INPUT0: shm_ip0_handle,
                INPUT1: shm_ip1_handle
            }, output_req, batch_size)
        else:
            results = ctx.run({
                INPUT0: input0_list,
                INPUT1: input1_list
            }, output_req, batch_size)

        if not skip_request_id_check:
            global _seen_request_ids
            request_id = ctx.get_last_request_id()
            tester.assertFalse(request_id in _seen_request_ids)
            _seen_request_ids.add(request_id)

        tester.assertEqual(ctx.get_last_request_model_name(), model_name)
        if model_version is not None:
            tester.assertEqual(ctx.get_last_request_model_version(),
                               model_version)

        tester.assertEqual(len(results), len(outputs))
        for (result_name, result_val) in iteritems(results):
            for b in range(batch_size):
                if ((result_name == OUTPUT0 and output0_raw)
                        or (result_name == OUTPUT1 and output1_raw)):
                    if result_name == OUTPUT0:
                        tester.assertTrue(
                            np.array_equal(result_val[b], expected0_list[b]),
                            "{}, {} expected: {}, got {}".format(
                                model_name, OUTPUT0, expected0_list[b],
                                result_val[b]))
                    elif result_name == OUTPUT1:
                        tester.assertTrue(
                            np.array_equal(result_val[b], expected1_list[b]),
                            "{}, {} expected: {}, got {}".format(
                                model_name, OUTPUT1, expected1_list[b],
                                result_val[b]))
                    else:
                        tester.assertTrue(
                            False,
                            "unexpected raw result {}".format(result_name))
                else:
                    # num_classes values must be returned and must
                    # match expected top values
                    class_list = result_val[b]
                    tester.assertEqual(len(class_list), num_classes)

                    expected0_flatten = expected0_list[b].flatten()
                    expected1_flatten = expected1_list[b].flatten()

                    for idx, ctuple in enumerate(class_list):
                        if result_name == OUTPUT0:
                            # can't compare indices since could have
                            # different indices with the same
                            # value/prob, so compare that the value of
                            # each index equals the expected
                            # value. Can only compare labels when the
                            # indices are equal.
                            tester.assertEqual(ctuple[1],
                                               expected0_flatten[ctuple[0]])
                            tester.assertEqual(
                                ctuple[1],
                                expected0_flatten[expected0_sort_idx[b][idx]])
                            if ctuple[0] == expected0_sort_idx[b][idx]:
                                tester.assertEqual(
                                    ctuple[2], 'label{}'.format(
                                        expected0_sort_idx[b][idx]))
                        elif result_name == OUTPUT1:
                            tester.assertEqual(ctuple[1],
                                               expected1_flatten[ctuple[0]])
                            tester.assertEqual(
                                ctuple[1],
                                expected1_flatten[expected1_sort_idx[b][idx]])
                        else:
                            tester.assertTrue(
                                False, "unexpected class result {}".format(
                                    result_name))

        if config[3]:
            shared_memory_ctx.unregister(shm_ip0_handle)
            shm.destroy_shared_memory_region(shm_ip0_handle)
            shared_memory_ctx.unregister(shm_ip1_handle)
            shm.destroy_shared_memory_region(shm_ip1_handle)
            if "OUTPUT0" in outputs:
                shared_memory_ctx.unregister(shm_op0_handle)
                shm.destroy_shared_memory_region(shm_op0_handle)
            if "OUTPUT1" in outputs:
                shared_memory_ctx.unregister(shm_op1_handle)
                shm.destroy_shared_memory_region(shm_op1_handle)

    return results

Ejemplo n.º 13

0

Mostrar archivo

def infer_zero(tester,
               pf,
               batch_size,
               tensor_dtype,
               input_shapes,
               output_shapes,
               model_version=None,
               use_http=True,
               use_grpc=True,
               use_streaming=True):
    """Run an inference whose outputs must equal its inputs, per endpoint.

    For every enabled endpoint configuration (HTTP, GRPC, GRPC streaming)
    random inputs are generated for each input/output pair, the request is
    issued through ``InferContext`` and each output is expected to equal the
    corresponding input reshaped to the output shape. When the module-level
    ``TEST_SHARED_MEMORY`` flag is truthy, tensors are exchanged through
    system shared memory regions that are created and registered before,
    and unregistered and destroyed after, each request.

    Args:
        tester: Object providing ``assertTrue`` / ``assertEqual``
            (presumably a ``unittest.TestCase`` -- confirm).
        pf: Platform string passed to ``tu.get_zero_model_name``;
            ``"libtorch"`` and ``"libtorch_nobatch"`` switch to
            ``INPUT__n`` / ``OUTPUT__n`` tensor names.
        batch_size: Number of tensors generated per input.
        tensor_dtype: dtype of all tensors (``np.object`` yields strings).
        input_shapes: One shape per input.
        output_shapes: One shape per output; must have the same length as
            ``input_shapes``.
        model_version: Model version passed to ``InferContext``; also checked
            against the last request when not None.
        use_http: Include the HTTP endpoint (localhost:8000).
        use_grpc: Include the GRPC endpoint (localhost:8001).
        use_streaming: Include the streaming GRPC endpoint (localhost:8001).

    Returns:
        The results of the last configuration that was run.
    """
    tester.assertTrue(use_http or use_grpc or use_streaming)
    # Each config is (url, protocol, streaming, use_shared_memory).
    configs = []
    if use_http:
        if TEST_SHARED_MEMORY:
            configs.append(("localhost:8000", ProtocolType.HTTP, False, True))
        else:
            configs.append(("localhost:8000", ProtocolType.HTTP, False, False))
    if use_grpc:
        if TEST_SHARED_MEMORY:
            configs.append(("localhost:8001", ProtocolType.GRPC, False, True))
        else:
            configs.append(("localhost:8001", ProtocolType.GRPC, False, False))
    if use_streaming:
        if TEST_SHARED_MEMORY:
            configs.append(("localhost:8001", ProtocolType.GRPC, True, True))
        else:
            configs.append(("localhost:8001", ProtocolType.GRPC, True, False))
    tester.assertEqual(len(input_shapes), len(output_shapes))
    io_cnt = len(input_shapes)

    for config in configs:
        model_name = tu.get_zero_model_name(pf, io_cnt, tensor_dtype)
        input_dict = {}
        output_dict = {}
        expected_dict = {}

        if config[3]:
            # create and register shared memory region for inputs and outputs
            shm_ip_handles = list()
            shm_op_handles = list()
            shared_memory_ctx = SharedMemoryControlContext(config[0],
                                                           config[1],
                                                           verbose=True)
            for io_num in range(io_cnt):
                input0_byte_size = tu.shape_element_count(input_shapes[io_num]) *\
                                    np.dtype(tensor_dtype).itemsize * batch_size
                output0_byte_size = tu.shape_element_count(output_shapes[io_num]) *\
                                    np.dtype(tensor_dtype).itemsize * batch_size
                shm_ip_handles.append(shm.create_shared_memory_region("input"+str(io_num)+"_data",\
                                            "/input"+str(io_num), input0_byte_size))
                shm_op_handles.append(shm.create_shared_memory_region("output"+str(io_num)+"_data",\
                                            "/output"+str(io_num), output0_byte_size))

                # Registration goes through the control context (the same
                # object that unregisters the regions during cleanup), not
                # through the shm module.
                shared_memory_ctx.register(shm_ip_handles[io_num])
                shared_memory_ctx.register(shm_op_handles[io_num])

        for io_num in range(io_cnt):
            if pf == "libtorch" or pf == "libtorch_nobatch":
                input_name = "INPUT__{}".format(io_num)
                output_name = "OUTPUT__{}".format(io_num)
            else:
                input_name = "INPUT{}".format(io_num)
                output_name = "OUTPUT{}".format(io_num)

            input_list = list()
            expected_list = list()
            for b in range(batch_size):
                rtensor_dtype = _range_repr_dtype(tensor_dtype)
                in0 = np.random.randint(low=np.iinfo(rtensor_dtype).min,
                                        high=np.iinfo(rtensor_dtype).max,
                                        size=input_shapes[io_num],
                                        dtype=rtensor_dtype)
                if tensor_dtype != np.object:
                    in0 = in0.astype(tensor_dtype)
                    expected0 = np.ndarray.copy(in0)
                else:
                    # NOTE(review): `unicode` is not a Python 3 builtin;
                    # presumably defined or aliased elsewhere in this file.
                    expected0 = np.array([
                        unicode(str(x), encoding='utf-8')
                        for x in in0.flatten()
                    ],
                                         dtype=object)
                    in0 = np.array([str(x) for x in in0.flatten()],
                                   dtype=object).reshape(in0.shape)

                # The output carries the input values in the output shape.
                expected0 = expected0.reshape(output_shapes[io_num])

                input_list.append(in0)
                expected_list.append(expected0)

            expected_dict[output_name] = expected_list
            if config[3]:
                # copy data into shared memory region for input values
                shm.set_shared_memory_region(shm_ip_handles[io_num],
                                             input_list)
                input_dict[input_name] = shm_ip_handles[io_num]
                output_dict[output_name] = (InferContext.ResultFormat.RAW,
                                            shm_op_handles[io_num])
            else:
                input_dict[input_name] = input_list
                output_dict[output_name] = InferContext.ResultFormat.RAW

        ctx = InferContext(config[0],
                           config[1],
                           model_name,
                           model_version,
                           correlation_id=0,
                           streaming=config[2],
                           verbose=True)
        results = ctx.run(input_dict, output_dict, batch_size)

        tester.assertEqual(ctx.get_last_request_model_name(), model_name)
        if model_version is not None:
            tester.assertEqual(ctx.get_last_request_model_version(),
                               model_version)

        tester.assertEqual(len(results), io_cnt)
        for (result_name, result_val) in iteritems(results):
            tester.assertTrue(result_name in output_dict)
            tester.assertTrue(result_name in expected_dict)
            for b in range(batch_size):
                expected = expected_dict[result_name][b]
                tester.assertEqual(result_val[b].shape, expected.shape)
                tester.assertTrue(
                    np.array_equal(result_val[b], expected),
                    "{}, {}, slot {}, expected: {}, got {}".format(
                        model_name, result_name, b, expected, result_val[b]))
        # Unregister and destroy every region created for this configuration.
        if config[3]:
            for io_num in range(io_cnt):
                shared_memory_ctx.unregister(shm_ip_handles[io_num])
                shm.destroy_shared_memory_region(shm_ip_handles[io_num])
                shared_memory_ctx.unregister(shm_op_handles[io_num])
                shm.destroy_shared_memory_region(shm_op_handles[io_num])

    return results

Ejemplo n.º 14

0

Mostrar archivo

def infer_shape_tensor(tester,
                       pf,
                       batch_size,
                       tensor_dtype,
                       input_shape_values,
                       dummy_input_shapes,
                       model_version=None,
                       use_http=True,
                       use_grpc=True,
                       use_streaming=True,
                       shm_suffix="",
                       use_system_shared_memory=False,
                       use_cuda_shared_memory=False,
                       priority=0,
                       timeout_us=0):
    """Run a shape-tensor model over every enabled protocol and verify it.

    For each of the ``len(input_shape_values)`` input/output pairs an int32
    shape tensor (``INPUT<n>``) and ``batch_size`` dummy tensors
    (``DUMMY_INPUT<n>``) are prepared, optionally staged in system or CUDA
    shared memory, and sent to the model. ``OUTPUT<n>`` must equal the shape
    tensor and the shape of ``DUMMY_OUTPUT<n>`` must equal the shape tensor's
    values.

    Args:
        tester: object providing ``assertTrue`` / ``assertEqual``.
        pf: platform string; must be "plan" or "plan_nobatch".
        batch_size: number of dummy tensors built per input and number of
            result slots checked per output.
        tensor_dtype: numpy dtype of the dummy tensors; ``np.object`` yields
            object arrays of strings.
        input_shape_values: one sequence of shape values per shape tensor.
        dummy_input_shapes: shape of each dummy input; must have the same
            length as ``input_shape_values``.
        model_version: forwarded to ``InferContext``; when not None the
            version reported for the request is asserted as well.
        use_http, use_grpc, use_streaming: endpoints to exercise; at least
            one must be true.
        shm_suffix: appended to every shared-memory region name and key.
        use_system_shared_memory, use_cuda_shared_memory: mutually exclusive
            switches selecting where inputs/outputs are staged.
        priority, timeout_us: forwarded to ``InferContext.run``.

    Returns:
        The results of the last configuration that was run.

    Raises:
        ValueError: if both shared-memory flags are set.
    """
    tester.assertTrue(use_http or use_grpc or use_streaming)
    configs = []
    if use_http:
        configs.append(("localhost:8000", ProtocolType.HTTP, False))
    if use_grpc:
        configs.append(("localhost:8001", ProtocolType.GRPC, False))
    if use_streaming:
        configs.append(("localhost:8001", ProtocolType.GRPC, True))
    tester.assertEqual(len(input_shape_values), len(dummy_input_shapes))
    io_cnt = len(input_shape_values)

    if use_system_shared_memory and use_cuda_shared_memory:
        raise ValueError(
            "Cannot set both System and CUDA shared memory flags to 1")

    input_dict = {}
    output_dict = {}
    expected_dict = {}
    # Maps each DUMMY_OUTPUT<n> to the OUTPUT<n> whose expected shape tensor
    # describes it, so that every pair can be verified after inference.
    dummy_to_output = {}
    shm_ip_handles = list()
    shm_op_handles = list()
    shared_memory_ctx = SharedMemoryControlContext("localhost:8000",
                                                   ProtocolType.HTTP,
                                                   verbose=False)

    for io_num in range(io_cnt):
        tester.assertTrue(pf == "plan" or pf == "plan_nobatch")

        input_name = "INPUT{}".format(io_num)
        output_name = "OUTPUT{}".format(io_num)
        dummy_input_name = "DUMMY_INPUT{}".format(io_num)
        dummy_output_name = "DUMMY_OUTPUT{}".format(io_num)
        dummy_to_output[dummy_output_name] = output_name

        input_list = list()
        dummy_input_list = list()
        expected_list = list()
        for b in range(batch_size):
            # Prepare the dummy tensor
            rtensor_dtype = _range_repr_dtype(tensor_dtype)
            if (rtensor_dtype != np.bool):
                dummy_in0 = np.random.randint(low=np.iinfo(rtensor_dtype).min,
                                              high=np.iinfo(rtensor_dtype).max,
                                              size=dummy_input_shapes[io_num],
                                              dtype=rtensor_dtype)
            else:
                dummy_in0 = np.random.choice(a=[False, True],
                                             size=dummy_input_shapes[io_num])
            if tensor_dtype != np.object:
                dummy_in0 = dummy_in0.astype(tensor_dtype)
            else:
                # Stringify the dummy tensor itself; the shape tensor 'in0'
                # has not been created yet at this point.
                dummy_in0 = np.array([str(x) for x in dummy_in0.flatten()],
                                     dtype=object).reshape(dummy_in0.shape)

            dummy_input_list.append(dummy_in0)

        # Prepare shape input tensor. Only one tensor per batch
        in0 = np.asarray(input_shape_values[io_num], dtype=np.int32)
        input_list.append(in0)

        # Prepare the expected list for the output
        expected0 = np.ndarray.copy(in0)
        expected_list.append(expected0)

        expected_dict[output_name] = expected_list

        # NOTE(review): in0 is always int32 but the size below is derived
        # from tensor_dtype; the two only agree for 4-byte dtypes - confirm
        # against the dtypes callers actually pass.
        input_byte_size = len(in0) * np.dtype(tensor_dtype).itemsize
        output_byte_size = input_byte_size * batch_size
        dummy_input_byte_size = tu.shape_element_count(dummy_input_shapes[io_num]) *\
                            np.dtype(tensor_dtype).itemsize * batch_size
        # The dimension of this tensor will be the value of the shape tensor
        dummy_output_byte_size = tu.shape_element_count(in0) *\
                            np.dtype(tensor_dtype).itemsize * batch_size

        # create and register shared memory region for inputs and outputs
        if use_cuda_shared_memory:
            shm_ip_handles.append(
                cudashm.create_shared_memory_region(
                    "input" + str(io_num) + "_data" + shm_suffix,
                    input_byte_size, 0))
            shm_ip_handles.append(
                cudashm.create_shared_memory_region(
                    "dummy_input" + str(io_num) + "_data" + shm_suffix,
                    dummy_input_byte_size, 0))
            shm_op_handles.append(
                cudashm.create_shared_memory_region(
                    "output" + str(io_num) + "_data" + shm_suffix,
                    output_byte_size, 0))
            shm_op_handles.append(
                cudashm.create_shared_memory_region(
                    "dummy_output" + str(io_num) + "_data" + shm_suffix,
                    dummy_output_byte_size, 0))

            shared_memory_ctx.cuda_register(shm_ip_handles[2 * io_num])
            shared_memory_ctx.cuda_register(shm_ip_handles[2 * io_num + 1])
            shared_memory_ctx.cuda_register(shm_op_handles[2 * io_num])
            shared_memory_ctx.cuda_register(shm_op_handles[2 * io_num + 1])

            # copy data into shared memory region for input values
            cudashm.set_shared_memory_region(shm_ip_handles[2 * io_num],
                                             input_list)
            cudashm.set_shared_memory_region(shm_ip_handles[2 * io_num + 1],
                                             dummy_input_list)
        elif use_system_shared_memory:
            shm_ip_handles.append(
                shm.create_shared_memory_region(
                    "input" + str(io_num) + "_data" + shm_suffix,
                    "/input" + str(io_num) + shm_suffix, input_byte_size))
            # Region name now matches its key and the CUDA branch
            # ("dummy_input", previously misspelled "dumy_input").
            shm_ip_handles.append(
                shm.create_shared_memory_region(
                    "dummy_input" + str(io_num) + "_data" + shm_suffix,
                    "/dummy_input" + str(io_num) + shm_suffix,
                    dummy_input_byte_size))
            shm_op_handles.append(
                shm.create_shared_memory_region(
                    "output" + str(io_num) + "_data" + shm_suffix,
                    "/output" + str(io_num) + shm_suffix, output_byte_size))
            shm_op_handles.append(
                shm.create_shared_memory_region(
                    "dummy_output" + str(io_num) + "_data" + shm_suffix,
                    "/dummy_output" + str(io_num) + shm_suffix,
                    dummy_output_byte_size))
            shared_memory_ctx.register(shm_ip_handles[2 * io_num])
            shared_memory_ctx.register(shm_ip_handles[2 * io_num + 1])
            shared_memory_ctx.register(shm_op_handles[2 * io_num])
            shared_memory_ctx.register(shm_op_handles[2 * io_num + 1])
            # copy data into shared memory region for input values
            shm.set_shared_memory_region(shm_ip_handles[2 * io_num],
                                         input_list)
            shm.set_shared_memory_region(shm_ip_handles[2 * io_num + 1],
                                         dummy_input_list)
        if use_system_shared_memory or use_cuda_shared_memory:
            # Shape of the shape tensor for *this* input, not always input 0.
            input_dict[input_name] = (shm_ip_handles[2 * io_num],
                                      [len(input_shape_values[io_num])])
            input_dict[dummy_input_name] = (shm_ip_handles[2 * io_num + 1],
                                            dummy_input_shapes[io_num])
            output_dict[output_name] = (InferContext.ResultFormat.RAW,
                                        shm_op_handles[2 * io_num])
            output_dict[dummy_output_name] = (InferContext.ResultFormat.RAW,
                                              shm_op_handles[2 * io_num + 1])
        else:
            input_dict[input_name] = input_list
            input_dict[dummy_input_name] = dummy_input_list
            output_dict[output_name] = InferContext.ResultFormat.RAW
            output_dict[dummy_output_name] = InferContext.ResultFormat.RAW

    # Run inference and check results for each config. The shared memory
    # regions are released in the finally clause so that a failed assertion
    # does not leave them registered.
    try:
        for config in configs:
            model_name = tu.get_zero_model_name(pf, io_cnt, tensor_dtype)

            ctx = InferContext(config[0],
                               config[1],
                               model_name,
                               model_version,
                               correlation_id=0,
                               streaming=config[2],
                               verbose=True)
            results = ctx.run(input_dict,
                              output_dict,
                              batch_size,
                              priority=priority,
                              timeout_us=timeout_us)

            tester.assertEqual(ctx.get_last_request_model_name(), model_name)
            if model_version is not None:
                tester.assertEqual(ctx.get_last_request_model_version(),
                                   model_version)

            tester.assertEqual(len(results), 2 * io_cnt)
            for (result_name, result_val) in iteritems(results):
                tester.assertTrue(result_name in output_dict)
                if result_name in expected_dict:
                    # OUTPUT<n>: must reproduce the shape tensor values.
                    expected = expected_dict[result_name][0]
                    for b in range(batch_size):
                        tester.assertEqual(result_val[b].shape,
                                           expected.shape)
                        tester.assertTrue(
                            np.array_equal(result_val[b], expected),
                            "{}, {}, slot {}, expected: {}, got {}".format(
                                model_name, result_name, b, expected,
                                result_val[b]))
                elif result_name in dummy_to_output:
                    # The shape of the dummy output should be equal to the
                    # shape values specified in the matching shape tensor
                    expected = expected_dict[dummy_to_output[result_name]][0]
                    for b in range(batch_size):
                        tester.assertTrue(
                            np.array_equal(result_val[b].shape, expected),
                            "{}, {}, slot {}, expected: {}, got {}".format(
                                model_name, result_name, b, expected,
                                result_val[b]))
    finally:
        if use_cuda_shared_memory or use_system_shared_memory:
            for io_num in range(2 * io_cnt):
                shared_memory_ctx.unregister(shm_ip_handles[io_num])
                shared_memory_ctx.unregister(shm_op_handles[io_num])
                if use_cuda_shared_memory:
                    cudashm.destroy_shared_memory_region(
                        shm_ip_handles[io_num])
                    cudashm.destroy_shared_memory_region(
                        shm_op_handles[io_num])
                else:
                    shm.destroy_shared_memory_region(shm_ip_handles[io_num])
                    shm.destroy_shared_memory_region(shm_op_handles[io_num])

    return results

Ejemplo n.º 15

0

Mostrar archivo

Archivo: sequence_util.py Proyecto: intellisyscorp/fitzme-triton-inference-server

    def check_sequence(self,
                       trial,
                       model_name,
                       input_dtype,
                       correlation_id,
                       sequence_thresholds,
                       values,
                       expected_result,
                       protocol,
                       batch_size=1,
                       sequence_name="<unknown>",
                       tensor_shape=(1, )):
        """Perform sequence of inferences. The 'values' holds a list of
        tuples, one for each inference with format:

        (flag_str, value, (lt_ms, gt_ms), (pre_delay_ms, post_delay_ms))

        Args:
            trial: trial name; must contain one of the known backend names
                (savedmodel, graphdef, netdef, custom, onnx, libtorch, plan).
                A "libtorch" prefix selects the INPUT__0/OUTPUT__0 tensor
                names instead of INPUT/OUTPUT.
            model_name: model the requests are sent to.
            input_dtype: numpy dtype of the input; np.object sends the
                values as strings.
            correlation_id: correlation id given to the InferContext.
            sequence_thresholds: (lt_ms, gt_ms) bounds on the duration of
                the whole sequence, or None to skip the check.
            values: per-inference tuples as described above; the threshold
                and delay entries may be None.
            expected_result: value the first element of the last response
                must equal.
            protocol: "http", "grpc" or "streaming"; exactly one
                configuration must result.
            batch_size: number of input tensors built per request.
            sequence_name: label used only in the printed progress line.
            tensor_shape: shape of every input tensor.

        Exceptions raised while executing the sequence are passed to
        self.add_deferred_exception instead of being propagated.
        """
        if (("savedmodel" not in trial) and ("graphdef" not in trial)
                and ("netdef" not in trial) and ("custom" not in trial)
                and ("onnx" not in trial) and ("libtorch" not in trial)
                and ("plan" not in trial)):
            self.assertFalse(True, "unknown trial type: " + trial)

        # Can only send the request exactly once since it is a
        # sequence model with state, so can have only a single config.
        configs = []
        if protocol == "http":
            configs.append(("localhost:8000", ProtocolType.HTTP, False))
        if protocol == "grpc":
            configs.append(("localhost:8001", ProtocolType.GRPC, False))
        if protocol == "streaming":
            configs.append(("localhost:8001", ProtocolType.GRPC, True))

        self.assertFalse(
            _test_system_shared_memory and _test_cuda_shared_memory,
            "Cannot set both System and CUDA shared memory flags to 1")

        self.assertEqual(len(configs), 1)

        # Most recent input region; stays None until one has been created so
        # the cleanup at the end knows whether there is anything to release.
        shm_ip_handle = None

        # create and register shared memory output region in advance
        if _test_system_shared_memory or _test_cuda_shared_memory:
            shared_memory_ctx = SharedMemoryControlContext("localhost:8000",
                                                           ProtocolType.HTTP,
                                                           verbose=True)
            output_byte_size = 512
            if _test_system_shared_memory:
                shm_op_handle = shm.create_shared_memory_region(
                    "output_data", "/output", output_byte_size)
                shared_memory_ctx.unregister(shm_op_handle)
                shared_memory_ctx.register(shm_op_handle)
            elif _test_cuda_shared_memory:
                shm_op_handle = cudashm.create_shared_memory_region(
                    "output_data", output_byte_size, 0)
                shared_memory_ctx.unregister(shm_op_handle)
                shared_memory_ctx.cuda_register(shm_op_handle)

        for config in configs:
            ctx = InferContext(config[0],
                               config[1],
                               model_name,
                               correlation_id=correlation_id,
                               streaming=config[2],
                               verbose=True)
            # Execute the sequence of inference...
            try:
                seq_start_ms = int(round(time.time() * 1000))

                for flag_str, value, thresholds, delay_ms in values:
                    if delay_ms is not None:
                        time.sleep(delay_ms[0] / 1000.0)

                    flags = InferRequestHeader.FLAG_NONE
                    if flag_str is not None:
                        if "start" in flag_str:
                            flags = flags | InferRequestHeader.FLAG_SEQUENCE_START
                        if "end" in flag_str:
                            flags = flags | InferRequestHeader.FLAG_SEQUENCE_END

                    input_list = list()
                    for b in range(batch_size):
                        if input_dtype == np.object:
                            in0 = np.full(tensor_shape, value, dtype=np.int32)
                            in0n = np.array(
                                [str(x) for x in in0.reshape(in0.size)],
                                dtype=object)
                            in0 = in0n.reshape(tensor_shape)
                        else:
                            in0 = np.full(tensor_shape,
                                          value,
                                          dtype=input_dtype)
                        input_list.append(in0)

                    # create input shared memory and copy input data values into it
                    if _test_system_shared_memory or _test_cuda_shared_memory:
                        input_list_tmp = iu._prepend_string_size(
                            input_list) if (input_dtype
                                            == np.object) else input_list
                        input_byte_size = sum(
                            [i0.nbytes for i0 in input_list_tmp])
                        if _test_system_shared_memory:
                            shm_ip_handle = shm.create_shared_memory_region(
                                "input_data", "/input", input_byte_size)
                            shm.set_shared_memory_region(
                                shm_ip_handle, input_list_tmp)
                            shared_memory_ctx.unregister(shm_ip_handle)
                            shared_memory_ctx.register(shm_ip_handle)
                        elif _test_cuda_shared_memory:
                            shm_ip_handle = cudashm.create_shared_memory_region(
                                "input_data", input_byte_size, 0)
                            cudashm.set_shared_memory_region(
                                shm_ip_handle, input_list_tmp)
                            shared_memory_ctx.unregister(shm_ip_handle)
                            shared_memory_ctx.cuda_register(shm_ip_handle)

                        input_info = (shm_ip_handle, tensor_shape)
                        output_info = (InferContext.ResultFormat.RAW,
                                       shm_op_handle)
                    else:
                        input_info = input_list
                        output_info = InferContext.ResultFormat.RAW

                    start_ms = int(round(time.time() * 1000))
                    INPUT = "INPUT__0" if trial.startswith(
                        "libtorch") else "INPUT"
                    OUTPUT = "OUTPUT__0" if trial.startswith(
                        "libtorch") else "OUTPUT"

                    results = ctx.run({INPUT: input_info},
                                      {OUTPUT: output_info},
                                      batch_size=batch_size,
                                      flags=flags)

                    end_ms = int(round(time.time() * 1000))

                    self.assertEqual(len(results), 1)
                    self.assertTrue(OUTPUT in results)
                    result = results[OUTPUT][0][0]
                    print("{}: {}".format(sequence_name, result))

                    if thresholds is not None:
                        lt_ms = thresholds[0]
                        gt_ms = thresholds[1]
                        if lt_ms is not None:
                            self.assertTrue(
                                (end_ms - start_ms) < lt_ms,
                                "expected less than " + str(lt_ms) +
                                "ms response time, got " +
                                str(end_ms - start_ms) + " ms")
                        if gt_ms is not None:
                            self.assertTrue(
                                (end_ms - start_ms) > gt_ms,
                                "expected greater than " + str(gt_ms) +
                                "ms response time, got " +
                                str(end_ms - start_ms) + " ms")
                    if delay_ms is not None:
                        time.sleep(delay_ms[1] / 1000.0)

                seq_end_ms = int(round(time.time() * 1000))

                if input_dtype == np.object:
                    self.assertEqual(int(result), expected_result)
                else:
                    self.assertEqual(result, expected_result)

                if sequence_thresholds is not None:
                    lt_ms = sequence_thresholds[0]
                    gt_ms = sequence_thresholds[1]
                    if lt_ms is not None:
                        self.assertTrue((seq_end_ms - seq_start_ms) < lt_ms,
                                        "sequence expected less than " +
                                        str(lt_ms) + "ms response time, got " +
                                        str(seq_end_ms - seq_start_ms) + " ms")
                    if gt_ms is not None:
                        self.assertTrue((seq_end_ms - seq_start_ms) > gt_ms,
                                        "sequence expected greater than " +
                                        str(gt_ms) + "ms response time, got " +
                                        str(seq_end_ms - seq_start_ms) + " ms")
            except Exception as ex:
                self.add_deferred_exception(ex)

        if _test_system_shared_memory or _test_cuda_shared_memory:
            shared_memory_ctx.unregister(shm_op_handle)
            # Every step reuses the same input region name, so releasing the
            # most recent handle is what removes the "input_data"
            # registration. This is done here, after all timing has been
            # measured, so the thresholds above are not affected.
            if shm_ip_handle is not None:
                shared_memory_ctx.unregister(shm_ip_handle)
            if _test_system_shared_memory:
                shm.destroy_shared_memory_region(shm_op_handle)
                if shm_ip_handle is not None:
                    shm.destroy_shared_memory_region(shm_ip_handle)
            elif _test_cuda_shared_memory:
                cudashm.destroy_shared_memory_region(shm_op_handle)
                if shm_ip_handle is not None:
                    cudashm.destroy_shared_memory_region(shm_ip_handle)

Ejemplo n.º 16

0

Mostrar archivo

Archivo: sequence_util.py Proyecto: intellisyscorp/fitzme-triton-inference-server

    def precreate_register_dynaseq_shape_tensor_regions(
        self, value_list, dtype, i, batch_size=1, tensor_shape=(1, )):
        """Create, fill and register the shared memory regions of a sequence.

        For every ``(shape_value, value)`` entry of ``value_list`` six
        regions are created: input, shape input, dummy input, shape output,
        output and resized output. The three input regions are filled with
        the generated tensors and all six are registered.

        Args:
            value_list: iterable of ``(shape_value, value)`` pairs.
            dtype: numpy dtype of the dummy input; np.object produces
                object arrays of strings.
            i: index embedded, together with the position of the entry in
                ``value_list``, in every region name and key.
            batch_size: number of input and dummy input tensors per entry.
            tensor_shape: shape of every generated tensor.

        Returns:
            A flat list holding, per entry and in this order, the handles of
            the input, shape input, dummy input, shape output, output and
            resized output regions. Empty when neither shared memory mode is
            enabled.
        """
        if not (_test_system_shared_memory or _test_cuda_shared_memory):
            return []

        shared_memory_ctx = SharedMemoryControlContext("localhost:8000",
                                                       ProtocolType.HTTP,
                                                       verbose=True)
        shm_region_handles = []
        for j, (shape_value, value) in enumerate(value_list):
            input_list = list()
            shape_input_list = list()
            dummy_input_list = list()

            for b in range(batch_size):
                if dtype == np.object:
                    dummy_in0 = np.full(tensor_shape, value, dtype=np.int32)
                    # Flatten using the dummy tensor's own size; 'in0' is
                    # only created further down in this iteration.
                    dummy_in0n = np.array(
                        [str(x) for x in dummy_in0.reshape(dummy_in0.size)],
                        dtype=object)
                    dummy_in0 = dummy_in0n.reshape(tensor_shape)
                else:
                    dummy_in0 = np.full(tensor_shape, value, dtype=dtype)
                dummy_input_list.append(dummy_in0)
                in0 = np.full(tensor_shape, value, dtype=np.int32)
                input_list.append(in0)

            # Only one shape tensor input per batch
            shape_input_list.append(
                np.full(tensor_shape, shape_value, dtype=np.int32))

            # NOTE(review): input_list always holds int32 arrays, yet the
            # string-size prefix is applied to it (not to dummy_input_list)
            # when dtype is np.object - confirm this is intended.
            input_list_tmp = iu._prepend_string_size(input_list) if (
                dtype == np.object) else input_list
            input_byte_size = sum([i0.nbytes for i0 in input_list_tmp])
            shape_input_byte_size = sum(
                [i0.nbytes for i0 in shape_input_list])
            dummy_input_byte_size = sum(
                [i0.nbytes for i0 in dummy_input_list])
            shape_output_byte_size = shape_input_byte_size
            output_byte_size = np.dtype(np.int32).itemsize + 2
            resized_output_byte_size = 32 * shape_value

            # create shared memory regions and copy data for input values
            if _test_system_shared_memory:
                shm_ip_handle = shm.create_shared_memory_region(
                    'ip{}{}_data'.format(i, j), '/ip{}{}'.format(i, j),
                    input_byte_size)
                shm_shape_ip_handle = shm.create_shared_memory_region(
                    'shape_ip{}{}_data'.format(i, j),
                    '/shape_ip{}{}'.format(i, j), shape_input_byte_size)
                shm_dummy_ip_handle = shm.create_shared_memory_region(
                    'dummy_ip{}{}_data'.format(i, j),
                    '/dummy_ip{}{}'.format(i, j), dummy_input_byte_size)
                shm_shape_op_handle = shm.create_shared_memory_region(
                    'shape_op{}{}_data'.format(i, j),
                    '/shape_op{}{}'.format(i, j), shape_output_byte_size)
                shm_op_handle = shm.create_shared_memory_region(
                    'op{}{}_data'.format(i, j), '/op{}{}'.format(i, j),
                    output_byte_size)
                shm_resized_op_handle = shm.create_shared_memory_region(
                    'resized_op{}{}_data'.format(i, j),
                    '/resized_op{}{}'.format(i, j),
                    resized_output_byte_size)
                shm.set_shared_memory_region(shm_ip_handle, input_list_tmp)
                shm.set_shared_memory_region(shm_shape_ip_handle,
                                             shape_input_list)
                shm.set_shared_memory_region(shm_dummy_ip_handle,
                                             dummy_input_list)
                shared_memory_ctx.register(shm_ip_handle)
                shared_memory_ctx.register(shm_shape_ip_handle)
                shared_memory_ctx.register(shm_dummy_ip_handle)
                shared_memory_ctx.register(shm_shape_op_handle)
                shared_memory_ctx.register(shm_op_handle)
                shared_memory_ctx.register(shm_resized_op_handle)
            elif _test_cuda_shared_memory:
                shm_ip_handle = cudashm.create_shared_memory_region(
                    'ip{}{}_data'.format(i, j), input_byte_size, 0)
                shm_shape_ip_handle = cudashm.create_shared_memory_region(
                    'shape_ip{}{}_data'.format(i, j),
                    shape_input_byte_size, 0)
                shm_dummy_ip_handle = cudashm.create_shared_memory_region(
                    'dummy_ip{}{}_data'.format(i, j),
                    dummy_input_byte_size, 0)
                shm_shape_op_handle = cudashm.create_shared_memory_region(
                    'shape_op{}{}_data'.format(i, j),
                    shape_output_byte_size, 0)
                shm_op_handle = cudashm.create_shared_memory_region(
                    'op{}{}_data'.format(i, j), output_byte_size, 0)
                shm_resized_op_handle = cudashm.create_shared_memory_region(
                    'resized_op{}{}_data'.format(i, j),
                    resized_output_byte_size, 0)
                cudashm.set_shared_memory_region(shm_ip_handle,
                                                 input_list_tmp)
                cudashm.set_shared_memory_region(shm_shape_ip_handle,
                                                 shape_input_list)
                cudashm.set_shared_memory_region(shm_dummy_ip_handle,
                                                 dummy_input_list)
                shared_memory_ctx.cuda_register(shm_ip_handle)
                shared_memory_ctx.cuda_register(shm_shape_ip_handle)
                shared_memory_ctx.cuda_register(shm_dummy_ip_handle)
                shared_memory_ctx.cuda_register(shm_shape_op_handle)
                shared_memory_ctx.cuda_register(shm_op_handle)
                shared_memory_ctx.cuda_register(shm_resized_op_handle)
            shm_region_handles.append(shm_ip_handle)
            shm_region_handles.append(shm_shape_ip_handle)
            shm_region_handles.append(shm_dummy_ip_handle)
            shm_region_handles.append(shm_shape_op_handle)
            shm_region_handles.append(shm_op_handle)
            shm_region_handles.append(shm_resized_op_handle)
        return shm_region_handles

Ejemplo n.º 17

0

Mostrar archivo

Archivo: shm_util.py Proyecto: zhangxuann/tensorrt-inference-server

def create_register_set_shm_regions(input0_list, input1_list, expected0_list,
                                    expected1_list, outputs, shm_region_names,
                                    precreated_shm_regions):
    """Stage two inputs and the requested outputs in shared memory.

    Depending on TEST_SYSTEM_SHARED_MEMORY / TEST_CUDA_SHARED_MEMORY the
    regions are created through ``shm`` or ``cudashm``. Both input regions
    are always created, filled and (re)registered. For each of "OUTPUT0" and
    "OUTPUT1" present in ``outputs`` a region is either created and
    registered, or taken from ``precreated_shm_regions`` when that is given.

    Args:
        input0_list, input1_list: tensors copied into the input regions.
        expected0_list, expected1_list: tensors whose ``nbytes`` sizes the
            output regions.
        outputs: container tested for "OUTPUT0" / "OUTPUT1".
        shm_region_names: base names for the regions, or None for
            ['input0', 'input1', 'output0', 'output1'].
        precreated_shm_regions: output handles to reuse, or None.

    Returns:
        The list of handles: both inputs followed by the selected outputs;
        empty when neither shared memory mode is enabled.

    Raises:
        ValueError: if both shared memory flags are set.
    """
    if TEST_CUDA_SHARED_MEMORY and TEST_SYSTEM_SHARED_MEMORY:
        raise ValueError(
            "Cannot set both System and CUDA shared memory flags to 1")

    control_ctx = SharedMemoryControlContext("localhost:8000",
                                             ProtocolType.HTTP,
                                             verbose=False)

    def total_bytes(tensors):
        # Combined size in bytes of all tensors in the list.
        return sum([tensor.nbytes for tensor in tensors])

    in0_bytes = total_bytes(input0_list)
    in1_bytes = total_bytes(input1_list)
    out0_bytes = total_bytes(expected0_list)
    out1_bytes = total_bytes(expected1_list)

    region_names = shm_region_names
    if region_names is None:
        region_names = ['input0', 'input1', 'output0', 'output1']

    # Select the backend-specific primitives once; everything after this
    # point is identical for system and CUDA shared memory.
    if TEST_SYSTEM_SHARED_MEMORY:

        def create_region(base_name, byte_size):
            return shm.create_shared_memory_region(base_name + '_data',
                                                   '/' + base_name,
                                                   byte_size)

        fill_region = shm.set_shared_memory_region
        register_region = control_ctx.register
    elif TEST_CUDA_SHARED_MEMORY:

        def create_region(base_name, byte_size):
            return cudashm.create_shared_memory_region(base_name + '_data',
                                                       byte_size, 0)

        fill_region = cudashm.set_shared_memory_region
        register_region = control_ctx.cuda_register
    else:
        return []

    def refresh_registration(handle):
        # Unregister first, then register the freshly created region.
        control_ctx.unregister(handle)
        register_region(handle)

    ip0_handle = create_region(region_names[0], in0_bytes)
    ip1_handle = create_region(region_names[1], in1_bytes)
    fill_region(ip0_handle, input0_list)
    fill_region(ip1_handle, input1_list)
    refresh_registration(ip0_handle)
    refresh_registration(ip1_handle)
    io_handles = [ip0_handle, ip1_handle]

    # 'slot' counts the outputs handled so far; it selects both the region
    # name (offset by the two input names) and the precreated handle.
    slot = 0
    for tensor_name, byte_size in (("OUTPUT0", out0_bytes),
                                   ("OUTPUT1", out1_bytes)):
        if tensor_name not in outputs:
            continue
        if precreated_shm_regions is None:
            op_handle = create_region(region_names[2 + slot], byte_size)
            refresh_registration(op_handle)
        else:
            op_handle = precreated_shm_regions[slot]
        io_handles.append(op_handle)
        slot += 1

    return io_handles