Example 1
def infer_zero(tester,
               pf,
               batch_size,
               tensor_dtype,
               input_shapes,
               output_shapes,
               model_version=None,
               use_http=True,
               use_grpc=True,
               use_streaming=True):
    tester.assertTrue(use_http or use_grpc or use_streaming)
    configs = []
    if use_http:
        if TEST_SHARED_MEMORY:
            configs.append(("localhost:8000", ProtocolType.HTTP, False, True))
        else:
            configs.append(("localhost:8000", ProtocolType.HTTP, False, False))
    if use_grpc:
        if TEST_SHARED_MEMORY:
            configs.append(("localhost:8001", ProtocolType.GRPC, False, True))
        else:
            configs.append(("localhost:8001", ProtocolType.GRPC, False, False))
    if use_streaming:
        if TEST_SHARED_MEMORY:
            configs.append(("localhost:8001", ProtocolType.GRPC, True, True))
        else:
            configs.append(("localhost:8001", ProtocolType.GRPC, True, False))
    tester.assertEqual(len(input_shapes), len(output_shapes))
    io_cnt = len(input_shapes)

    for config in configs:
        model_name = tu.get_zero_model_name(pf, io_cnt, tensor_dtype)
        input_dict = {}
        output_dict = {}
        expected_dict = {}

        if config[3]:
            # create and register shared memory region for inputs and outputs
            shm_ip_handles = list()
            shm_op_handles = list()
            shared_memory_ctx = SharedMemoryControlContext(config[0],
                                                           config[1],
                                                           verbose=True)
            for io_num in range(io_cnt):
                input0_byte_size = tu.shape_element_count(input_shapes[io_num]) *\
                                    np.dtype(tensor_dtype).itemsize * batch_size
                output0_byte_size = tu.shape_element_count(output_shapes[io_num]) *\
                                    np.dtype(tensor_dtype).itemsize * batch_size
                shm_ip_handles.append(shm.create_shared_memory_region("input"+str(io_num)+"_data",\
                                            "/input"+str(io_num), input0_byte_size))
                shm_op_handles.append(shm.create_shared_memory_region("output"+str(io_num)+"_data",\
                                            "/output"+str(io_num), output0_byte_size))

                shm.register(shm_ip_handles[io_num])
                shm.register(shm_op_handles[io_num])

            offset_input = 0
            offset_output = 0

        for io_num in range(io_cnt):
            if pf == "libtorch" or pf == "libtorch_nobatch":
                input_name = "INPUT__{}".format(io_num)
                output_name = "OUTPUT__{}".format(io_num)
            else:
                input_name = "INPUT{}".format(io_num)
                output_name = "OUTPUT{}".format(io_num)

            input_list = list()
            expected_list = list()
            for b in range(batch_size):
                rtensor_dtype = _range_repr_dtype(tensor_dtype)
                in0 = np.random.randint(low=np.iinfo(rtensor_dtype).min,
                                        high=np.iinfo(rtensor_dtype).max,
                                        size=input_shapes[io_num],
                                        dtype=rtensor_dtype)
                if tensor_dtype != np.object:
                    in0 = in0.astype(tensor_dtype)
                    expected0 = np.ndarray.copy(in0)
                else:
                    expected0 = np.array([
                        unicode(str(x), encoding='utf-8')
                        for x in in0.flatten()
                    ],
                                         dtype=object)
                    in0 = np.array([str(x) for x in in0.flatten()],
                                   dtype=object).reshape(in0.shape)

                expected0 = expected0.reshape(output_shapes[io_num])

                input_list.append(in0)
                expected_list.append(expected0)

            expected_dict[output_name] = expected_list
            if config[3]:
                # copy data into shared memory region for input values
                shm.set_shared_memory_region(shm_ip_handles[io_num],
                                             input_list)
                input_dict[input_name] = shm_ip_handles[io_num]
                output_dict[output_name] = (InferContext.ResultFormat.RAW,
                                            shm_op_handles[io_num])
            else:
                input_dict[input_name] = input_list
                output_dict[output_name] = InferContext.ResultFormat.RAW

        ctx = InferContext(config[0],
                           config[1],
                           model_name,
                           model_version,
                           correlation_id=0,
                           streaming=config[2],
                           verbose=True)
        results = ctx.run(input_dict, output_dict, batch_size)

        tester.assertEqual(ctx.get_last_request_model_name(), model_name)
        if model_version is not None:
            tester.assertEqual(ctx.get_last_request_model_version(),
                               model_version)

        tester.assertEqual(len(results), io_cnt)
        for (result_name, result_val) in iteritems(results):
            tester.assertTrue(result_name in output_dict)
            tester.assertTrue(result_name in expected_dict)
            for b in range(batch_size):
                expected = expected_dict[result_name][b]
                tester.assertEqual(result_val[b].shape, expected.shape)
                tester.assertTrue(
                    np.array_equal(result_val[b], expected),
                    "{}, {}, slot {}, expected: {}, got {}".format(
                        model_name, result_name, b, expected, result_val[b]))
        if config[3]:
            for io_num in range(io_cnt):
                shared_memory_ctx.unregister(shm_ip_handles[io_num])
                shm.destroy_shared_memory_region(shm_ip_handles[io_num])
                shared_memory_ctx.unregister(shm_op_handles[io_num])
                shm.destroy_shared_memory_region(shm_op_handles[io_num])

    return results
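
A minimal usage sketch for this first variant, assuming a unittest-based test class, the module-level imports the listing relies on (numpy, the tensorrtserver client, the tu/shm test utilities), and a running server that serves the corresponding *_zero_* model. The platform name "graphdef" and the [[16]] shapes are illustrative assumptions, not part of the original listing.

import unittest
import numpy as np

class InferZeroTest(unittest.TestCase):
    def test_graphdef_single_io(self):
        # One input/output pair of shape [16] with batch size 1; the default
        # flags exercise the HTTP, GRPC and GRPC-streaming configs in turn.
        infer_zero(self, "graphdef", 1, np.float32, [[16]], [[16]])

if __name__ == "__main__":
    unittest.main()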
Example 2
def infer_zero(tester,
               pf,
               batch_size,
               tensor_dtype,
               input_shapes,
               output_shapes,
               model_version=None,
               use_http=True,
               use_grpc=True,
               use_streaming=True,
               shm_region_name_prefix=None,
               use_system_shared_memory=False,
               use_cuda_shared_memory=False):
    tester.assertTrue(use_http or use_grpc or use_streaming)
    configs = []
    if use_http:
        configs.append(("localhost:8000", ProtocolType.HTTP, False))
    if use_grpc:
        configs.append(("localhost:8001", ProtocolType.GRPC, False))
    if use_streaming:
        configs.append(("localhost:8001", ProtocolType.GRPC, True))
    tester.assertEqual(len(input_shapes), len(output_shapes))
    io_cnt = len(input_shapes)

    if shm_region_name_prefix is None:
        shm_region_name_prefix = ["input", "output"]

    input_dict = {}
    output_dict = {}
    expected_dict = {}
    shm_ip_handles = list()
    shm_op_handles = list()
    shared_memory_ctx = SharedMemoryControlContext("localhost:8000",
                                                   ProtocolType.HTTP,
                                                   verbose=False)

    for io_num in range(io_cnt):
        if pf == "libtorch" or pf == "libtorch_nobatch":
            input_name = "INPUT__{}".format(io_num)
            output_name = "OUTPUT__{}".format(io_num)
        else:
            input_name = "INPUT{}".format(io_num)
            output_name = "OUTPUT{}".format(io_num)

        input_list = list()
        expected_list = list()
        for b in range(batch_size):
            rtensor_dtype = _range_repr_dtype(tensor_dtype)
            if (rtensor_dtype != np.bool):
                in0 = np.random.randint(low=np.iinfo(rtensor_dtype).min,
                                        high=np.iinfo(rtensor_dtype).max,
                                        size=input_shapes[io_num],
                                        dtype=rtensor_dtype)
            else:
                in0 = np.random.choice(a=[False, True],
                                       size=input_shapes[io_num])
            if tensor_dtype != np.object:
                in0 = in0.astype(tensor_dtype)
                expected0 = np.ndarray.copy(in0)
            else:
                expected0 = np.array(
                    [unicode(str(x), encoding='utf-8') for x in in0.flatten()],
                    dtype=object)
                in0 = np.array([str(x) for x in in0.flatten()],
                               dtype=object).reshape(in0.shape)

            expected0 = expected0.reshape(output_shapes[io_num])

            input_list.append(in0)
            expected_list.append(expected0)

        expected_dict[output_name] = expected_list

        input_byte_size = tu.shape_element_count(input_shapes[io_num]) *\
                            np.dtype(tensor_dtype).itemsize * batch_size
        output_byte_size = tu.shape_element_count(output_shapes[io_num]) *\
                            np.dtype(tensor_dtype).itemsize * batch_size
        # create and register shared memory region for inputs and outputs
        shm_io_handle = su.create_register_set_either_shm_region(
            [
                shm_region_name_prefix[0] + str(io_num),
                shm_region_name_prefix[1] + str(io_num)
            ], input_list, input_byte_size, output_byte_size,
            shared_memory_ctx, use_system_shared_memory,
            use_cuda_shared_memory)
        if len(shm_io_handle) != 0:
            shm_ip_handles.append(shm_io_handle[0])
            shm_op_handles.append(shm_io_handle[1])
            input_dict[input_name] = (shm_ip_handles[io_num], input_shapes[io_num])
            output_dict[output_name] = (InferContext.ResultFormat.RAW,
                                        shm_op_handles[io_num])
        else:
            input_dict[input_name] = input_list
            output_dict[output_name] = InferContext.ResultFormat.RAW

    # Run inference and check results for each config
    for config in configs:
        model_name = tu.get_zero_model_name(pf, io_cnt, tensor_dtype)

        ctx = InferContext(config[0],
                           config[1],
                           model_name,
                           model_version,
                           correlation_id=0,
                           streaming=config[2],
                           verbose=True)
        results = ctx.run(input_dict, output_dict, batch_size)

        tester.assertEqual(ctx.get_last_request_model_name(), model_name)
        if model_version is not None:
            tester.assertEqual(ctx.get_last_request_model_version(),
                               model_version)

        tester.assertEqual(len(results), io_cnt)
        for (result_name, result_val) in iteritems(results):
            tester.assertTrue(result_name in output_dict)
            tester.assertTrue(result_name in expected_dict)
            for b in range(batch_size):
                expected = expected_dict[result_name][b]
                tester.assertEqual(result_val[b].shape, expected.shape)
                tester.assertTrue(
                    np.array_equal(result_val[b], expected),
                    "{}, {}, slot {}, expected: {}, got {}".format(
                        model_name, result_name, b, expected, result_val[b]))

    if len(shm_ip_handles) != 0:
        for io_num in range(io_cnt):
            shared_memory_ctx.unregister(shm_ip_handles[io_num])
            shared_memory_ctx.unregister(shm_op_handles[io_num])
            su.destroy_either_shm_region(shm_ip_handles[io_num],
                                         use_system_shared_memory,
                                         use_cuda_shared_memory)
            su.destroy_either_shm_region(shm_op_handles[io_num],
                                         use_system_shared_memory,
                                         use_cuda_shared_memory)

    return results
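
The second variant adds explicit shared-memory controls. Below is a sketch of a call that stages inputs and outputs through system shared memory, assuming the same unittest/numpy imports as the sketch above; the platform "plan", the shapes, and the "ip"/"op" region-name prefixes are illustrative assumptions.

class InferZeroSharedMemoryTest(unittest.TestCase):
    def test_plan_system_shm(self):
        # Two I/O pairs with batch size 8; regions named "ip0"/"op0" and
        # "ip1"/"op1" are created and registered by
        # su.create_register_set_either_shm_region inside the helper.
        infer_zero(self, "plan", 8, np.float32,
                   [[16], [32]], [[16], [32]],
                   shm_region_name_prefix=["ip", "op"],
                   use_system_shared_memory=True)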
Example 3
def infer_shape_tensor(tester,
                       pf,
                       batch_size,
                       tensor_dtype,
                       input_shape_values,
                       dummy_input_shapes,
                       model_version=None,
                       use_http=True,
                       use_grpc=True,
                       use_streaming=True,
                       shm_suffix="",
                       use_system_shared_memory=False,
                       use_cuda_shared_memory=False,
                       priority=0,
                       timeout_us=0):
    tester.assertTrue(use_http or use_grpc or use_streaming)
    configs = []
    if use_http:
        configs.append(("localhost:8000", ProtocolType.HTTP, False))
    if use_grpc:
        configs.append(("localhost:8001", ProtocolType.GRPC, False))
    if use_streaming:
        configs.append(("localhost:8001", ProtocolType.GRPC, True))
    tester.assertEqual(len(input_shape_values), len(dummy_input_shapes))
    io_cnt = len(input_shape_values)

    if use_system_shared_memory and use_cuda_shared_memory:
        raise ValueError(
            "Cannot set both System and CUDA shared memory flags to 1")

    input_dict = {}
    output_dict = {}
    expected_dict = {}
    shm_ip_handles = list()
    shm_op_handles = list()
    shared_memory_ctx = SharedMemoryControlContext("localhost:8000",
                                                   ProtocolType.HTTP,
                                                   verbose=False)

    for io_num in range(io_cnt):
        tester.assertTrue(pf == "plan" or pf == "plan_nobatch")

        input_name = "INPUT{}".format(io_num)
        output_name = "OUTPUT{}".format(io_num)
        dummy_input_name = "DUMMY_INPUT{}".format(io_num)
        dummy_output_name = "DUMMY_OUTPUT{}".format(io_num)

        input_list = list()
        dummy_input_list = list()
        expected_list = list()
        for b in range(batch_size):
            # Prepare the dummy tensor
            rtensor_dtype = _range_repr_dtype(tensor_dtype)
            if (rtensor_dtype != np.bool):
                dummy_in0 = np.random.randint(low=np.iinfo(rtensor_dtype).min,
                                              high=np.iinfo(rtensor_dtype).max,
                                              size=dummy_input_shapes[io_num],
                                              dtype=rtensor_dtype)
            else:
                dummy_in0 = np.random.choice(a=[False, True],
                                             size=dummy_input_shapes[io_num])
            if tensor_dtype != np.object:
                dummy_in0 = dummy_in0.astype(tensor_dtype)
            else:
                dummy_in0 = np.array([str(x) for x in dummy_in0.flatten()],
                                     dtype=object).reshape(dummy_in0.shape)

            dummy_input_list.append(dummy_in0)

        # Prepare shape input tensor. Only one tensor per batch
        in0 = np.asarray(input_shape_values[io_num], dtype=np.int32)
        input_list.append(in0)

        # Prepare the expected list for the output
        expected0 = np.ndarray.copy(in0)
        expected_list.append(expected0)

        expected_dict[output_name] = expected_list

        input_byte_size = len(in0) * np.dtype(tensor_dtype).itemsize
        output_byte_size = input_byte_size * batch_size
        dummy_input_byte_size = tu.shape_element_count(dummy_input_shapes[io_num]) *\
                            np.dtype(tensor_dtype).itemsize * batch_size
        # The dimension of this tensor will be the value of the shape tensor
        dummy_output_byte_size = tu.shape_element_count(in0) *\
                            np.dtype(tensor_dtype).itemsize * batch_size

        # create and register shared memory region for inputs and outputs
        if use_cuda_shared_memory:
            shm_ip_handles.append(
                cudashm.create_shared_memory_region(
                    "input" + str(io_num) + "_data" + shm_suffix,
                    input_byte_size, 0))
            shm_ip_handles.append(
                cudashm.create_shared_memory_region(
                    "dummy_input" + str(io_num) + "_data" + shm_suffix,
                    dummy_input_byte_size, 0))
            shm_op_handles.append(
                cudashm.create_shared_memory_region(
                    "output" + str(io_num) + "_data" + shm_suffix,
                    output_byte_size, 0))
            shm_op_handles.append(
                cudashm.create_shared_memory_region(
                    "dummy_output" + str(io_num) + "_data" + shm_suffix,
                    dummy_output_byte_size, 0))

            shared_memory_ctx.cuda_register(shm_ip_handles[2 * io_num])
            shared_memory_ctx.cuda_register(shm_ip_handles[2 * io_num + 1])
            shared_memory_ctx.cuda_register(shm_op_handles[2 * io_num])
            shared_memory_ctx.cuda_register(shm_op_handles[2 * io_num + 1])

            # copy data into shared memory region for input values
            cudashm.set_shared_memory_region(shm_ip_handles[2 * io_num],
                                             input_list)
            cudashm.set_shared_memory_region(shm_ip_handles[2 * io_num + 1],
                                             dummy_input_list)
        elif use_system_shared_memory:
            shm_ip_handles.append(shm.create_shared_memory_region("input"+str(io_num)+"_data"+shm_suffix,\
                                        "/input"+str(io_num)+shm_suffix, input_byte_size))
            shm_ip_handles.append(shm.create_shared_memory_region("dumy_input"+str(io_num)+"_data"+shm_suffix,\
                                        "/dummy_input"+str(io_num)+shm_suffix, dummy_input_byte_size))
            shm_op_handles.append(shm.create_shared_memory_region("output"+str(io_num)+"_data"+shm_suffix,\
                                        "/output"+str(io_num)+shm_suffix, output_byte_size))
            shm_op_handles.append(shm.create_shared_memory_region("dummy_output"+str(io_num)+"_data"+shm_suffix,\
                                        "/dummy_output"+str(io_num)+shm_suffix, dummy_output_byte_size))
            shared_memory_ctx.register(shm_ip_handles[2 * io_num])
            shared_memory_ctx.register(shm_ip_handles[2 * io_num + 1])
            shared_memory_ctx.register(shm_op_handles[2 * io_num])
            shared_memory_ctx.register(shm_op_handles[2 * io_num + 1])
            # copy data into shared memory region for input values
            shm.set_shared_memory_region(shm_ip_handles[2 * io_num],
                                         input_list)
            shm.set_shared_memory_region(shm_ip_handles[2 * io_num + 1],
                                         dummy_input_list)
        if use_system_shared_memory or use_cuda_shared_memory:
            input_dict[input_name] = (shm_ip_handles[2 * io_num],
                                      [len(input_shape_values[0])])
            input_dict[dummy_input_name] = (shm_ip_handles[2 * io_num + 1],
                                            dummy_input_shapes[io_num])
            output_dict[output_name] = (InferContext.ResultFormat.RAW,
                                        shm_op_handles[2 * io_num])
            output_dict[dummy_output_name] = (InferContext.ResultFormat.RAW,
                                              shm_op_handles[2 * io_num + 1])
        else:
            input_dict[input_name] = input_list
            input_dict[dummy_input_name] = dummy_input_list
            output_dict[output_name] = InferContext.ResultFormat.RAW
            output_dict[dummy_output_name] = InferContext.ResultFormat.RAW

    # Run inference and check results for each config
    for config in configs:
        model_name = tu.get_zero_model_name(pf, io_cnt, tensor_dtype)

        ctx = InferContext(config[0],
                           config[1],
                           model_name,
                           model_version,
                           correlation_id=0,
                           streaming=config[2],
                           verbose=True)
        results = ctx.run(input_dict,
                          output_dict,
                          batch_size,
                          priority=priority,
                          timeout_us=timeout_us)

        tester.assertEqual(ctx.get_last_request_model_name(), model_name)
        if model_version is not None:
            tester.assertEqual(ctx.get_last_request_model_version(),
                               model_version)

        tester.assertEqual(len(results), 2 * io_cnt)
        for (result_name, result_val) in iteritems(results):
            tester.assertTrue(result_name in output_dict)
            expected = expected_dict[output_name][0]
            for b in range(batch_size):
                if result_name == output_name:
                    tester.assertEqual(result_val[b].shape, expected.shape)
                    tester.assertTrue(
                        np.array_equal(result_val[b], expected),
                        "{}, {}, slot {}, expected: {}, got {}".format(
                            model_name, result_name, b, expected,
                            result_val[b]))
                elif result_name == dummy_output_name:
                    # The shape of the dummy output should be equal to the shape values
                    # specified in the shape tensor
                    tester.assertTrue(
                        np.array_equal(result_val[b].shape, expected),
                        "{}, {}, slot {}, expected: {}, got {}".format(
                            model_name, result_name, b, expected,
                            result_val[b]))

    if use_cuda_shared_memory or use_system_shared_memory:
        for io_num in range(2 * io_cnt):
            shared_memory_ctx.unregister(shm_ip_handles[io_num])
            shared_memory_ctx.unregister(shm_op_handles[io_num])
            if use_cuda_shared_memory:
                cudashm.destroy_shared_memory_region(shm_ip_handles[io_num])
                cudashm.destroy_shared_memory_region(shm_op_handles[io_num])
            else:
                shm.destroy_shared_memory_region(shm_ip_handles[io_num])
                shm.destroy_shared_memory_region(shm_op_handles[io_num])

    return results
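
Finally, a sketch of how the shape-tensor helper might be driven; it asserts that the platform is "plan" or "plan_nobatch", so only TensorRT plan models apply. The concrete shape values below are illustrative assumptions, and the same unittest/numpy imports as above are assumed.

class InferShapeTensorTest(unittest.TestCase):
    def test_plan_shape_tensor(self):
        # DUMMY_INPUT0 is a [4, 4] tensor; INPUT0 carries the shape values
        # [4, 4] that the model echoes to OUTPUT0 and uses to size DUMMY_OUTPUT0.
        infer_shape_tensor(self, "plan", 8, np.float32, [[4, 4]], [[4, 4]])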