def infer_zero(tester, pf, batch_size, tensor_dtype, input_shapes, output_shapes,
               model_version=None, use_http=True, use_grpc=True,
               use_streaming=True, shm_region_name_prefix=None,
               use_system_shared_memory=False, use_cuda_shared_memory=False):
    """Run the *_zero_* identity model over every requested protocol and
    verify that each output tensor matches its corresponding input. Inputs
    and outputs can optionally be passed through system or CUDA shared
    memory."""
    tester.assertTrue(use_http or use_grpc or use_streaming)
    configs = []
    if use_http:
        configs.append(("localhost:8000", ProtocolType.HTTP, False))
    if use_grpc:
        configs.append(("localhost:8001", ProtocolType.GRPC, False))
    if use_streaming:
        configs.append(("localhost:8001", ProtocolType.GRPC, True))
    tester.assertEqual(len(input_shapes), len(output_shapes))
    io_cnt = len(input_shapes)

    if shm_region_name_prefix is None:
        shm_region_name_prefix = ["input", "output"]

    input_dict = {}
    output_dict = {}
    expected_dict = {}
    shm_ip_handles = list()
    shm_op_handles = list()
    shared_memory_ctx = SharedMemoryControlContext("localhost:8000",
                                                   ProtocolType.HTTP,
                                                   verbose=False)

    for io_num in range(io_cnt):
        # libtorch models use a double-underscore naming convention
        if pf == "libtorch" or pf == "libtorch_nobatch":
            input_name = "INPUT__{}".format(io_num)
            output_name = "OUTPUT__{}".format(io_num)
        else:
            input_name = "INPUT{}".format(io_num)
            output_name = "OUTPUT{}".format(io_num)

        # Generate one random input tensor per batch slot and remember the
        # value expected back for the matching output
        input_list = list()
        expected_list = list()
        for b in range(batch_size):
            rtensor_dtype = _range_repr_dtype(tensor_dtype)
            if rtensor_dtype != np.bool:
                in0 = np.random.randint(low=np.iinfo(rtensor_dtype).min,
                                        high=np.iinfo(rtensor_dtype).max,
                                        size=input_shapes[io_num],
                                        dtype=rtensor_dtype)
            else:
                in0 = np.random.choice(a=[False, True],
                                       size=input_shapes[io_num])
            if tensor_dtype != np.object:
                in0 = in0.astype(tensor_dtype)
                expected0 = np.ndarray.copy(in0)
            else:
                expected0 = np.array(
                    [unicode(str(x), encoding='utf-8') for x in in0.flatten()],
                    dtype=object)
                in0 = np.array([str(x) for x in in0.flatten()],
                               dtype=object).reshape(in0.shape)

            expected0 = expected0.reshape(output_shapes[io_num])

            input_list.append(in0)
            expected_list.append(expected0)

        expected_dict[output_name] = expected_list

        input_byte_size = tu.shape_element_count(input_shapes[io_num]) *\
            np.dtype(tensor_dtype).itemsize * batch_size
        output_byte_size = tu.shape_element_count(output_shapes[io_num]) *\
            np.dtype(tensor_dtype).itemsize * batch_size

        # create and register shared memory region for inputs and outputs
        shm_io_handle = su.create_register_set_either_shm_region(
            [
                shm_region_name_prefix[0] + str(io_num),
                shm_region_name_prefix[1] + str(io_num)
            ], input_list, input_byte_size, output_byte_size,
            shared_memory_ctx, use_system_shared_memory,
            use_cuda_shared_memory)

        if len(shm_io_handle) != 0:
            shm_ip_handles.append(shm_io_handle[0])
            shm_op_handles.append(shm_io_handle[1])
            input_dict[input_name] = (shm_ip_handles[io_num],
                                      input_shapes[io_num])
            output_dict[output_name] = (InferContext.ResultFormat.RAW,
                                        shm_op_handles[io_num])
        else:
            input_dict[input_name] = input_list
            output_dict[output_name] = InferContext.ResultFormat.RAW

    # Run inference and check results for each config
    for config in configs:
        model_name = tu.get_zero_model_name(pf, io_cnt, tensor_dtype)

        ctx = InferContext(config[0], config[1], model_name, model_version,
                           correlation_id=0, streaming=config[2],
                           verbose=True)
        results = ctx.run(input_dict, output_dict, batch_size)

        tester.assertEqual(ctx.get_last_request_model_name(), model_name)
        if model_version is not None:
            tester.assertEqual(ctx.get_last_request_model_version(),
                               model_version)

        tester.assertEqual(len(results), io_cnt)
        for (result_name, result_val) in iteritems(results):
            tester.assertTrue(result_name in output_dict)
            tester.assertTrue(result_name in expected_dict)
            for b in range(batch_size):
                expected = expected_dict[result_name][b]
                tester.assertEqual(result_val[b].shape, expected.shape)
                tester.assertTrue(
                    np.array_equal(result_val[b], expected),
                    "{}, {}, slot {}, expected: {}, got {}".format(
                        model_name, result_name, b, expected, result_val[b]))

    # Unregister and destroy any shared memory regions created above
    if len(shm_ip_handles) != 0:
        for io_num in range(io_cnt):
            shared_memory_ctx.unregister(shm_ip_handles[io_num])
            shared_memory_ctx.unregister(shm_op_handles[io_num])
            su.destroy_either_shm_region(shm_ip_handles[io_num],
                                         use_system_shared_memory,
                                         use_cuda_shared_memory)
            su.destroy_either_shm_region(shm_op_handles[io_num],
                                         use_system_shared_memory,
                                         use_cuda_shared_memory)

    return results
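
# Illustrative usage sketch for infer_zero (a hypothetical test case, not
# part of the utility itself): it assumes this file is importable as
# infer_util and that the matching plan_zero_1_float32 identity model is
# loaded on a local server. The batch size and shapes below are made up for
# the example.
#
#     import unittest
#     import numpy as np
#     import infer_util as iu
#
#     class InferZeroExample(unittest.TestCase):
#
#         def test_plan_zero_1_float32(self):
#             # Batch of 8 through an identity ("zero") model with a single
#             # 16-element input/output pair, no shared memory.
#             iu.infer_zero(self, "plan", 8, np.float32,
#                           input_shapes=[(16,)], output_shapes=[(16,)])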
def infer_shape_tensor(tester, pf, batch_size, tensor_dtype,
                       input_shape_values, dummy_input_shapes,
                       model_version=None, use_http=True, use_grpc=True,
                       use_streaming=True, shm_suffix="",
                       use_system_shared_memory=False,
                       use_cuda_shared_memory=False, priority=0, timeout_us=0):
    """Run a TensorRT plan model that has shape tensor I/O. Each INPUT* is a
    shape tensor whose values must be echoed back in OUTPUT*, and each
    DUMMY_OUTPUT* must come back reshaped to those shape values."""
    tester.assertTrue(use_http or use_grpc or use_streaming)
    configs = []
    if use_http:
        configs.append(("localhost:8000", ProtocolType.HTTP, False))
    if use_grpc:
        configs.append(("localhost:8001", ProtocolType.GRPC, False))
    if use_streaming:
        configs.append(("localhost:8001", ProtocolType.GRPC, True))
    tester.assertEqual(len(input_shape_values), len(dummy_input_shapes))
    io_cnt = len(input_shape_values)

    if use_system_shared_memory and use_cuda_shared_memory:
        raise ValueError(
            "Cannot set both System and CUDA shared memory flags to 1")

    input_dict = {}
    output_dict = {}
    expected_dict = {}
    shm_ip_handles = list()
    shm_op_handles = list()
    shared_memory_ctx = SharedMemoryControlContext("localhost:8000",
                                                   ProtocolType.HTTP,
                                                   verbose=False)

    for io_num in range(io_cnt):
        # Shape tensors are only supported by TensorRT plan models
        tester.assertTrue(pf == "plan" or pf == "plan_nobatch")

        input_name = "INPUT{}".format(io_num)
        output_name = "OUTPUT{}".format(io_num)
        dummy_input_name = "DUMMY_INPUT{}".format(io_num)
        dummy_output_name = "DUMMY_OUTPUT{}".format(io_num)

        input_list = list()
        dummy_input_list = list()
        expected_list = list()
        for b in range(batch_size):
            # Prepare the dummy tensor
            rtensor_dtype = _range_repr_dtype(tensor_dtype)
            if rtensor_dtype != np.bool:
                dummy_in0 = np.random.randint(
                    low=np.iinfo(rtensor_dtype).min,
                    high=np.iinfo(rtensor_dtype).max,
                    size=dummy_input_shapes[io_num],
                    dtype=rtensor_dtype)
            else:
                dummy_in0 = np.random.choice(a=[False, True],
                                             size=dummy_input_shapes[io_num])
            if tensor_dtype != np.object:
                dummy_in0 = dummy_in0.astype(tensor_dtype)
            else:
                dummy_in0 = np.array(
                    [str(x) for x in dummy_in0.flatten()],
                    dtype=object).reshape(dummy_in0.shape)
            dummy_input_list.append(dummy_in0)

        # Prepare shape input tensor. Only one tensor per batch
        in0 = np.asarray(input_shape_values[io_num], dtype=np.int32)
        input_list.append(in0)

        # Prepare the expected list for the output
        expected0 = np.ndarray.copy(in0)
        expected_list.append(expected0)
        expected_dict[output_name] = expected_list

        # Shape tensors are always INT32, so size their regions accordingly
        input_byte_size = len(in0) * np.dtype(np.int32).itemsize
        output_byte_size = input_byte_size * batch_size
        dummy_input_byte_size = tu.shape_element_count(dummy_input_shapes[io_num]) *\
            np.dtype(tensor_dtype).itemsize * batch_size
        # The dimension of this tensor will be the value of the shape tensor
        dummy_output_byte_size = tu.shape_element_count(in0) *\
            np.dtype(tensor_dtype).itemsize * batch_size

        # create and register shared memory region for inputs and outputs
        if use_cuda_shared_memory:
            shm_ip_handles.append(
                cudashm.create_shared_memory_region(
                    "input" + str(io_num) + "_data" + shm_suffix,
                    input_byte_size, 0))
            shm_ip_handles.append(
                cudashm.create_shared_memory_region(
                    "dummy_input" + str(io_num) + "_data" + shm_suffix,
                    dummy_input_byte_size, 0))
            shm_op_handles.append(
                cudashm.create_shared_memory_region(
                    "output" + str(io_num) + "_data" + shm_suffix,
                    output_byte_size, 0))
            shm_op_handles.append(
                cudashm.create_shared_memory_region(
                    "dummy_output" + str(io_num) + "_data" + shm_suffix,
                    dummy_output_byte_size, 0))

            shared_memory_ctx.cuda_register(shm_ip_handles[2 * io_num])
            shared_memory_ctx.cuda_register(shm_ip_handles[2 * io_num + 1])
            shared_memory_ctx.cuda_register(shm_op_handles[2 * io_num])
            shared_memory_ctx.cuda_register(shm_op_handles[2 * io_num + 1])

            # copy data into shared memory region for input values
            cudashm.set_shared_memory_region(shm_ip_handles[2 * io_num],
                                             input_list)
            cudashm.set_shared_memory_region(shm_ip_handles[2 * io_num + 1],
                                             dummy_input_list)
        elif use_system_shared_memory:
            shm_ip_handles.append(
                shm.create_shared_memory_region(
                    "input" + str(io_num) + "_data" + shm_suffix,
                    "/input" + str(io_num) + shm_suffix, input_byte_size))
            shm_ip_handles.append(
                shm.create_shared_memory_region(
                    "dummy_input" + str(io_num) + "_data" + shm_suffix,
                    "/dummy_input" + str(io_num) + shm_suffix,
                    dummy_input_byte_size))
            shm_op_handles.append(
                shm.create_shared_memory_region(
                    "output" + str(io_num) + "_data" + shm_suffix,
                    "/output" + str(io_num) + shm_suffix, output_byte_size))
            shm_op_handles.append(
                shm.create_shared_memory_region(
                    "dummy_output" + str(io_num) + "_data" + shm_suffix,
                    "/dummy_output" + str(io_num) + shm_suffix,
                    dummy_output_byte_size))

            shared_memory_ctx.register(shm_ip_handles[2 * io_num])
            shared_memory_ctx.register(shm_ip_handles[2 * io_num + 1])
            shared_memory_ctx.register(shm_op_handles[2 * io_num])
            shared_memory_ctx.register(shm_op_handles[2 * io_num + 1])

            # copy data into shared memory region for input values
            shm.set_shared_memory_region(shm_ip_handles[2 * io_num],
                                         input_list)
            shm.set_shared_memory_region(shm_ip_handles[2 * io_num + 1],
                                         dummy_input_list)

        if use_system_shared_memory or use_cuda_shared_memory:
            input_dict[input_name] = (shm_ip_handles[2 * io_num],
                                      [len(input_shape_values[io_num])])
            input_dict[dummy_input_name] = (shm_ip_handles[2 * io_num + 1],
                                            dummy_input_shapes[io_num])
            output_dict[output_name] = (InferContext.ResultFormat.RAW,
                                        shm_op_handles[2 * io_num])
            output_dict[dummy_output_name] = (InferContext.ResultFormat.RAW,
                                              shm_op_handles[2 * io_num + 1])
        else:
            input_dict[input_name] = input_list
            input_dict[dummy_input_name] = dummy_input_list
            output_dict[output_name] = InferContext.ResultFormat.RAW
            output_dict[dummy_output_name] = InferContext.ResultFormat.RAW

    # Run inference and check results for each config
    for config in configs:
        model_name = tu.get_zero_model_name(pf, io_cnt, tensor_dtype)
        ctx = InferContext(config[0], config[1], model_name, model_version,
                           correlation_id=0, streaming=config[2],
                           verbose=True)
        results = ctx.run(input_dict, output_dict, batch_size,
                          priority=priority, timeout_us=timeout_us)

        tester.assertEqual(ctx.get_last_request_model_name(), model_name)
        if model_version is not None:
            tester.assertEqual(ctx.get_last_request_model_version(),
                               model_version)

        tester.assertEqual(len(results), 2 * io_cnt)
        for (result_name, result_val) in iteritems(results):
            tester.assertTrue(result_name in output_dict)
            # Every result name ends in its I/O number; use it to look up
            # the expected shape values for that I/O pair
            io_num = int(result_name.split("OUTPUT")[-1])
            expected = expected_dict["OUTPUT{}".format(io_num)][0]
            for b in range(batch_size):
                if result_name.startswith("DUMMY_OUTPUT"):
                    # The shape of the dummy output should be equal to the
                    # shape values specified in the shape tensor
                    tester.assertTrue(
                        np.array_equal(result_val[b].shape, expected),
                        "{}, {}, slot {}, expected: {}, got {}".format(
                            model_name, result_name, b, expected,
                            result_val[b]))
                else:
                    # The output should echo the shape tensor values
                    tester.assertEqual(result_val[b].shape, expected.shape)
                    tester.assertTrue(
                        np.array_equal(result_val[b], expected),
                        "{}, {}, slot {}, expected: {}, got {}".format(
                            model_name, result_name, b, expected,
                            result_val[b]))

    # Unregister and destroy any shared memory regions created above
    if use_cuda_shared_memory or use_system_shared_memory:
        for io_num in range(2 * io_cnt):
            shared_memory_ctx.unregister(shm_ip_handles[io_num])
            shared_memory_ctx.unregister(shm_op_handles[io_num])
            if use_cuda_shared_memory:
                cudashm.destroy_shared_memory_region(shm_ip_handles[io_num])
                cudashm.destroy_shared_memory_region(shm_op_handles[io_num])
            else:
                shm.destroy_shared_memory_region(shm_ip_handles[io_num])
                shm.destroy_shared_memory_region(shm_op_handles[io_num])

    return results
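
# Illustrative usage sketch for infer_shape_tensor (a hypothetical test case,
# not part of the utility itself): it assumes this file is importable as
# infer_util and that a plan shape-tensor model generated for one I/O pair is
# loaded on a local server. The shape values below are made up.
#
#     import unittest
#     import numpy as np
#     import infer_util as iu
#
#     class InferShapeTensorExample(unittest.TestCase):
#
#         def test_plan_shape_tensor(self):
#             # INPUT0 carries the shape values [4, 4]; DUMMY_INPUT0 is a
#             # batch of 8 random 4x4 tensors. OUTPUT0 should echo [4, 4]
#             # and DUMMY_OUTPUT0 should come back with shape (4, 4).
#             iu.infer_shape_tensor(self, "plan", 8, np.float32,
#                                   input_shape_values=[[4, 4]],
#                                   dummy_input_shapes=[(4, 4)],
#                                   use_system_shared_memory=True)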