def requestGenerator(input_name, output_name, c, h, w, format, dtype, FLAGS):
    """Yield a single inference request built from the image named in FLAGS.

    The image at ``FLAGS.image_filename`` is opened and passed through
    ``preprocess`` together with ``format``, ``dtype``, ``c``, ``h``, ``w``
    and ``FLAGS.scaling``. The preprocessed result is stacked
    ``FLAGS.batch_size`` times along a new leading axis to form the batch.

    Yields:
        One ``(inputs, outputs, model_name, model_version)`` tuple, where
        ``inputs`` and ``outputs`` are one-element lists created with the
        gRPC client when ``FLAGS.protocol`` is "grpc" (case-insensitive)
        and with the HTTP client otherwise.
    """
    # Preprocess the image according to the model requirements.
    with Image.open(FLAGS.image_filename) as img:
        image_data = preprocess(img, format, dtype, c, h, w, FLAGS.scaling)

    # The same image is repeated to fill the whole batch.
    batched_image_data = np.stack([image_data] * FLAGS.batch_size, axis=0)

    if FLAGS.protocol.lower() == "grpc":
        infer_input = grpcclient.InferInput(
            input_name, batched_image_data.shape, dtype)
        infer_input.set_data_from_numpy(batched_image_data)
        requested_output = grpcclient.InferRequestedOutput(
            output_name, class_count=FLAGS.classes)
    else:
        # The HTTP path sends and requests the tensors in non-binary form.
        infer_input = httpclient.InferInput(
            input_name, batched_image_data.shape, dtype)
        infer_input.set_data_from_numpy(batched_image_data, binary_data=False)
        requested_output = httpclient.InferRequestedOutput(
            output_name, binary_data=False, class_count=FLAGS.classes)

    yield [infer_input], [requested_output], FLAGS.model_name, FLAGS.model_version
def sync_send(triton_client, result_list, values, batch_size, sequence_id,
              model_name, model_version):
    """Send ``values`` one by one as a single synchronous inference sequence.

    For every value a ``[batch_size, 1]`` INT32 tensor filled with that value
    is sent as 'INPUT'. The first request is flagged as the sequence start
    and the request whose position equals ``len(values)`` as the sequence
    end. The 'OUTPUT' array of each response is appended to ``result_list``.

    Note: ``model_version`` is accepted but is not forwarded to
    ``triton_client.infer``.
    """
    for position, value in enumerate(values, start=1):
        # Create the tensor for INPUT.
        value_data = np.full(shape=[batch_size, 1],
                             fill_value=value,
                             dtype=np.int32)
        sequence_input = httpclient.InferInput('INPUT', value_data.shape,
                                               "INT32")
        # FIXME, negative value in binary form can't be handled properly,
        # which causes the library to raise decode exception.
        sequence_input.set_data_from_numpy(value_data, binary_data=False)
        sequence_output = httpclient.InferRequestedOutput('OUTPUT')

        # Issue the synchronous sequence inference.
        result = triton_client.infer(
            model_name=model_name,
            inputs=[sequence_input],
            outputs=[sequence_output],
            sequence_id=sequence_id,
            sequence_start=(position == 1),
            sequence_end=(position == len(values)))
        result_list.append(result.as_numpy('OUTPUT'))
def test_infer(model_name, input0_data, input1_data):
    """Run one HTTP inference with two [1, 16] INT32 inputs and return it.

    'INPUT0' is sent in non-binary form and 'INPUT1' as binary data;
    'OUTPUT0' is requested as binary data and 'OUTPUT1' in non-binary form.
    The request also carries the query parameters ``test_1=1`` and
    ``test_2=2``. The call goes through the module-level ``triton_client``.

    Returns:
        Whatever ``triton_client.infer`` returns for the request.
    """
    infer_input0 = httpclient.InferInput('INPUT0', [1, 16], "INT32")
    infer_input1 = httpclient.InferInput('INPUT1', [1, 16], "INT32")

    # Initialize the data; the two inputs deliberately use different encodings.
    infer_input0.set_data_from_numpy(input0_data, binary_data=False)
    infer_input1.set_data_from_numpy(input1_data, binary_data=True)

    requested_outputs = [
        httpclient.InferRequestedOutput('OUTPUT0', binary_data=True),
        httpclient.InferRequestedOutput('OUTPUT1', binary_data=False),
    ]

    return triton_client.infer(model_name,
                               [infer_input0, infer_input1],
                               outputs=requested_outputs,
                               query_params={'test_1': 1, 'test_2': 2})
def TestIdentityInference(np_array, binary_data):
    """Send ``np_array`` through the identity model and verify the echo.

    The array is sent as the BYTES tensor 'INPUT0' to the model
    "savedmodel_zero_1_object" using the module-level ``triton_client``,
    and 'OUTPUT0' is requested back. ``binary_data`` selects the encoding
    for both the input and the requested output.

    When ``np_array`` has object dtype the returned array is decoded with
    ``np.char.decode`` before comparison; otherwise it is compared as-is.
    On a mismatch the returned array is printed and the process exits with
    status 1.
    """
    model_name = "savedmodel_zero_1_object"

    infer_input = httpclient.InferInput('INPUT0', np_array.shape, "BYTES")
    infer_input.set_data_from_numpy(np_array, binary_data=binary_data)
    requested_output = httpclient.InferRequestedOutput(
        'OUTPUT0', binary_data=binary_data)

    results = triton_client.infer(model_name=model_name,
                                  inputs=[infer_input],
                                  outputs=[requested_output])
    output0 = results.as_numpy('OUTPUT0')

    # np.object_ is used instead of the np.object alias: the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, where it raises
    # AttributeError.
    if np_array.dtype == np.object_:
        received = np.char.decode(output0)
    else:
        received = output0

    if not np.array_equal(np_array, received):
        print(output0)
        sys.exit(1)
# Declare the two [1, 16] INT32 input tensors.
inputs.extend([
    httpclient.InferInput('INPUT0', [1, 16], "INT32"),
    httpclient.InferInput('INPUT1', [1, 16], "INT32"),
])

# Create the data for the two input tensors: the first holds the unique
# integers 0..15 and the second is all ones.
input0_data = np.arange(16, dtype=np.int32).reshape(1, 16)
input1_data = np.ones((1, 16), dtype=np.int32)

# Initialize the data
# Enable binary_data after DLIS-1204 is fixed.
inputs[0].set_data_from_numpy(input0_data, binary_data=False)
inputs[1].set_data_from_numpy(input1_data, binary_data=False)

outputs.extend([
    httpclient.InferRequestedOutput('OUTPUT0', binary_data=False),
    httpclient.InferRequestedOutput('OUTPUT1', binary_data=False),
])


# Define the callback function. Note the last two parameters should be
# result and error. The client provides the outcome of an inference in
# result (an InferResult). For a successful inference error will be None,
# otherwise it will be an InferenceServerException holding the error details.
def callback(user_data, result, error):
    # Record the error when there is one, the result otherwise.
    user_data.append(error if error else result)


# list to hold the results of inference.
# Numeric operands: in0 is the integers 0..15 and in1 is all ones, both
# shaped (1, 16). The expected sum and difference are computed from them.
in0 = np.arange(16, dtype=np.int32)[np.newaxis, :]
in1 = np.ones((1, 16), dtype=np.int32)
expected_sum = in0 + in1
expected_diff = in0 - in1

# The tensors actually sent hold the string form of every element, stored
# as Python objects and reshaped back to the numeric operand's shape.
in0n = np.array(list(map(str, in0.ravel())), dtype=object)
input0_data = in0n.reshape(in0.shape)
in1n = np.array(list(map(str, in1.ravel())), dtype=object)
input1_data = in1n.reshape(in1.shape)

# Initialize the data; the two inputs use different encodings.
inputs[0].set_data_from_numpy(input0_data, binary_data=True)
inputs[1].set_data_from_numpy(input1_data, binary_data=False)

outputs.extend([
    httpclient.InferRequestedOutput('OUTPUT0', binary_data=True),
    httpclient.InferRequestedOutput('OUTPUT1', binary_data=False),
])

results = triton_client.infer(model_name=model_name,
                              inputs=inputs,
                              outputs=outputs)

# Get the output arrays from the results
output0_data = results.as_numpy('OUTPUT0')
output1_data = results.as_numpy('OUTPUT1')

for i in range(16):
    print("{} + {} = {}".format(input0_data[0][i], input1_data[0][i],
                                output0_data[0][i]))