Example #1
# Imports shared by the examples below
import numpy as np
from PIL import Image
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient


def requestGenerator(input_name, output_name, c, h, w, format, dtype, FLAGS):
    # Preprocess image into input data according to model requirements
    image_data = None
    with Image.open(FLAGS.image_filename) as img:
        image_data = preprocess(img, format, dtype, c, h, w, FLAGS.scaling)

    repeated_image_data = [image_data for _ in range(FLAGS.batch_size)]
    batched_image_data = np.stack(repeated_image_data, axis=0)

    # Set the input data
    inputs = []
    if FLAGS.protocol.lower() == "grpc":
        inputs.append(
            grpcclient.InferInput(input_name, batched_image_data.shape, dtype))
        inputs[0].set_data_from_numpy(batched_image_data)
    else:
        inputs.append(
            httpclient.InferInput(input_name, batched_image_data.shape, dtype))
        inputs[0].set_data_from_numpy(batched_image_data, binary_data=False)

    outputs = []
    if FLAGS.protocol.lower() == "grpc":
        outputs.append(
            grpcclient.InferRequestedOutput(output_name,
                                            class_count=FLAGS.classes))
    else:
        outputs.append(
            httpclient.InferRequestedOutput(output_name,
                                            binary_data=False,
                                            class_count=FLAGS.classes))

    yield inputs, outputs, FLAGS.model_name, FLAGS.model_version
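A minimal driver for this generator might look like the sketch below. It assumes FLAGS was produced by an argparse parser with the fields used above, that the model metadata values (input_name, output_name, c, h, w, format, dtype) were already extracted, and that a Triton server is listening on the default local ports; only InferenceServerClient and infer() are the standard client API, the rest is illustrative:

if FLAGS.protocol.lower() == "grpc":
    triton_client = grpcclient.InferenceServerClient(url="localhost:8001")
else:
    triton_client = httpclient.InferenceServerClient(url="localhost:8000")

for inputs, outputs, model_name, model_version in requestGenerator(
        input_name, output_name, c, h, w, format, dtype, FLAGS):
    response = triton_client.infer(model_name,
                                   inputs,
                                   model_version=model_version,
                                   outputs=outputs)
    print(response.as_numpy(output_name))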
Example #2
def async_send(triton_client, values, batch_size, sequence_id,
               model_name, model_version):

    count = 1
    for value in values:
        # Create the tensor for INPUT
        value_data = np.full(shape=[batch_size, 1],
                             fill_value=value,
                             dtype=np.int32)
        inputs = []
        inputs.append(grpcclient.InferInput('INPUT', value_data.shape,
                                            "INT32"))
        # Initialize the data
        inputs[0].set_data_from_numpy(value_data)
        outputs = []
        outputs.append(grpcclient.InferRequestedOutput('OUTPUT'))
        # Issue the asynchronous sequence inference.
        triton_client.async_stream_infer(model_name=model_name,
                                         inputs=inputs,
                                         outputs=outputs,
                                         model_version=model_version,
                                         request_id='{}_{}'.format(
                                             sequence_id, count),
                                         sequence_id=sequence_id,
                                         sequence_start=(count == 1),
                                         sequence_end=(count == len(values)))
        count = count + 1
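This helper relies on a gRPC stream having been opened first. A sketch of that setup, using the standard start_stream/stop_stream API with a user callback; the model name "simple_sequence" and the input values are placeholders:

from functools import partial

def callback(result_list, result, error):
    # Collect streamed results (or errors) as they arrive.
    if error is not None:
        result_list.append(error)
    else:
        result_list.append(result.as_numpy('OUTPUT'))

triton_client = grpcclient.InferenceServerClient(url="localhost:8001")
result_list = []
triton_client.start_stream(callback=partial(callback, result_list))
# Placeholder sequence model and input values.
async_send(triton_client, [0, 4, 8], batch_size=1, sequence_id=1000,
           model_name="simple_sequence", model_version="")
triton_client.stop_stream()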
Example #3
def requestGenerator(input_name, output_name, c, h, w, format, dtype, FLAGS):
    # Preprocess image into input data according to model requirements
    image_data = None
    with Image.open(FLAGS.image_filename) as img:
        image_data = preprocess(img, format, dtype, c, h, w, FLAGS.scaling)

    repeated_image_data = [image_data for _ in range(FLAGS.batch_size)]
    batched_image_data = np.stack(repeated_image_data, axis=0)

    # Create the input now that the batched shape is known, then set the data
    inputs = []
    inputs.append(
        grpcclient.InferInput(input_name, batched_image_data.shape, dtype))
    inputs[0].set_data_from_numpy(batched_image_data)

    outputs = []
    outputs.append(
        grpcclient.InferRequestedOutput(output_name, class_count=2))

    yield inputs, outputs, FLAGS.model_name, FLAGS.model_version
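When class_count is set, Triton's classification extension returns the top-k results as strings of the form value:index[:label] instead of the raw tensor. A short sketch of decoding them, assuming a response obtained as in the driver sketch under Example #1:

# 'response' is assumed to be an InferResult from triton_client.infer(),
# as in the driver sketch under Example #1.
for cls in response.as_numpy(output_name)[0]:  # top-k for the first batch item
    fields = cls.decode("utf-8").split(":")    # value, index[, label]
    print("    {} ({}){}".format(
        fields[0], fields[1],
        " = " + fields[2] if len(fields) > 2 else ""))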
Example #4
def sync_send(triton_client, result_list, values, batch_size, sequence_id,
              model_name, model_version):

    count = 1
    for value in values:
        # Create the tensor for INPUT
        value_data = np.full(shape=[batch_size, 1],
                             fill_value=value,
                             dtype=np.int32)
        inputs = []
        inputs.append(grpcclient.InferInput('INPUT', value_data.shape, "INT32"))
        # Initialize the data
        inputs[0].set_data_from_numpy(value_data)
        outputs = []
        outputs.append(grpcclient.InferRequestedOutput('OUTPUT'))
        # Issue the synchronous sequence inference.
        result = triton_client.infer(model_name=model_name,
                                     inputs=inputs,
                                     outputs=outputs,
                                     model_version=model_version,
                                     sequence_id=sequence_id,
                                     sequence_start=(count == 1),
                                     sequence_end=(count == len(values)))
        result_list.append(result.as_numpy('OUTPUT'))
        count = count + 1
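Driving the synchronous variant is simpler since results come back in order. A sketch, again with a placeholder model name and input values:

triton_client = grpcclient.InferenceServerClient(url="localhost:8001")
result_list = []
# Placeholder sequence model and input values.
sync_send(triton_client, result_list, [0, 4, 8], batch_size=1,
          sequence_id=1001, model_name="simple_sequence", model_version="")
for output in result_list:
    print(output)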
Example #5
    triton_client.register_cuda_shared_memory(
        "input1_data", cudashm.get_raw_handle(shm_ip1_handle), 0,
        input_byte_size)

    # Set the parameters to use data from shared memory
    inputs = []
    inputs.append(grpcclient.InferInput('INPUT0', [1, 16], "INT32"))
    inputs[-1].set_shared_memory("input0_data", input_byte_size)

    inputs.append(grpcclient.InferInput('INPUT1', [1, 16], "INT32"))
    inputs[-1].set_shared_memory("input1_data", input_byte_size)

    outputs = []
    outputs.append(grpcclient.InferRequestedOutput('OUTPUT0'))
    # outputs[-1].set_shared_memory("output0_data", output_byte_size)

    outputs.append(grpcclient.InferRequestedOutput('OUTPUT1'))
    # outputs[-1].set_shared_memory("output1_data", output_byte_size)

    results = triton_client.infer(model_name=model_name,
                                  inputs=inputs,
                                  outputs=outputs)

    # TODO : Currently, this example doesn't use shared memory for output.
    # This is done to effectively validate the results.
    # tritongrpcclient.cuda_shared_memory module will be enhanced to read
    # data from a specified shared memory handle, data_type and shape;
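For reference, a sketch of the allocation and registration steps this snippet presupposes, using tritonclient.utils.cuda_shared_memory; the input values, the [1, 16] INT32 shapes, and the gRPC URL are placeholders:

import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.utils.cuda_shared_memory as cudashm

triton_client = grpcclient.InferenceServerClient(url="localhost:8001")

# Illustrative input: two [1, 16] INT32 tensors.
input0_data = np.arange(16, dtype=np.int32).reshape(1, 16)
input1_data = np.ones((1, 16), dtype=np.int32)
input_byte_size = input0_data.size * input0_data.itemsize

# Allocate CUDA shared-memory regions on GPU 0 and copy the data in.
shm_ip0_handle = cudashm.create_shared_memory_region("input0_data",
                                                     input_byte_size, 0)
shm_ip1_handle = cudashm.create_shared_memory_region("input1_data",
                                                     input_byte_size, 0)
cudashm.set_shared_memory_region(shm_ip0_handle, [input0_data])
cudashm.set_shared_memory_region(shm_ip1_handle, [input1_data])

# Register INPUT0's region with the server; the snippet above does the
# same for INPUT1.
triton_client.register_cuda_shared_memory(
    "input0_data", cudashm.get_raw_handle(shm_ip0_handle), 0,
    input_byte_size)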