    def __call__(self, raw_img):
        input_image = self._preprocess_image(raw_img)
        shm.set_shared_memory_region(self.input_handles["input_1"],
                                     [input_image])

        outputs = self.triton_client.infer(model_name=self.model_name,
                                           model_version=self.model_version,
                                           inputs=self.input_layers,
                                           outputs=self.output_layers)
        coverages_output = outputs.get_output("output_cov/Sigmoid")
        bboxes_output = outputs.get_output("output_bbox/BiasAdd")

        if coverages_output is not None:
            coverages = shm.get_contents_as_numpy(
                self.output_handles["output_cov/Sigmoid"],
                utils.triton_to_np_dtype(coverages_output.datatype),
                self._prod(coverages_output.shape))
        else:
            raise Exception(
                "output_cov/Sigmoid layer data is missing in the response.")

        if bboxes_output is not None:
            bboxes = shm.get_contents_as_numpy(
                self.output_handles["output_bbox/BiasAdd"],
                utils.triton_to_np_dtype(bboxes_output.datatype),
                self._prod(bboxes_output.shape))
        else:
            raise Exception(
                "output_bbox/BiasAdd layer data is missing in the response.")

        boxes = self.postprocessor.start(bboxes, coverages)
        boxes = NMS.filter(boxes)
        return boxes
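For context, the class above assumes its shared memory regions and InferInput/InferRequestedOutput objects were set up beforehand. A minimal sketch of that setup follows; the region names, shapes, and sizes are assumptions, not taken from the original code:

import tritonclient.http as httpclient
import tritonclient.utils.shared_memory as shm

triton_client = httpclient.InferenceServerClient(url="localhost:8000")

# Hypothetical byte size; in practice derive it from the model's
# input shape and datatype.
input_byte_size = 3 * 544 * 960 * 4

# Create a system shared memory region and register it with the server.
input_handle = shm.create_shared_memory_region(
    "input_1_data", "/input_1_data", input_byte_size)
triton_client.register_system_shared_memory(
    "input_1_data", "/input_1_data", input_byte_size)

# Bind the model input to the region so infer() reads from it.
input_1 = httpclient.InferInput("input_1", [1, 3, 544, 960], "FP32")
input_1.set_shared_memory("input_1_data", input_byte_size)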
Example No. 2
def run(request):
    """This function is called every time the webservice receives a request.

    Note that you need to know the names and data types of the model inputs
    and outputs. You can get these values by reading the model configuration
    file or by querying the model metadata endpoint (see parse_model_http in
    utils.py for an example of how to do this).

    Parameters
    ----------
    request : str
        A valid JSON-formatted string containing the input data; it should
        contain both the context and a query for the Bidirectional
        Attention Flow (BiDAF) model.

    Returns
    -------
    result : str
        String representing the words that answer the provided query

    """

    print(f"request is {request} type is {type(request)}")
    model_name = "bidaf-9"

    request = json.loads(request)
    context = request[0]
    query = request[1]

    input_meta, _, _, _ = parse_model_http(model_name=model_name)

    # Triton BYTES tensors map to NumPy's object dtype for string data
    np_dtype = triton_to_np_dtype(input_meta[0]["datatype"])
    cw, cc = preprocess(context, np_dtype)
    qw, qc = preprocess(query, np_dtype)

    input_mapping = {
        "query_word": qw,
        "query_char": qc,
        "context_word": cw,
        "context_char": cc,
    }

    res = triton_infer(input_mapping=input_mapping, model_name=model_name)

    result = postprocess(context_words=cw, answer=res)
    return result
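A quick smoke test of run (assuming the bidaf-9 model and its preprocess/postprocess helpers are deployed) might look like:

sample = json.dumps([
    "A quick brown fox jumps over the lazy dog.",  # context
    "What does the fox jump over?",                # query
])
print(run(sample))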
def preprocess(img, format, dtype, c, h, w, scaling, protocol):
    """
    Pre-process an image to meet the size, type and format
    requirements specified by the parameters.
    """

    if c == 1:
        sample_img = img.convert('L')
    else:
        sample_img = img.convert('RGB')

    resized_img = sample_img.resize((w, h), Image.BILINEAR)
    resized = np.array(resized_img)
    if resized.ndim == 2:
        resized = resized[:, :, np.newaxis]

    npdtype = triton_to_np_dtype(dtype)
    typed = resized.astype(npdtype)

    if scaling == 'INCEPTION':
        scaled = (typed / 128) - 1
    elif scaling == 'VGG':
        if c == 1:
            scaled = typed - np.asarray((128,), dtype=npdtype)
        else:
            scaled = typed - np.asarray((123, 117, 104), dtype=npdtype)
    else:
        scaled = typed

    # Swap to CHW if necessary
    if protocol == "grpc":
        if format == mc.ModelInput.FORMAT_NCHW:
            ordered = np.transpose(scaled, (2, 0, 1))
        else:
            ordered = scaled
    else:
        if format == "FORMAT_NCHW":
            ordered = np.transpose(scaled, (2, 0, 1))
        else:
            ordered = scaled

    # Channels are in RGB order. Currently model configuration data
    # doesn't provide any information as to other channel orderings
    # (like BGR) so we just assume RGB.
    return ordered
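As a usage sketch (the file name and model parameters here are assumptions):

from PIL import Image

img = Image.open("example.jpg")
# 3-channel 224x224 FP32 input in NCHW order, HTTP protocol.
image_data = preprocess(img, "FORMAT_NCHW", "FP32", 3, 224, 224,
                        "INCEPTION", "http")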
Example No. 4
def preprocess(img, dtype, format="NCHW"):
    print(img.size)
    img.load()
    # sample_img = img.convert('RGB')
    print(img.size)
    resized_img = img.resize((512, 512), Image.LANCZOS)
    print(resized_img.size)
    npdtype = triton_to_np_dtype(dtype)
    resized = np.asarray(resized_img, dtype=npdtype)
    print("Numpy shape", resized.shape)
    if format == "NCHW":
        resized = np.rollaxis(resized, 2, 0)
    print(resized.shape)
    resized = resized[np.newaxis, :, :, :]
    print(resized.shape)
    np.save("image_array", resized)
    return resized
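Because this variant saves the batch with np.save, the array can be reloaded later for offline inspection:

loaded = np.load("image_array.npy")
assert loaded.shape[0] == 1  # batch dimension added above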
Example No. 5
def preprocess(img, scaling, dtype):
    """Pre-process an image to meet the size, type and format
    requirements specified by the parameters.
    """
    c = 3
    h = 224
    w = 224
    format = "FORMAT_NCHW"

    if c == 1:
        sample_img = img.convert("L")
    else:
        sample_img = img.convert("RGB")

    resized_img = sample_img.resize((w, h), Image.BILINEAR)
    resized = np.array(resized_img)
    if resized.ndim == 2:
        resized = resized[:, :, np.newaxis]

    npdtype = triton_to_np_dtype(dtype)
    typed = resized.astype(npdtype)

    if scaling == "INCEPTION":
        scaled = (typed / 128) - 1
    elif scaling == "VGG":
        if c == 1:
            scaled = typed - np.asarray((128,), dtype=npdtype)
        else:
            scaled = typed - np.asarray((123, 117, 104), dtype=npdtype)
    else:
        scaled = typed

    # Swap to CHW if necessary
    if format == "FORMAT_NCHW":
        ordered = np.transpose(scaled, (2, 0, 1))
    else:
        ordered = scaled

    # Channels are in RGB order. Currently model configuration data
    # doesn't provide any information as to other channel orderings
    # (like BGR) so we just assume RGB.
    return ordered
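One way to feed the array returned above into an inference request is sketched below; the model input name and shape are hypothetical:

import tritonclient.http as httpclient

image_data = preprocess(img, "INCEPTION", "FP32")
inputs = [httpclient.InferInput("input", [1, 3, 224, 224], "FP32")]
inputs[0].set_data_from_numpy(image_data[np.newaxis, ...], binary_data=True)

The next fragment, taken from a shared-memory HTTP client, instead binds the requested outputs to pre-registered system shared memory regions: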
    outputs = []
    outputs.append(httpclient.InferRequestedOutput('OUTPUT0',
                                                   binary_data=True))
    outputs[-1].set_shared_memory("output0_data", output0_byte_size)

    outputs.append(httpclient.InferRequestedOutput('OUTPUT1',
                                                   binary_data=True))
    outputs[-1].set_shared_memory("output1_data", output1_byte_size)

    results = triton_client.infer(model_name=model_name,
                                  inputs=inputs,
                                  outputs=outputs)

    # Read results from the shared memory.
    output0 = results.get_output("OUTPUT0")
    if output0 is not None:
        print(utils.triton_to_np_dtype(output0['datatype']))
        output0_data = shm.get_contents_as_numpy(
            shm_op0_handle, utils.triton_to_np_dtype(output0['datatype']),
            output0['shape'])
    else:
        print("OUTPUT0 is missing in the response.")
        sys.exit(1)

    output1 = results.get_output("OUTPUT1")
    if output1 is not None:
        output1_data = shm.get_contents_as_numpy(
            shm_op1_handle, utils.triton_to_np_dtype(output1['datatype']),
            output1['shape'])
    else:
        print("OUTPUT1 is missing in the response.")
    outputs = []
    outputs.append(tritongrpcclient.InferRequestedOutput('OUTPUT0'))
    outputs[-1].set_shared_memory("output0_data", output_byte_size)

    outputs.append(tritongrpcclient.InferRequestedOutput('OUTPUT1'))
    outputs[-1].set_shared_memory("output1_data", output_byte_size)

    results = triton_client.infer(model_name=model_name,
                                  inputs=inputs,
                                  outputs=outputs)

    # Read results from the shared memory.
    output0 = results.get_output("OUTPUT0")
    if output0 is not None:
        output0_data = cudashm.get_contents_as_numpy(
            shm_op0_handle, utils.triton_to_np_dtype(output0.datatype),
            output0.shape)
    else:
        print("OUTPUT0 is missing in the response.")
        sys.exit(1)

    output1 = results.get_output("OUTPUT1")
    if output1 is not None:
        output1_data = cudashm.get_contents_as_numpy(
            shm_op1_handle, utils.triton_to_np_dtype(output1.datatype),
            output1.shape)
    else:
        print("OUTPUT1 is missing in the response.")
        sys.exit(1)

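For reference, CUDA shared memory handles like shm_op0_handle would have been created and registered along these lines; the region name, size, and device id are assumptions:

import tritonclient.utils.cuda_shared_memory as cudashm

shm_op0_handle = cudashm.create_shared_memory_region(
    "output0_data", output_byte_size, 0)
triton_client.register_cuda_shared_memory(
    "output0_data", cudashm.get_raw_handle(shm_op0_handle), 0,
    output_byte_size)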
def infer_and_validate(use_shared_memory, orig_input0_data, orig_input1_data):
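    # NOTE: this helper relies on module-level state (inputs, outputs,
    # triton_client, model_name, protocol, shm_op0_handle, shm_op1_handle)
    # that the surrounding script is expected to have set up.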
    if use_shared_memory:
        input0_data = orig_input0_data
        input1_data = orig_input1_data
        byte_size = input0_data.size * input0_data.itemsize
        inputs[0].set_shared_memory("input0_data", byte_size)
        inputs[1].set_shared_memory("input1_data", byte_size)
        outputs[0].set_shared_memory("output0_data", byte_size)
        outputs[1].set_shared_memory("output1_data", byte_size)
    else:
        input0_data = orig_input0_data
        input1_data = orig_input1_data * 2
        inputs[0].set_data_from_numpy(np.expand_dims(input0_data, axis=0))
        inputs[1].set_data_from_numpy(np.expand_dims(input1_data, axis=0))
        outputs[0].unset_shared_memory()
        outputs[1].unset_shared_memory()

    results = triton_client.infer(model_name=model_name,
                                  inputs=inputs,
                                  outputs=outputs)

    # Read results from the shared memory.
    output0 = results.get_output("OUTPUT0")
    if output0 is not None:
        if use_shared_memory:
            if protocol == "grpc":
                output0_data = shm.get_contents_as_numpy(
                    shm_op0_handle, utils.triton_to_np_dtype(output0.datatype),
                    output0.shape)
            else:
                output0_data = shm.get_contents_as_numpy(
                    shm_op0_handle,
                    utils.triton_to_np_dtype(output0['datatype']),
                    output0['shape'])
        else:
            output0_data = results.as_numpy('OUTPUT0')
    else:
        print("OUTPUT0 is missing in the response.")
        sys.exit(1)

    output1 = results.get_output("OUTPUT1")
    if output1 is not None:
        if use_shared_memory:
            if protocol == "grpc":
                output1_data = shm.get_contents_as_numpy(
                    shm_op1_handle, utils.triton_to_np_dtype(output1.datatype),
                    output1.shape)
            else:
                output1_data = shm.get_contents_as_numpy(
                    shm_op1_handle,
                    utils.triton_to_np_dtype(output1['datatype']),
                    output1['shape'])
        else:
            output1_data = results.as_numpy('OUTPUT1')
    else:
        print("OUTPUT1 is missing in the response.")
        sys.exit(1)

    if use_shared_memory:
        print("\n\n======== SHARED_MEMORY ========\n")
    else:
        print("\n\n======== NO_SHARED_MEMORY ========\n")
    for i in range(16):
        print(
            str(input0_data[i]) + " + " + str(input1_data[i]) + " = " +
            str(output0_data[0][i]))
        print(
            str(input0_data[i]) + " - " + str(input1_data[i]) + " = " +
            str(output1_data[0][i]))
        if (input0_data[i] + input1_data[i]) != output0_data[0][i]:
            print("shm infer error: incorrect sum")
            sys.exit(1)
        if (input0_data[i] - input1_data[i]) != output1_data[0][i]:
            print("shm infer error: incorrect difference")
            sys.exit(1)
    print("\n======== END ========\n\n")