def __call__(self, raw_img):
    """Run inference on one raw image and return the filtered boxes.

    The image is passed through ``self._preprocess_image`` and handed to
    ``shm.set_shared_memory_region`` for the "input_1" handle. After the
    Triton inference call, both output layers are read back through
    ``shm.get_contents_as_numpy`` using the datatype and shape reported in
    the response, then post-processed and passed through ``NMS.filter``.

    Parameters
    ----------
    raw_img
        Image accepted by ``self._preprocess_image``.

    Returns
    -------
    The value returned by ``NMS.filter`` for the post-processed boxes.

    Raises
    ------
    Exception
        If either output layer is absent from the inference response.
    """
    cov_layer = "output_cov/Sigmoid"
    bbox_layer = "output_bbox/BiasAdd"

    prepared = self._preprocess_image(raw_img)
    shm.set_shared_memory_region(self.input_handles["input_1"], [prepared])

    response = self.triton_client.infer(
        model_name=self.model_name,
        model_version=self.model_version,
        inputs=self.input_layers,
        outputs=self.output_layers,
    )
    cov_meta = response.get_output(cov_layer)
    bbox_meta = response.get_output(bbox_layer)

    # Coverage is validated and read first, then the bounding boxes.
    if cov_meta is None:
        raise Exception(
            "output_cov/Sigmoid layer data is missing in the response.")
    coverages = shm.get_contents_as_numpy(
        self.output_handles[cov_layer],
        utils.triton_to_np_dtype(cov_meta.datatype),
        self._prod(cov_meta.shape),
    )

    if bbox_meta is None:
        raise Exception(
            "output_bbox/BiasAdd layer data is missing in the response.")
    bboxes = shm.get_contents_as_numpy(
        self.output_handles[bbox_layer],
        utils.triton_to_np_dtype(bbox_meta.datatype),
        self._prod(bbox_meta.shape),
    )

    candidates = self.postprocessor.start(bboxes, coverages)
    return NMS.filter(candidates)
def run(request):
    """Handle one webservice request for the "bidaf-9" model.

    The model's input names and data types must be known up front; here the
    input datatype is taken from the metadata returned by
    ``parse_model_http``.

    Parameters
    ----------
    request : str
        JSON-formatted string whose element 0 is the context and whose
        element 1 is the query.

    Returns
    -------
    result
        The value produced by ``postprocess`` from the context words and
        the inference response (per the original documentation, a string
        with the words that answer the query).
    """
    print(f"request is {request} type is {type(request)}")

    model_name = "bidaf-9"
    payload = json.loads(request)
    context, query = payload[0], payload[1]

    input_meta, _, _, _ = parse_model_http(model_name=model_name)

    # We use the np.object data type for string data
    np_dtype = triton_to_np_dtype(input_meta[0]["datatype"])

    # Context is pre-processed before the query.
    context_word, context_char = preprocess(context, np_dtype)
    query_word, query_char = preprocess(query, np_dtype)

    response = triton_infer(
        input_mapping={
            "query_word": query_word,
            "query_char": query_char,
            "context_word": context_word,
            "context_char": context_char,
        },
        model_name=model_name,
    )
    return postprocess(context_words=context_word, answer=response)
def preprocess(img, format, dtype, c, h, w, scaling, protocol):
    """
    Convert, resize, cast and scale an image, then order its axes to match
    the requested format.

    ``img`` is converted to mode 'L' when ``c == 1`` and to 'RGB' otherwise,
    resized to ``(w, h)`` with bilinear filtering and cast to the numpy type
    that ``triton_to_np_dtype(dtype)`` returns. ``scaling`` may be
    'INCEPTION' (``x / 128 - 1``), 'VGG' (per-channel offset subtraction) or
    anything else (no scaling). The array is transposed to CHW when
    ``format`` equals the NCHW marker for ``protocol``.
    """
    mode = 'L' if c == 1 else 'RGB'
    pixels = np.array(img.convert(mode).resize((w, h), Image.BILINEAR))

    # A two-dimensional array gets an explicit trailing channel axis.
    if pixels.ndim == 2:
        pixels = pixels[:, :, np.newaxis]

    npdtype = triton_to_np_dtype(dtype)
    typed = pixels.astype(npdtype)

    if scaling == 'INCEPTION':
        scaled = (typed / 128) - 1
    elif scaling == 'VGG':
        offsets = (128,) if c == 1 else (123, 117, 104)
        scaled = typed - np.asarray(offsets, dtype=npdtype)
    else:
        scaled = typed

    # The NCHW marker is an enum value for grpc and a plain string
    # otherwise; `mc` is only consulted on the grpc path.
    if protocol == "grpc":
        nchw_marker = mc.ModelInput.FORMAT_NCHW
    else:
        nchw_marker = "FORMAT_NCHW"

    # Channels are in RGB order. Currently model configuration data
    # doesn't provide any information as to other channel orderings
    # (like BGR) so we just assume RGB.
    if format == nchw_marker:
        return np.transpose(scaled, (2, 0, 1))
    return scaled
def preprocess(img):
    """Resize an image to 512x512 and return it with a leading batch axis.

    Prints the image size and array shape after each step, and writes the
    final array to disk with ``np.save("image_array", ...)`` before
    returning it.

    ``dtype`` and ``format`` are not parameters and are not assigned here;
    they are resolved from the surrounding scope.
    NOTE(review): if no such ``format`` variable exists, the builtin
    ``format`` function is what gets compared and the NCHW branch never
    runs -- confirm against the rest of the file.
    """
    print(img.size)
    img.load()
    # No mode conversion is applied; the image is used as loaded.
    print(img.size)

    shrunk = img.resize((512, 512), Image.LANCZOS)
    print(shrunk.size)

    pixels = np.asarray(shrunk, dtype=triton_to_np_dtype(dtype))
    print("Numpy shape", pixels.shape)

    if format == "NCHW":
        # Bring axis 2 to the front (HWC -> CHW for a 3-D array).
        pixels = np.rollaxis(pixels, 2, 0)
    print(pixels.shape)

    batched = pixels[np.newaxis, :, :, :]
    print(batched.shape)

    np.save("image_array", batched)
    return batched
def preprocess(img, scaling, dtype):
    """Prepare an image as a 3-channel, 224x224, channel-first array.

    The channel count, height, width and layout are fixed inside this
    function. ``img`` is converted, resized with bilinear filtering, cast
    to the numpy type returned by ``triton_to_np_dtype(dtype)`` and scaled
    according to ``scaling``: "INCEPTION" (``x / 128 - 1``), "VGG"
    (per-channel offset subtraction) or anything else (no scaling).
    """
    channels, height, width = 3, 224, 224
    layout = "FORMAT_NCHW"

    mode = "L" if channels == 1 else "RGB"
    pixels = np.array(img.convert(mode).resize((width, height), Image.BILINEAR))

    # A two-dimensional array gets an explicit trailing channel axis.
    if pixels.ndim == 2:
        pixels = pixels[:, :, np.newaxis]

    npdtype = triton_to_np_dtype(dtype)
    typed = pixels.astype(npdtype)

    if scaling == "INCEPTION":
        scaled = (typed / 128) - 1
    elif scaling == "VGG":
        offsets = (128, ) if channels == 1 else (123, 117, 104)
        scaled = typed - np.asarray(offsets, dtype=npdtype)
    else:
        scaled = typed

    # Channels are in RGB order. Currently model configuration data
    # doesn't provide any information as to other channel orderings
    # (like BGR) so we just assume RGB.
    if layout == "FORMAT_NCHW":
        # Swap HWC to CHW.
        return np.transpose(scaled, (2, 0, 1))
    return scaled
# Request both outputs and direct each one to its named shared-memory
# region ("output0_data" / "output1_data").
outputs = []
outputs.append(httpclient.InferRequestedOutput('OUTPUT0', binary_data=True))
outputs[-1].set_shared_memory("output0_data", output0_byte_size)

outputs.append(httpclient.InferRequestedOutput('OUTPUT1', binary_data=True))
outputs[-1].set_shared_memory("output1_data", output1_byte_size)

results = triton_client.infer(model_name=model_name,
                              inputs=inputs,
                              outputs=outputs)

# Read results from the shared memory.
output0 = results.get_output("OUTPUT0")
if output0 is not None:
    # The dtype is resolved (and printed) only once OUTPUT0 is known to be
    # present; subscripting a missing output would raise TypeError before
    # the error message below could be shown.
    output0_dtype = utils.triton_to_np_dtype(output0['datatype'])
    print(output0_dtype)
    output0_data = shm.get_contents_as_numpy(shm_op0_handle, output0_dtype,
                                             output0['shape'])
else:
    print("OUTPUT0 is missing in the response.")
    sys.exit(1)

output1 = results.get_output("OUTPUT1")
if output1 is not None:
    output1_data = shm.get_contents_as_numpy(
        shm_op1_handle, utils.triton_to_np_dtype(output1['datatype']),
        output1['shape'])
else:
    # NOTE(review): unlike the OUTPUT0 branch, no exit is visible here --
    # confirm against the rest of the script.
    print("OUTPUT1 is missing in the response.")
outputs = [] outputs.append(tritongrpcclient.InferRequestedOutput('OUTPUT0')) outputs[-1].set_shared_memory("output0_data", output_byte_size) outputs.append(tritongrpcclient.InferRequestedOutput('OUTPUT1')) outputs[-1].set_shared_memory("output1_data", output_byte_size) results = triton_client.infer(model_name=model_name, inputs=inputs, outputs=outputs) # Read results from the shared memory. output0 = results.get_output("OUTPUT0") if output0 is not None: output0_data = cudashm.get_contents_as_numpy( shm_op0_handle, utils.triton_to_np_dtype(output0.datatype), output0.shape) else: print("OUTPUT0 is missing in the response.") sys.exit(1) output1 = results.get_output("OUTPUT1") if output1 is not None: output1_data = cudashm.get_contents_as_numpy( shm_op1_handle, utils.triton_to_np_dtype(output1.datatype), output1.shape) else: print("OUTPUT1 is missing in the response.") sys.exit(1) for i in range(16):
def infer_and_validata(use_shared_memory, orig_input0_data, orig_input1_data):
    """Run one inference and verify the first 16 sums and differences.

    Uses ``inputs``, ``outputs``, ``triton_client``, ``model_name``,
    ``protocol``, ``shm`` and the ``shm_op0_handle`` / ``shm_op1_handle``
    objects from the surrounding scope.

    Parameters
    ----------
    use_shared_memory
        When truthy, inputs and outputs are bound to the shared-memory
        regions and results are read back through ``shm``. Otherwise the
        input arrays are attached to the request directly (with the second
        operand doubled) and results are read with ``results.as_numpy``.
    orig_input0_data, orig_input1_data
        The two operand arrays.

    The process exits with status 1 if an output is missing from the
    response or if any checked sum or difference does not match.
    """
    input0_data = orig_input0_data
    if not use_shared_memory:
        # The second operand is doubled on this path.
        input1_data = orig_input1_data * 2
        inputs[0].set_data_from_numpy(np.expand_dims(input0_data, axis=0))
        inputs[1].set_data_from_numpy(np.expand_dims(input1_data, axis=0))
        outputs[0].unset_shared_memory()
        outputs[1].unset_shared_memory()
    else:
        input1_data = orig_input1_data
        byte_size = input0_data.size * input0_data.itemsize
        inputs[0].set_shared_memory("input0_data", byte_size)
        inputs[1].set_shared_memory("input1_data", byte_size)
        outputs[0].set_shared_memory("output0_data", byte_size)
        outputs[1].set_shared_memory("output1_data", byte_size)

    results = triton_client.infer(model_name=model_name,
                                  inputs=inputs,
                                  outputs=outputs)

    # Read results from the shared memory.
    output0 = results.get_output("OUTPUT0")
    if output0 is None:
        print("OUTPUT0 is missing in the response.")
        sys.exit(1)
    if not use_shared_memory:
        output0_data = results.as_numpy('OUTPUT0')
    elif protocol == "grpc":
        # grpc exposes the output metadata as attributes ...
        output0_data = shm.get_contents_as_numpy(
            shm_op0_handle, utils.triton_to_np_dtype(output0.datatype),
            output0.shape)
    else:
        # ... while the other protocol exposes it by key.
        output0_data = shm.get_contents_as_numpy(
            shm_op0_handle, utils.triton_to_np_dtype(output0['datatype']),
            output0['shape'])

    output1 = results.get_output("OUTPUT1")
    if output1 is None:
        print("OUTPUT1 is missing in the response.")
        sys.exit(1)
    if not use_shared_memory:
        output1_data = results.as_numpy('OUTPUT1')
    elif protocol == "grpc":
        output1_data = shm.get_contents_as_numpy(
            shm_op1_handle, utils.triton_to_np_dtype(output1.datatype),
            output1.shape)
    else:
        output1_data = shm.get_contents_as_numpy(
            shm_op1_handle, utils.triton_to_np_dtype(output1['datatype']),
            output1['shape'])

    banner = ("\n\n======== SHARED_MEMORY ========\n" if use_shared_memory
              else "\n\n======== NO_SHARED_MEMORY ========\n")
    print(banner)

    for i in range(16):
        lhs = input0_data[i]
        rhs = input1_data[i]

        total = output0_data[0][i]
        print(f"{lhs!s} + {rhs!s} = {total!s}")
        delta = output1_data[0][i]
        print(f"{lhs!s} - {rhs!s} = {delta!s}")

        if (lhs + rhs) != total:
            print("shm infer error: incorrect sum")
            sys.exit(1)
        if (lhs - rhs) != delta:
            print("shm infer error: incorrect difference")
            sys.exit(1)

    print("\n======== END ========\n\n")