def preprocess(cls, img):
    """
    Pre-process an image to meet the size, type and format
    requirements specified by the parameters.

    :param img: Pillow image
    :returns:
        - model_input: input as required by the model
        - extra_data: dict of data that is needed by the postprocess function
    """
    extra_data = {}

    # Careful, Pillow has (w, h) format but most models expect (h, w)
    w, h = img.size
    extra_data["original_image_size"] = (h, w)

    if cls.SHAPE[2] == 1:
        img = img.convert("L")
    else:
        img = img.convert("RGB")
    logger.info(f"Original image size: {img.size}")

    # convert to cv2
    img = np.array(img)
    img = img[:, :, ::-1].copy()

    img = image_resize(img, cls.SHAPE[1:])
    img = image_preprocess(img)

    npdtype = triton_to_np_dtype(cls.DTYPE)
    img = img.astype(npdtype)

    return img, extra_data
def _basic_inference(self,
                     shm_ip0_handle,
                     shm_ip1_handle,
                     shm_op0_handle,
                     shm_op1_handle,
                     error_msg,
                     big_shm_name="",
                     big_shm_size=64):
    input0_data = np.arange(start=0, stop=16, dtype=np.int32)
    input1_data = np.ones(shape=16, dtype=np.int32)
    inputs = []
    outputs = []
    if _protocol == "http":
        triton_client = httpclient.InferenceServerClient(_url, verbose=True)
        inputs.append(httpclient.InferInput("INPUT0", [1, 16], "INT32"))
        inputs.append(httpclient.InferInput("INPUT1", [1, 16], "INT32"))
        outputs.append(
            httpclient.InferRequestedOutput('OUTPUT0', binary_data=True))
        outputs.append(
            httpclient.InferRequestedOutput('OUTPUT1', binary_data=False))
    else:
        triton_client = grpcclient.InferenceServerClient(_url, verbose=True)
        inputs.append(grpcclient.InferInput("INPUT0", [1, 16], "INT32"))
        inputs.append(grpcclient.InferInput("INPUT1", [1, 16], "INT32"))
        outputs.append(grpcclient.InferRequestedOutput('OUTPUT0'))
        outputs.append(grpcclient.InferRequestedOutput('OUTPUT1'))

    inputs[0].set_shared_memory("input0_data", 64)

    # np.array is a factory function, not a type, so the original check
    # against it could never be true; compare against np.ndarray and send
    # INPUT1's own data so the assertion below still holds.
    if isinstance(shm_ip1_handle, np.ndarray):
        inputs[1].set_data_from_numpy(input1_data, binary_data=True)
    elif big_shm_name != "":
        inputs[1].set_shared_memory(big_shm_name, big_shm_size)
    else:
        inputs[1].set_shared_memory("input1_data", 64)

    outputs[0].set_shared_memory("output0_data", 64)
    outputs[1].set_shared_memory("output1_data", 64)

    try:
        results = triton_client.infer("simple",
                                      inputs,
                                      model_version="",
                                      outputs=outputs)
        output = results.get_output('OUTPUT0')
        if _protocol == "http":
            output_datatype = output['datatype']
            output_shape = output['shape']
        else:
            output_datatype = output.datatype
            output_shape = output.shape
        output_dtype = utils.triton_to_np_dtype(output_datatype)
        output_data = shm.get_contents_as_numpy(shm_op0_handle, output_dtype,
                                                output_shape)
        self.assertTrue(
            (output_data[0] == (input0_data + input1_data)).all(),
            "Model output does not match expected output")
    except Exception as ex:
        error_msg.append(str(ex))
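# The helper above assumes that shared-memory regions named "input0_data",
# "input1_data", "output0_data" and "output1_data" (64 bytes each: 16 INT32
# values) were already created, populated and registered with the server.
# Below is a minimal sketch of that setup for the HTTP protocol using
# tritonclient's system shared-memory utilities; the region and key names
# match the test, everything else is illustrative.
import numpy as np
import tritonclient.http as httpclient
import tritonclient.utils.shared_memory as shm

triton_client = httpclient.InferenceServerClient("localhost:8000")
input0_data = np.arange(start=0, stop=16, dtype=np.int32)

# Create the input region, copy the data into it and register it.
shm_ip0_handle = shm.create_shared_memory_region("input0_data",
                                                 "/input0_data", 64)
shm.set_shared_memory_region(shm_ip0_handle, [input0_data])
triton_client.register_system_shared_memory("input0_data", "/input0_data", 64)

# Output regions only need to be created and registered; the server writes
# the results into them during inference.
shm_op0_handle = shm.create_shared_memory_region("output0_data",
                                                 "/output0_data", 64)
triton_client.register_system_shared_memory("output0_data", "/output0_data",
                                            64)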
def get_embedding(self, face_img):
    if not isinstance(face_img, list):
        face_img = [face_img]

    face_img = np.stack(face_img)

    input_size = tuple(face_img[0].shape[0:2][::-1])
    blob = cv2.dnn.blobFromImages(
        face_img,
        1.0 / self.input_std,
        input_size,
        (self.input_mean, self.input_mean, self.input_mean),
        swapRB=True)
    blob = blob.astype(triton_to_np_dtype(self.dtype))

    inputs = []
    inputs.append(
        grpcclient.InferInput(self.input_name,
                              [blob.shape[0], self.c, self.h, self.w],
                              "FP32"))
    # inputs[0].set_data_from_numpy(face_img)
    cudashm.set_shared_memory_region(self.in_handle, [blob])
    input_bytesize = 12 * blob.shape[0] * self.w * self.h
    inputs[-1].set_shared_memory(self.in_handle_name, input_bytesize)

    outputs = []
    out_bytesize = 12 * 512 * self.max_batch_size
    outputs.append(grpcclient.InferRequestedOutput(self.output_name[0]))
    outputs[-1].set_shared_memory(self.out_handle_name, out_bytesize)

    out = self.triton_client.infer(self.model_name,
                                   inputs,
                                   model_version=self.model_version,
                                   outputs=outputs)

    out = [
        cudashm.get_contents_as_numpy(self.out_handle,
                                      triton_to_np_dtype(self.dtype),
                                      [blob.shape[0], 512])
    ]
    # out = [out.as_numpy(e) for e in self.output_name]
    return out[0]
def predict(self, input_images):
    # Put input data values into shared memory
    shm.set_shared_memory_region(self.input_images_handle, [input_images])

    results = self.triton_client.infer(model_name=self.model_name,
                                       inputs=self.inputs,
                                       outputs=self.outputs)

    # Read results from the shared memory.
    output = results.get_output("output")
    output_data = shm.get_contents_as_numpy(
        self.output_handle, utils.triton_to_np_dtype(output.datatype),
        output.shape)
    return output_data
def predict(self, deployment_name, df):
    single_input_np = None
    if isinstance(df, np.ndarray):
        single_input_np = df

    inputs = []
    if single_input_np is not None:
        model_metadata = self.triton_client.get_model_metadata(deployment_name)
        raise MlflowException("Unnamed input is not currently supported")
    else:
        if isinstance(df, pd.DataFrame):
            model_metadata = self.triton_client.get_model_metadata(
                deployment_name)
            input_dtype = {}
            for input in model_metadata["inputs"]:
                input_dtype[input["name"]] = triton_to_np_dtype(
                    input["datatype"])
            # Sanity check
            if len(df.columns) != 1:
                raise MlflowException(
                    "Expected Pandas DataFrame to have only 1 column")
            col = df.columns[0]
            for row in df.index:
                val = df[col][row]
                # Need to form a numpy array of the data type expected
                if type(df[col][row]) != np.ndarray:
                    val = np.array(val, dtype=input_dtype[row])
                inputs.append(
                    tritonhttpclient.InferInput(row, val.shape,
                                                np_to_triton_dtype(val.dtype)))
                inputs[-1].set_data_from_numpy(val)
        else:
            # df is expected to be a dict-like mapping of input name -> array
            for key, val in df.items():
                inputs.append(
                    tritonhttpclient.InferInput(key, val.shape,
                                                np_to_triton_dtype(val.dtype)))
                inputs[-1].set_data_from_numpy(val)

    try:
        resp = self.triton_client.infer(model_name=deployment_name,
                                        inputs=inputs)
        res = {}
        for output in resp.get_response()['outputs']:
            res[output['name']] = resp.as_numpy(output['name'])
        return {"outputs": res}
    except InferenceServerException as ex:
        raise MlflowException(str(ex))
def get_embedding(self, face_img):
    face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
    face_img = np.transpose(face_img, (2, 0, 1))
    face_img = np.expand_dims(face_img, axis=0)
    face_img = face_img.astype(triton_to_np_dtype(self.dtype))

    inputs = []
    inputs.append(
        httpclient.InferInput(self.input_name, [1, self.c, self.h, self.w],
                              "FP32"))
    inputs[0].set_data_from_numpy(face_img)

    out = self.triton_client.infer(self.model_name,
                                   inputs,
                                   model_version=self.model_version,
                                   outputs=None)
    out = [out.as_numpy(e)[0] for e in self.output_name]
    # print(output.get_output(self.output_name)['data'])
    return out
def preprocess(img, format, dtype, c, h, w, scaling, protocol):
    """
    Pre-process an image to meet the size, type and format
    requirements specified by the parameters.
    """
    # np.set_printoptions(threshold='nan')

    if c == 1:
        sample_img = img.convert('L')
    else:
        sample_img = img.convert('RGB')

    resized_img = sample_img.resize((w, h), Image.BILINEAR)
    resized = np.array(resized_img)
    if resized.ndim == 2:
        resized = resized[:, :, np.newaxis]

    npdtype = triton_to_np_dtype(dtype)
    typed = resized.astype(npdtype)

    if scaling == 'INCEPTION':
        scaled = (typed / 127.5) - 1
    elif scaling == 'VGG':
        if c == 1:
            scaled = typed - np.asarray((128,), dtype=npdtype)
        else:
            scaled = typed - np.asarray((123, 117, 104), dtype=npdtype)
    else:
        scaled = typed

    # Swap to CHW if necessary
    if protocol == "grpc":
        if format == mc.ModelInput.FORMAT_NCHW:
            ordered = np.transpose(scaled, (2, 0, 1))
        else:
            ordered = scaled
    else:
        if format == "FORMAT_NCHW":
            ordered = np.transpose(scaled, (2, 0, 1))
        else:
            ordered = scaled

    # Channels are in RGB order. Currently model configuration data
    # doesn't provide any information as to other channel orderings
    # (like BGR) so we just assume RGB.
    return ordered
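# A minimal usage sketch for the preprocess() helper above. The model name,
# tensor names and shapes below are placeholders (not taken from this code);
# in a real client they would come from the model's metadata/configuration.
import numpy as np
import tritonclient.http as httpclient
from PIL import Image

client = httpclient.InferenceServerClient("localhost:8000")

img = Image.open("example.jpg")
# HTTP protocol, NCHW layout, 3x224x224 FP32 input, INCEPTION scaling
image_data = preprocess(img, "FORMAT_NCHW", "FP32", 3, 224, 224, "INCEPTION",
                        "http")
batched = np.expand_dims(image_data, axis=0)  # assumes a batched model

inputs = [httpclient.InferInput("input", list(batched.shape), "FP32")]
inputs[0].set_data_from_numpy(batched, binary_data=True)
outputs = [httpclient.InferRequestedOutput("output", binary_data=True)]

result = client.infer("image_classifier", inputs, outputs=outputs)
print(result.as_numpy("output"))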
def __iter__(self):
    client = InferenceServerClient(self._server_url, verbose=self._verbose)
    error = self._verify_triton_state(client)
    if error:
        raise RuntimeError(f"Could not communicate to Triton Server: {error}")

    LOGGER.debug(
        f"Triton server {self._server_url} and model {self._model_name}:{self._model_version} "
        f"are up and ready!")

    model_config = client.get_model_config(self._model_name,
                                           self._model_version)
    model_metadata = client.get_model_metadata(self._model_name,
                                               self._model_version)
    LOGGER.info(f"Model config {model_config}")
    LOGGER.info(f"Model metadata {model_metadata}")

    inputs = {tm.name: tm for tm in model_metadata.inputs}
    outputs = {tm.name: tm for tm in model_metadata.outputs}
    output_names = list(outputs)
    outputs_req = [InferRequestedOutput(name) for name in outputs]

    for ids, x, y_real in self._dataloader:
        infer_inputs = []
        for name in inputs:
            data = x[name]
            infer_input = InferInput(name, data.shape, inputs[name].datatype)

            target_np_dtype = client_utils.triton_to_np_dtype(
                inputs[name].datatype)
            data = data.astype(target_np_dtype)

            infer_input.set_data_from_numpy(data)
            infer_inputs.append(infer_input)

        results = client.infer(
            model_name=self._model_name,
            model_version=self._model_version,
            inputs=infer_inputs,
            outputs=outputs_req,
            client_timeout=self._response_wait_t,
        )
        y_pred = {name: results.as_numpy(name) for name in output_names}
        yield ids, x, y_pred, y_real
def _pre_process_edgetpu(cls, img, dims):
    """
    Resize and center-crop the image to the dimensions given by `dims`
    (typically 224x224).

    :param img: image as array in HWC format
    :param dims: dims as tuple in HWC order
    """
    output_height, output_width, _ = dims
    img = cls._resize_with_aspectratio(img,
                                       output_height,
                                       output_width,
                                       inter_pol=cv2.INTER_LINEAR)
    img = cls._center_crop(img, output_height, output_width)
    npdtype = triton_to_np_dtype(cls.DTYPE)
    img = np.asarray(img, dtype=npdtype)
    # converts jpg pixel value from [0 - 255] to float array [-1.0 - 1.0]
    img -= [127.0, 127.0, 127.0]
    img /= [128.0, 128.0, 128.0]
    return img
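# For reference, a generic center crop over an HWC numpy array. This is a
# sketch of the standard technique only; it is not necessarily identical to
# the _center_crop helper used above.
import numpy as np

def center_crop(img, out_height, out_width):
    # Crop a window of (out_height, out_width) around the image center.
    height, width, _ = img.shape
    top = (height - out_height) // 2
    left = (width - out_width) // 2
    return img[top:top + out_height, left:left + out_width]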
def prepare(self, ctx_id=0):
    concurrency = 2
    # Make sure the model matches our requirements, and get some
    # properties of the model that we need for preprocessing
    try:
        model_metadata = self.triton_client.get_model_metadata(
            model_name=self.model_name, model_version=self.model_version)
    except InferenceServerException as e:
        print("failed to retrieve the metadata: " + str(e))
        sys.exit(1)

    logging.info(model_metadata)

    try:
        model_config = self.triton_client.get_model_config(
            model_name=self.model_name, model_version=self.model_version)
    except InferenceServerException as e:
        print("failed to retrieve the config: " + str(e))
        sys.exit(1)

    self.max_batch_size, self.input_name, self.output_name, self.c, self.h, self.w, self.format, self.dtype, self.out_shapes = parse_model_grpc(
        model_metadata, model_config.config)

    self.input_shape = (1, self.c, self.h, self.w)
    self.input_dtype = triton_to_np_dtype(self.dtype)
    self.in_handle_name = f'{self.model_name}_data_{os.getpid()}'

    if self.max_batch_size <= 0:
        self.max_batch_size = 1

    self.input_bytesize = 12 * self.w * self.h * 1

    self.in_handle = cudashm.create_shared_memory_region(
        self.in_handle_name, self.input_bytesize, 0)

    self.triton_client.unregister_cuda_shared_memory(self.in_handle_name)
    self.triton_client.register_cuda_shared_memory(
        self.in_handle_name, cudashm.get_raw_handle(self.in_handle), 0,
        self.input_bytesize)
def preprocess(cls, img):
    """
    Pre-process an image to meet the size, type and format
    requirements specified by the parameters.

    https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/yolov4

    :param img: Pillow image
    :returns:
        - model_input: input as required by the model
        - extra_data: dict of data that is needed by the postprocess function
    """
    extra_data = {}

    # Careful, Pillow has (w, h) format but most models expect (h, w)
    w, h = img.size
    extra_data["original_image_size"] = (h, w)

    if cls.SHAPE[2] == 1:
        sample_img = img.convert("L")
    else:
        sample_img = img.convert("RGB")
    logger.info(f"Original image size: {sample_img.size}")

    # convert to cv2
    open_cv_image = np.array(sample_img)
    open_cv_image = open_cv_image[:, :, ::-1].copy()

    image = image_preprocess(open_cv_image, (cls.SHAPE[0], cls.SHAPE[1]))
    npdtype = triton_to_np_dtype(cls.DTYPE)
    image = image.astype(npdtype)

    # channels first if needed (transpose the preprocessed array that is
    # returned, not the original Pillow image)
    if cls.CHANNEL_FIRST:
        image = np.transpose(image, (2, 0, 1))

    return image, extra_data
def inputs_outputs_generator(self, raw_inputs):
    """
    Generate input and output blobs for Triton client inference.

    :param raw_inputs: list of raw numpy inputs
    :return: inputs, outputs data
    """
    inputs = []
    for input_specs, raw_input in zip(self.inputs_specs, raw_inputs):
        # parse data type
        raw_input = raw_input.astype(
            triton_to_np_dtype(input_specs.datatype))
        infer_input = grpcclient.InferInput(input_specs.name,
                                            raw_input.shape,
                                            input_specs.datatype)
        infer_input.set_data_from_numpy(raw_input)
        inputs.append(infer_input)

    outputs = []
    for output_specs in self.outputs_specs:
        outputs.append(
            grpcclient.InferRequestedOutput(output_specs.name, class_count=0))
    return inputs, outputs
# Assumed imports for this standalone script (standard tritonclient aliases).
import numpy as np
from PIL import Image
import tritonclient.grpc as grpcclient
from tritonclient.utils import triton_to_np_dtype

# TODO: Make it easily configurable
MODEL = "mnist_tf_savedmodel"
MODEL_VER = "1"
URL_HTTP = "localhost:8000"
URL_GRPC = "localhost:8001"
INPUT_SHAPE = (28, 28)
DATA = "data/7.png"

# pre-processing
img = Image.open(DATA).convert('L')
img = img.resize(INPUT_SHAPE)
imgArr = np.asarray(img) / 255
imgArr = np.expand_dims(imgArr[:, :, np.newaxis], 0)
imgArr = imgArr.astype(triton_to_np_dtype('FP32'))

# Client-Server GRPC
print("Using GRPC ... ")
triton_client = grpcclient.InferenceServerClient(url=URL_GRPC, verbose=0)

inputs = []
inputs.append(grpcclient.InferInput('flatten_1_input', imgArr.shape, 'FP32'))
inputs[0].set_data_from_numpy(imgArr)

outputs = []
outputs.append(grpcclient.InferRequestedOutput('dense_3', class_count=0))

responses = []
responses.append(
    triton_client.infer(MODEL,
                        inputs,
                        request_id=str(1),
                        model_version=MODEL_VER,
                        outputs=outputs))
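# A small follow-on sketch: reading the classification scores back from the
# gRPC response produced above, using the same output tensor name.
scores = responses[0].as_numpy('dense_3')
print("Predicted digit:", np.argmax(scores))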
outputs.append(httpclient.InferRequestedOutput('OUTPUT1', binary_data=True))
outputs[-1].set_shared_memory("output_data",
                              output_byte_size,
                              offset=output_byte_size)

results = triton_client.infer(model_name=model_name,
                              inputs=inputs,
                              outputs=outputs)

# Read results from the shared memory.
output0 = results.get_output("OUTPUT0")
if output0 is not None:
    output0_data = shm.get_contents_as_numpy(
        shm_op_handle, utils.triton_to_np_dtype(output0['datatype']),
        output0['shape'])
else:
    print("OUTPUT0 is missing in the response.")
    sys.exit(1)

output1 = results.get_output("OUTPUT1")
if output1 is not None:
    output1_data = shm.get_contents_as_numpy(shm_op_handle,
                                             utils.triton_to_np_dtype(
                                                 output1['datatype']),
                                             output1['shape'],
                                             offset=output_byte_size)
else:
    print("OUTPUT1 is missing in the response.")
    sys.exit(1)
def test_buffer_attributes(self):
    model_name = 'bls'

    # Infer
    clients = [
        httpclient.InferenceServerClient(url='localhost:8000'),
        grpcclient.InferenceServerClient(url='localhost:8001')
    ]
    triton_clients = [httpclient, grpcclient]
    for i, client in enumerate(clients):
        # To make sure no shared memory regions are registered with the
        # server.
        client.unregister_system_shared_memory()
        client.unregister_cuda_shared_memory()

        triton_client = triton_clients[i]
        inputs = []
        outputs = []
        inputs.append(triton_client.InferInput('INPUT0', [1, 1000], "INT32"))

        input0_data = np.arange(start=0, stop=1000, dtype=np.int32)
        input0_data = np.expand_dims(input0_data, axis=0)

        input_byte_size = input0_data.size * input0_data.itemsize
        output_byte_size = input_byte_size

        shm_ip0_handle = cudashm.create_shared_memory_region(
            "input0_data", input_byte_size, 0)
        shm_op0_handle = cudashm.create_shared_memory_region(
            "output0_data", output_byte_size, 0)

        client.register_cuda_shared_memory(
            "input0_data", cudashm.get_raw_handle(shm_ip0_handle), 0,
            input_byte_size)
        client.register_cuda_shared_memory(
            "output0_data", cudashm.get_raw_handle(shm_op0_handle), 0,
            input_byte_size)

        cudashm.set_shared_memory_region(shm_ip0_handle, [input0_data])
        inputs[0].set_shared_memory("input0_data", input_byte_size)

        if triton_client is grpcclient:
            outputs.append(triton_client.InferRequestedOutput('OUTPUT0'))
            outputs[0].set_shared_memory("output0_data", output_byte_size)
        else:
            outputs.append(
                triton_client.InferRequestedOutput('OUTPUT0',
                                                   binary_data=True))
            outputs[0].set_shared_memory("output0_data", output_byte_size)

        results = client.infer(model_name=model_name,
                               inputs=inputs,
                               outputs=outputs)

        output0 = results.get_output("OUTPUT0")
        self.assertIsNotNone(output0)
        if triton_client is grpcclient:
            output0_data = cudashm.get_contents_as_numpy(
                shm_op0_handle, triton_to_np_dtype(output0.datatype),
                output0.shape)
        else:
            output0_data = cudashm.get_contents_as_numpy(
                shm_op0_handle, triton_to_np_dtype(output0['datatype']),
                output0['shape'])
        self.assertTrue(np.all(output0_data == input0_data))
inputs[-1].set_shared_memory("input1_data", input1_byte_size) outputs = [] outputs.append(httpclient.InferRequestedOutput('OUTPUT0', binary_data=True)) outputs[-1].set_shared_memory("output0_data", output0_byte_size) outputs.append(httpclient.InferRequestedOutput('OUTPUT1', binary_data=True)) outputs[-1].set_shared_memory("output1_data", output1_byte_size) results = triton_client.infer(model_name=model_name, inputs=inputs, outputs=outputs) # Read results from the shared memory. output0 = results.get_output("OUTPUT0") print(utils.triton_to_np_dtype(output0['datatype'])) if output0 is not None: output0_data = shm.get_contents_as_numpy( shm_op0_handle, utils.triton_to_np_dtype(output0['datatype']), output0['shape']) else: print("OUTPUT0 is missing in the response.") sys.exit(1) output1 = results.get_output("OUTPUT1") if output1 is not None: output1_data = shm.get_contents_as_numpy( shm_op1_handle, utils.triton_to_np_dtype(output1['datatype']), output1['shape']) else: print("OUTPUT1 is missing in the response.")
outputs[-1].set_shared_memory("output_data", output_byte_size) outputs.append(grpcclient.InferRequestedOutput('OUTPUT1')) outputs[-1].set_shared_memory("output_data", output_byte_size, offset=output_byte_size) results = triton_client.infer(model_name=model_name, inputs=inputs, outputs=outputs) # Read results from the shared memory. output0 = results.get_output("OUTPUT0") if output0 is not None: output0_data = shm.get_contents_as_numpy( shm_op_handle, utils.triton_to_np_dtype(output0.datatype), output0.shape) else: print("OUTPUT0 is missing in the response.") sys.exit(1) output1 = results.get_output("OUTPUT1") if output1 is not None: output1_data = shm.get_contents_as_numpy(shm_op_handle, utils.triton_to_np_dtype( output1.datatype), output1.shape, offset=output_byte_size) else: print("OUTPUT1 is missing in the response.") sys.exit(1)
inputs[-1].set_shared_memory("input1_data", input1_byte_size) outputs = [] outputs.append(grpcclient.InferRequestedOutput('OUTPUT0')) outputs[-1].set_shared_memory("output0_data", output0_byte_size) outputs.append(grpcclient.InferRequestedOutput('OUTPUT1')) outputs[-1].set_shared_memory("output1_data", output1_byte_size) results = triton_client.infer(model_name=model_name, inputs=inputs, outputs=outputs) # Read results from the shared memory. output0 = results.get_output("OUTPUT0") print(utils.triton_to_np_dtype(output0.datatype)) if output0 is not None: output0_data = shm.get_contents_as_numpy( shm_op0_handle, utils.triton_to_np_dtype(output0.datatype), output0.shape) else: print("OUTPUT0 is missing in the response.") sys.exit(1) output1 = results.get_output("OUTPUT1") if output1 is not None: output1_data = shm.get_contents_as_numpy( shm_op1_handle, utils.triton_to_np_dtype(output1.datatype), output1.shape) else: print("OUTPUT1 is missing in the response.")
try:
    model_config = triton_client.get_model_config(
        model_name=FLAGS.model_name, model_version=FLAGS.model_version)
except InferenceServerException as e:
    print("failed to retrieve the config: " + str(e))
    sys.exit(1)

if FLAGS.protocol.lower() == "grpc":
    max_batch_size, input_name, output_name, dtype = parse_model_grpc(
        model_metadata, model_config.config)
else:
    max_batch_size, input_name, output_name, dtype = parse_model_http(
        model_metadata, model_config)

input_data = np.zeros([FLAGS.batch_size, FLAGS.shape],
                      dtype=triton_to_np_dtype(dtype))

# --------------------------- Warm-Up ---------------------------------------
for i in range(FLAGS.warmup_count):
    inputs, outputs = requestGenerator(input_name, input_data, output_name,
                                       dtype, FLAGS.protocol.lower())
    triton_client.infer(FLAGS.model_name,
                        inputs,
                        model_version=FLAGS.model_version,
                        outputs=outputs)

latencies = []

# --------------------------- Start Load ------------------------------------
start_time = time.time()
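# A sketch of what the load loop might look like from here (not the original
# script): issue a fixed number of requests and record per-request latency.
# FLAGS.request_count is an assumed flag, not defined above.
for i in range(FLAGS.request_count):
    t0 = time.time()
    inputs, outputs = requestGenerator(input_name, input_data, output_name,
                                       dtype, FLAGS.protocol.lower())
    triton_client.infer(FLAGS.model_name,
                        inputs,
                        model_version=FLAGS.model_version,
                        outputs=outputs)
    latencies.append(time.time() - t0)

total_time = time.time() - start_time
print(f"Throughput: {FLAGS.request_count / total_time:.2f} infer/sec, "
      f"p50 latency: {np.percentile(latencies, 50) * 1000:.2f} ms")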
def req_loop(self):
    client = InferenceServerClient(self._server_url, verbose=self._verbose)
    self._errors = self._verify_triton_state(client)
    if self._errors:
        return

    LOGGER.debug(
        f"Triton server {self._server_url} and model {self._model_name}:{self._model_version} "
        f"are up and ready!")

    model_config = client.get_model_config(self._model_name,
                                           self._model_version)
    model_metadata = client.get_model_metadata(self._model_name,
                                               self._model_version)
    LOGGER.info(f"Model config {model_config}")
    LOGGER.info(f"Model metadata {model_metadata}")

    inputs = {tm.name: tm for tm in model_metadata.inputs}
    outputs = {tm.name: tm for tm in model_metadata.outputs}
    output_names = list(outputs)
    outputs_req = [InferRequestedOutput(name) for name in outputs]

    self._num_waiting_for = 0

    for ids, x, y_real in self._dataloader:
        infer_inputs = []
        for name in inputs:
            data = x[name]
            infer_input = InferInput(name, data.shape, inputs[name].datatype)

            target_np_dtype = client_utils.triton_to_np_dtype(
                inputs[name].datatype)
            data = data.astype(target_np_dtype)

            infer_input.set_data_from_numpy(data)
            infer_inputs.append(infer_input)

        with self._sync:

            def _check_can_send():
                return self._num_waiting_for < self._max_unresp_reqs

            can_send = self._sync.wait_for(_check_can_send,
                                           timeout=self._response_wait_t)
            if not can_send:
                error_msg = f"Runner could not send new requests for {self._response_wait_t}s"
                self._errors.append(error_msg)
                break

            callback = functools.partial(AsyncGRPCTritonRunner._on_result,
                                         self, ids, x, y_real, output_names)
            client.async_infer(
                model_name=self._model_name,
                model_version=self._model_version,
                inputs=infer_inputs,
                outputs=outputs_req,
                callback=callback,
            )
            self._num_waiting_for += 1

    # wait till receive all requested data
    with self._sync:

        def _all_processed():
            LOGGER.debug(f"wait for {self._num_waiting_for} unprocessed jobs")
            return self._num_waiting_for == 0

        self._processed_all = self._sync.wait_for(
            _all_processed, self.DEFAULT_MAX_FINISH_WAIT_S)
        if not self._processed_all:
            error_msg = f"Runner {self._response_wait_t}s timeout received while waiting for results from server"
            self._errors.append(error_msg)

    LOGGER.debug("Finished request thread")
def infer_and_validate(use_shared_memory, orig_input0_data, orig_input1_data):
    if use_shared_memory:
        input0_data = orig_input0_data
        input1_data = orig_input1_data
        byte_size = input0_data.size * input0_data.itemsize
        inputs[0].set_shared_memory("input0_data", byte_size)
        inputs[1].set_shared_memory("input1_data", byte_size)
        outputs[0].set_shared_memory("output0_data", byte_size)
        outputs[1].set_shared_memory("output1_data", byte_size)
    else:
        input0_data = orig_input0_data
        input1_data = orig_input1_data * 2
        inputs[0].set_data_from_numpy(np.expand_dims(input0_data, axis=0))
        inputs[1].set_data_from_numpy(np.expand_dims(input1_data, axis=0))
        outputs[0].unset_shared_memory()
        outputs[1].unset_shared_memory()

    results = triton_client.infer(model_name=model_name,
                                  inputs=inputs,
                                  outputs=outputs)

    # Read results from the shared memory.
    output0 = results.get_output("OUTPUT0")
    if output0 is not None:
        if use_shared_memory:
            if protocol == "grpc":
                output0_data = shm.get_contents_as_numpy(
                    shm_op0_handle,
                    utils.triton_to_np_dtype(output0.datatype), output0.shape)
            else:
                output0_data = shm.get_contents_as_numpy(
                    shm_op0_handle,
                    utils.triton_to_np_dtype(output0['datatype']),
                    output0['shape'])
        else:
            output0_data = results.as_numpy('OUTPUT0')
    else:
        print("OUTPUT0 is missing in the response.")
        sys.exit(1)

    output1 = results.get_output("OUTPUT1")
    if output1 is not None:
        if use_shared_memory:
            if protocol == "grpc":
                output1_data = shm.get_contents_as_numpy(
                    shm_op1_handle,
                    utils.triton_to_np_dtype(output1.datatype), output1.shape)
            else:
                output1_data = shm.get_contents_as_numpy(
                    shm_op1_handle,
                    utils.triton_to_np_dtype(output1['datatype']),
                    output1['shape'])
        else:
            output1_data = results.as_numpy('OUTPUT1')
    else:
        print("OUTPUT1 is missing in the response.")
        sys.exit(1)

    if use_shared_memory:
        print("\n\n======== SHARED_MEMORY ========\n")
    else:
        print("\n\n======== NO_SHARED_MEMORY ========\n")

    for i in range(16):
        print(
            str(input0_data[i]) + " + " + str(input1_data[i]) + " = " +
            str(output0_data[0][i]))
        print(
            str(input0_data[i]) + " - " + str(input1_data[i]) + " = " +
            str(output1_data[0][i]))
        if (input0_data[i] + input1_data[i]) != output0_data[0][i]:
            print("shm infer error: incorrect sum")
            sys.exit(1)
        if (input0_data[i] - input1_data[i]) != output1_data[0][i]:
            print("shm infer error: incorrect difference")
            sys.exit(1)
    print("\n======== END ========\n\n")