def execute(self, requests):
    output0_dtype = self.output0_dtype
    output1_dtype = self.output1_dtype

    responses = []
    for request in requests:
        in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
        in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")

        # If both of the tensors are in CPU, use NumPy.
        if in_0.is_cpu() and in_1.is_cpu():
            if (in_0.as_numpy().dtype.type is np.bytes_
                    or in_0.as_numpy().dtype == np.object_):
                out_0, out_1 = (
                    in_0.as_numpy().astype(np.int32) -
                    in_1.as_numpy().astype(np.int32),
                    in_0.as_numpy().astype(np.int32) +
                    in_1.as_numpy().astype(np.int32))
                out_tensor_0 = pb_utils.Tensor("OUTPUT0",
                                               out_0.astype(output0_dtype))
                out_tensor_1 = pb_utils.Tensor("OUTPUT1",
                                               out_1.astype(output1_dtype))
            else:
                in_0_pytorch, in_1_pytorch = from_dlpack(
                    in_0.to_dlpack()), from_dlpack(in_1.to_dlpack())
                out_0, out_1 = (in_0_pytorch - in_1_pytorch,
                                in_0_pytorch + in_1_pytorch)

                if self.output0_dtype == np.object_:
                    out_tensor_0 = pb_utils.Tensor(
                        "OUTPUT0", out_0.numpy().astype(output0_dtype))
                else:
                    out_0 = out_0.type(
                        self.numpy_to_pytorch_dtype[output0_dtype])
                    out_tensor_0 = pb_utils.Tensor.from_dlpack(
                        "OUTPUT0", to_dlpack(out_0))

                if self.output1_dtype == np.object_:
                    out_tensor_1 = pb_utils.Tensor(
                        "OUTPUT1", out_1.numpy().astype(output1_dtype))
                else:
                    out_1 = out_1.type(
                        self.numpy_to_pytorch_dtype[output1_dtype])
                    out_tensor_1 = pb_utils.Tensor.from_dlpack(
                        "OUTPUT1", to_dlpack(out_1))
        else:
            in_0_pytorch, in_1_pytorch = from_dlpack(
                in_0.to_dlpack()).cuda(), from_dlpack(
                    in_1.to_dlpack()).cuda()
            out_0, out_1 = (in_0_pytorch - in_1_pytorch,
                            in_0_pytorch + in_1_pytorch)
            out_tensor_0 = pb_utils.Tensor.from_dlpack("OUTPUT0",
                                                       to_dlpack(out_0))
            out_tensor_1 = pb_utils.Tensor.from_dlpack("OUTPUT1",
                                                       to_dlpack(out_1))

        responses.append(
            pb_utils.InferenceResponse([out_tensor_0, out_tensor_1]))

    return responses
def execute(self, requests): """Model supporting optional inputs. If the input is not provided, an input tensor of size 1 containing scalar 5 will be used.""" responses = [] for request in requests: input0_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0") input1_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT1") if input0_tensor is not None: input0_numpy = input0_tensor.as_numpy() else: input0_numpy = np.array([5], dtype=np.int32) if input1_tensor is not None: input1_numpy = input1_tensor.as_numpy() else: input1_numpy = np.array([5], dtype=np.int32) output0_tensor = pb_utils.Tensor("OUTPUT0", input0_numpy + input1_numpy) output1_tensor = pb_utils.Tensor("OUTPUT1", input0_numpy - input1_numpy) responses.append( pb_utils.InferenceResponse([output0_tensor, output1_tensor])) return responses
def bls_add_sub(_=None):
    input0_np = np.random.randn(*[16])
    input0_np = input0_np.astype(np.float32)
    input1_np = np.random.randn(*[16])
    input1_np = input1_np.astype(np.float32)
    input0 = pb_utils.Tensor('INPUT0', input0_np)
    input1 = pb_utils.Tensor('INPUT1', input1_np)

    infer_request = pb_utils.InferenceRequest(
        model_name='add_sub',
        inputs=[input0, input1],
        requested_output_names=['OUTPUT0', 'OUTPUT1'])
    infer_response = infer_request.exec()
    if infer_response.has_error():
        return False

    output0 = pb_utils.get_output_tensor_by_name(infer_response, 'OUTPUT0')
    output1 = pb_utils.get_output_tensor_by_name(infer_response, 'OUTPUT1')
    if output0 is None or output1 is None:
        return False

    expected_output_0 = input0.as_numpy() + input1.as_numpy()
    expected_output_1 = input0.as_numpy() - input1.as_numpy()

    if not np.all(expected_output_0 == output0.as_numpy()):
        return False

    if not np.all(expected_output_1 == output1.as_numpy()):
        return False

    return True
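# Hedged usage sketch (not part of the original source): one way the
# `bls_add_sub` check above could be wired into a model's `execute`,
# returning its boolean result as a single-element tensor. The output name
# 'OUTPUT0' and the BOOL dtype are illustrative assumptions; the overall
# shape mirrors the async BLS test model that appears later in this section.
def execute(self, requests):
    responses = []
    for _ in requests:
        # Run the BLS round trip against the 'add_sub' model and report
        # whether its outputs matched the locally computed expectations.
        result = bls_add_sub()
        responses.append(
            pb_utils.InferenceResponse([
                pb_utils.Tensor('OUTPUT0', np.array([result],
                                                    dtype=np.bool_))
            ]))
    return responses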
def execute(self, requests): """ This function is called on inference request. """ output0_dtype = self.output0_dtype output1_dtype = self.output1_dtype responses = [] for request in requests: input_tensors = request.inputs() in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1") if in_0.as_numpy().dtype.type is np.bytes_ or in_0.as_numpy( ).dtype == np.object: out_0, out_1 = (in_0.as_numpy().astype(np.int32) - in_1.as_numpy().astype(np.int32),\ in_0.as_numpy().astype(np.int32) + in_1.as_numpy().astype(np.int32)) else: out_0, out_1 = (in_0.as_numpy() - in_1.as_numpy(), in_0.as_numpy() + in_1.as_numpy()) out_tensor_0 = pb_utils.Tensor("OUTPUT0", out_0.astype(output0_dtype)) out_tensor_1 = pb_utils.Tensor("OUTPUT1", out_1.astype(output1_dtype)) responses.append( pb_utils.InferenceResponse([out_tensor_0, out_tensor_1])) return responses
def execute(self, requests):
    output0_dtype = self.output0_dtype
    output1_dtype = self.output1_dtype

    responses = []
    for request in requests:
        THRESHOLD = 0.20

        # Get inputs
        x_recon = pb_utils.get_input_tensor_by_name(request,
                                                    "RECONSTR0").as_numpy()
        x_orig = pb_utils.get_input_tensor_by_name(request,
                                                   "ORIG0").as_numpy()

        # Mean squared error between the reconstructed and original input
        reconstruction_score = np.mean((x_orig - x_recon)**2, axis=1)
        anomaly = reconstruction_score > THRESHOLD

        # Create output tensors
        out_tensor_0 = pb_utils.Tensor(
            "ANOMALY_SCORE0", reconstruction_score.astype(output0_dtype))
        out_tensor_1 = pb_utils.Tensor("ANOMALY0",
                                       anomaly.astype(output1_dtype))

        inference_response = pb_utils.InferenceResponse(
            output_tensors=[out_tensor_0, out_tensor_1])
        responses.append(inference_response)

    return responses
def execute(self, requests): """ Create a response sender object and use that for sending the response. """ # This model does not support batching, so 'request_count' should always be 1. if len(requests) != 1: raise pb_utils.TritonModelException("unsupported batch size " + len(requests)) output0_dtype = self.output0_dtype output1_dtype = self.output1_dtype response_sender = requests[0].get_response_sender() in_0 = pb_utils.get_input_tensor_by_name(requests[0], "INPUT0") in_1 = pb_utils.get_input_tensor_by_name(requests[0], "INPUT1") out_0, out_1 = (in_0.as_numpy() + in_1.as_numpy(), in_0.as_numpy() - in_1.as_numpy()) out_tensor_0 = pb_utils.Tensor("OUTPUT0", out_0.astype(output0_dtype)) out_tensor_1 = pb_utils.Tensor("OUTPUT1", out_1.astype(output1_dtype)) response = pb_utils.InferenceResponse([out_tensor_0, out_tensor_1]) response_sender.send( flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL) response_sender.send(response)
def execute(self, requests): """`execute` MUST be implemented in every Python model. `execute` function receives a list of pb_utils.InferenceRequest as the only argument. This function is called when an inference request is made for this model. Depending on the batching configuration (e.g. Dynamic Batching) used, `requests` may contain multiple requests. Every Python model, must create one pb_utils.InferenceResponse for every pb_utils.InferenceRequest in `requests`. If there is an error, you can set the error argument when creating a pb_utils.InferenceResponse Parameters ---------- requests : list A list of pb_utils.InferenceRequest Returns ------- list A list of pb_utils.InferenceResponse. The length of this list must be the same as `requests` """ output0_dtype = self.output0_dtype output1_dtype = self.output1_dtype responses = [] # Every Python backend must iterate over everyone of the requests # and create a pb_utils.InferenceResponse for each of them. for request in requests: # Get INPUT0 in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") # Get INPUT1 in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1") out_0, out_1 = (in_0.as_numpy() + in_1.as_numpy(), in_0.as_numpy() - in_1.as_numpy()) # Create output tensors. You need pb_utils.Tensor # objects to create pb_utils.InferenceResponse. out_tensor_0 = pb_utils.Tensor("OUTPUT0", out_0.astype(output0_dtype)) out_tensor_1 = pb_utils.Tensor("OUTPUT1", out_1.astype(output1_dtype)) # Create InferenceResponse. You can set an error here in case # there was a problem with handling this inference request. # Below is an example of how you can set errors in inference # response: # # pb_utils.InferenceResponse( # output_tensors=..., TritonError("An error occured")) inference_response = pb_utils.InferenceResponse( output_tensors=[out_tensor_0, out_tensor_1]) responses.append(inference_response) # You should return a list of pb_utils.InferenceResponse. Length # of this list must match the length of `requests` list. return responses
def _send_bls_sequence_requests(self, correlation_id):
    # Start request
    try:
        input = pb_utils.Tensor('INPUT', np.array([1000], dtype=np.int32))
        infer_request = pb_utils.InferenceRequest(
            model_name='onnx_nobatch_sequence_int32',
            inputs=[input],
            requested_output_names=['OUTPUT'],
            flags=pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_START,
            correlation_id=correlation_id)
        self.assertEqual(infer_request.flags(),
                         pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_START)
        infer_response = infer_request.exec()
        self.assertFalse(infer_response.has_error())
        output = pb_utils.get_output_tensor_by_name(infer_response, 'OUTPUT')
        self.assertEqual(output.as_numpy()[0], input.as_numpy()[0])

        for i in range(10):
            input = pb_utils.Tensor('INPUT', np.array([i], dtype=np.int32))
            infer_request = pb_utils.InferenceRequest(
                model_name='onnx_nobatch_sequence_int32',
                inputs=[input],
                requested_output_names=['OUTPUT'],
                correlation_id=correlation_id)
            infer_response = infer_request.exec()
            self.assertFalse(infer_response.has_error())

            # The new output is the previous output + the current input
            expected_output = output.as_numpy()[0] + i
            output = pb_utils.get_output_tensor_by_name(
                infer_response, 'OUTPUT')
            self.assertEqual(output.as_numpy()[0], expected_output)

        # Final request
        input = pb_utils.Tensor('INPUT', np.array([2000], dtype=np.int32))
        infer_request = pb_utils.InferenceRequest(
            model_name='onnx_nobatch_sequence_int32',
            inputs=[input],
            requested_output_names=['OUTPUT'],
            correlation_id=correlation_id)
        infer_request.set_flags(
            pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_END)
        self.assertEqual(infer_request.flags(),
                         pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_END)
        infer_response = infer_request.exec()
        self.assertFalse(infer_response.has_error())
        expected_output = output.as_numpy()[0] + input.as_numpy()[0]
        output = pb_utils.get_output_tensor_by_name(infer_response, 'OUTPUT')
        self.assertEqual(output.as_numpy()[0], expected_output)
    except Exception as e:
        self.add_deferred_exception(e)
def execute(self, requests):
    responses = []
    new_shape = [64, 2, 32, 55, 84]
    shape_reorder = [1, 0, 4, 2, 3]
    for request in requests:
        input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
        input_numpy = input_tensor.as_numpy()
        output0 = pb_utils.Tensor("OUTPUT0", input_numpy.reshape(new_shape))

        # Transpose the tensor to create a non-contiguous tensor.
        output1 = pb_utils.Tensor("OUTPUT1", input_numpy.T)
        output2 = pb_utils.Tensor("OUTPUT2",
                                  np.transpose(input_numpy, shape_reorder))
        responses.append(
            pb_utils.InferenceResponse([output0, output1, output2]))
    return responses
def response_thread(self, response_sender, input0, gpu_output):
    # Sleep 5 seconds to make sure the main thread has exited.
    time.sleep(5)

    if input0.is_cpu():
        if not gpu_output[0]:
            output0 = pb_utils.Tensor.from_dlpack("OUTPUT0",
                                                  input0.to_dlpack())
        else:
            output0_pytorch = from_dlpack(input0.to_dlpack()).cuda()
            output0 = pb_utils.Tensor.from_dlpack(
                "OUTPUT0", to_dlpack(output0_pytorch))
    else:
        if gpu_output[0]:
            output0 = pb_utils.Tensor.from_dlpack("OUTPUT0",
                                                  input0.to_dlpack())
        else:
            output0_pytorch = from_dlpack(input0.to_dlpack()).cpu()
            output0 = pb_utils.Tensor.from_dlpack(
                "OUTPUT0", to_dlpack(output0_pytorch))

    next_gpu_output = pb_utils.Tensor("NEXT_GPU_OUTPUT", gpu_output[1:])
    infer_response = pb_utils.InferenceResponse([output0, next_gpu_output])

    # Number of times to repeat the response
    response_repeat = 2
    for _ in range(response_repeat):
        response_sender.send(infer_response)

    response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

    with self.inflight_thread_count_lck:
        self.inflight_thread_count -= 1
def execute(self, requests): responses = [] for request in requests: input0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") print('ISCPU', input0.is_cpu()) gpu_output = pb_utils.get_input_tensor_by_name( request, "GPU_OUTPUT").as_numpy() if input0.is_cpu(): if not gpu_output[0]: output0 = pb_utils.Tensor.from_dlpack( "OUTPUT0", input0.to_dlpack()) else: outptu0_pytorch = from_dlpack(input0.to_dlpack()).cuda() output0 = pb_utils.Tensor.from_dlpack( "OUTPUT0", to_dlpack(outptu0_pytorch)) else: if gpu_output[0]: output0 = pb_utils.Tensor.from_dlpack( "OUTPUT0", input0.to_dlpack()) else: outptu0_pytorch = from_dlpack(input0.to_dlpack()).cpu() output0 = pb_utils.Tensor.from_dlpack( "OUTPUT0", to_dlpack(outptu0_pytorch)) next_gpu_output = pb_utils.Tensor("NEXT_GPU_OUTPUT", gpu_output[1:]) responses.append( pb_utils.InferenceResponse([output0, next_gpu_output])) return responses
def execute(self, requests): output0_dtype = self.output0_dtype responses = [] for request in requests: in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") input_smiles = in_0.as_numpy()[0].decode() print('processing', input_smiles) generated_smiles, neighboring_embeddings, pad_mask = \ self.find_similars_smiles_list(input_smiles, num_requested=10, force_unique=True) out_0 = np.array(generated_smiles).astype(np.object) out_tensor_0 = pb_utils.Tensor("OUTPUT0", out_0.astype(output0_dtype)) # pb_utils.InferenceResponse( # output_tensors=..., TritonError("An error occured")) inference_response = pb_utils.InferenceResponse( output_tensors=[out_tensor_0]) responses.append(inference_response) return responses
def response_thread(self, response_sender, index, in_input):
    # The response_sender is used to send response(s) associated with the
    # corresponding request. The first request will send an error and the
    # other requests will send regular responses. The number of responses
    # per request is the number of elements in the input tensor.
    in_value = in_input
    out_output = pb_utils.Tensor("OUT", in_value)

    if index == 0:
        error = pb_utils.TritonError('An error occurred during execution')
        response = pb_utils.InferenceResponse(output_tensors=[out_output],
                                              error=error)
    else:
        response = pb_utils.InferenceResponse(output_tensors=[out_output])
    response_sender.send(response)

    # We must close the response sender to indicate to Triton that we are
    # done sending responses for the corresponding request. We can't use
    # the response sender after closing it.
    response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

    with self.inflight_thread_count_lck:
        self.inflight_thread_count -= 1
def execute(self, requests):
    responses = []
    for _ in requests:
        if self._index % 2 == 0:
            out_tensor_0 = pb_utils.Tensor(
                "OUTPUT0",
                np.array(['123456'], dtype=self._dtypes[self._index % 3]))
        else:
            # Test sending strings with no elements
            out_tensor_0 = pb_utils.Tensor(
                "OUTPUT0", np.array([], dtype=self._dtypes[self._index % 3]))

        self._index += 1
        responses.append(pb_utils.InferenceResponse([out_tensor_0]))
    return responses
def execute(self, requests): responses = [] for request in requests: input_tensor = pb_utils.get_input_tensor_by_name(request, "IN") out_tensor = pb_utils.Tensor("OUT", input_tensor.as_numpy()) responses.append(pb_utils.InferenceResponse([out_tensor], error)) return responses
def execute(self, requests): """`execute` must be implemented in every Python model. `execute` function receives a list of pb_utils.InferenceRequest as the only argument. This function is called when an inference is requested for this model. Depending on the batching configuration (e.g. Dynamic Batching) used, `requests` may contain multiple requests. Every Python model, must create one pb_utils.InferenceResponse for every pb_utils.InferenceRequest in `requests`. If there is an error, you can set the error argument when creating a pb_utils.InferenceResponse. Parameters ---------- requests : list A list of pb_utils.InferenceRequest Returns ------- list A list of pb_utils.InferenceResponse. The length of this list must be the same as `requests` """ responses = [] # Every Python backend must iterate over everyone of the requests # and create a pb_utils.InferenceResponse for each of them. for request in requests: # Get INPUT0 input_ids = pb_utils.get_input_tensor_by_name( request, "input_ids").to_dlpack() attention_mask = pb_utils.get_input_tensor_by_name( request, "attention_mask").to_dlpack() # TODO: Set environment variable to prevent to(self.device) input_ids = from_dlpack(input_ids).long().to(self.device) attention_mask = from_dlpack(attention_mask).long().to(self.device) with torch.no_grad(): outputs = self.model(input_ids, attention_mask) conf, preds = torch.max(outputs, dim=1) preds = preds.int() out_tensor_0 = pb_utils.Tensor("preds", preds.cpu().numpy()) # Create InferenceResponse. You can set an error here in case # there was a problem with handling this inference request. # Below is an example of how you can set errors in inference # response: # # pb_utils.InferenceResponse( # output_tensors=..., TritonError("An error occured")) inference_response = pb_utils.InferenceResponse( output_tensors=[out_tensor_0]) responses.append(inference_response) # You should return a list of pb_utils.InferenceResponse. Length # of this list must match the length of `requests` list. return responses
def _send_identity_tensor(self, size):
    tensor_size = [1, size]
    input0_np = np.random.randn(*tensor_size)
    input0 = pb_utils.Tensor('INPUT0', input0_np.astype(np.float32))
    infer_request = pb_utils.InferenceRequest(
        model_name='identity_fp32',
        inputs=[input0],
        requested_output_names=['OUTPUT0'])
    return input0_np, infer_request.exec()
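# Hedged usage sketch (not in the original source): how the helper above
# might be exercised in a test method. The test name and the 16-element size
# are illustrative assumptions; the validation mirrors the input/output
# comparison pattern used elsewhere in this section.
def test_bls_identity(self):
    input0_np, infer_response = self._send_identity_tensor(16)
    self.assertFalse(infer_response.has_error())

    output0 = pb_utils.get_output_tensor_by_name(infer_response, 'OUTPUT0')
    self.assertIsNotNone(output0)
    # identity_fp32 should echo its input (after the float32 cast).
    self.assertTrue(
        np.allclose(input0_np.astype(np.float32), output0.as_numpy()))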
def test_bls_wrong_inputs(self):
    input0 = pb_utils.Tensor('INPUT0', np.random.randn(*[1, 16]))

    infer_request = pb_utils.InferenceRequest(
        model_name='add_sub',
        inputs=[input0],
        requested_output_names=['OUTPUT0', 'OUTPUT1'])
    infer_response = infer_request.exec()
    self.assertTrue(infer_response.has_error())
def execute(self, requests): responses = [] for _ in requests: out_tensor_0 = pb_utils.Tensor( "OUTPUT0", np.array(['123456'], dtype=self._dtypes[self._index])) self._index += 1 responses.append(pb_utils.InferenceResponse([out_tensor_0])) return responses
def execute(self, requests): """ This function is called on inference request. """ responses = [] for request in requests: input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0") out_tensor = pb_utils.Tensor("OUTPUT0", input_tensor.as_numpy()) responses.append(pb_utils.InferenceResponse([out_tensor])) return responses
def response_thread(self, response_sender, in_input):
    # The response_sender is used to send response(s) associated with the
    # corresponding request.
    # Sleep 5 seconds to make sure the main thread has exited.
    time.sleep(5)

    status = self.execute_gpu_bls()
    if not status:
        infer_response = pb_utils.InferenceResponse(
            error="GPU BLS test failed.")
        # Close the stream with the FINAL flag so the request completes.
        response_sender.send(
            infer_response,
            flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
    else:
        in_value = in_input
        infer_request = pb_utils.InferenceRequest(
            model_name='identity_fp32',
            requested_output_names=["OUTPUT0"],
            inputs=[pb_utils.Tensor('INPUT0', in_input)])
        infer_response = infer_request.exec()
        output0 = pb_utils.get_output_tensor_by_name(infer_response,
                                                     "OUTPUT0")

        if infer_response.has_error():
            response = pb_utils.InferenceResponse(
                error=infer_response.error().message())
            response_sender.send(
                response,
                flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
        elif np.any(in_input != output0.as_numpy()):
            error_message = (
                "BLS Request input and BLS response output do not match."
                f" {in_value} != {output0.as_numpy()}")
            response = pb_utils.InferenceResponse(error=error_message)
            response_sender.send(
                response,
                flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
        else:
            output_tensors = [pb_utils.Tensor('OUT', in_value)]
            response = pb_utils.InferenceResponse(
                output_tensors=output_tensors)
            response_sender.send(
                response,
                flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

    with self.inflight_thread_count_lck:
        self.inflight_thread_count -= 1
def execute(self, requests): """ Identity model in Python backend. """ responses = [] for request in requests: input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0") out_tensor = pb_utils.Tensor("OUTPUT0", input_tensor.as_numpy()) responses.append(pb_utils.InferenceResponse([out_tensor])) return responses
def execute(self, requests): """ This function is called on inference request. """ responses = [] for request in requests: input_tensor = pb_utils.get_input_tensor_by_name(request, "IN") out_tensor = pb_utils.Tensor("OUT", input_tensor.as_numpy()) error = pb_utils.TritonError('An error occured during execution') responses.append(pb_utils.InferenceResponse([out_tensor], error)) return responses
def execute(self, requests): responses = [] for request in requests: in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") out_tensor_0 = pb_utils.Tensor( "OUTPUT0", in_0.as_numpy().astype(self._dtypes[self._index])) self._index += 1 responses.append(pb_utils.InferenceResponse([out_tensor_0])) return responses
def execute(self, requests): """`execute` must be implemented in every Python model. `execute` function receives a list of pb_utils.InferenceRequest as the only argument. This function is called when an inference is requested for this model. Depending on the batching configuration (e.g. Dynamic Batching) used, `requests` may contain multiple requests. Every Python model, must create one pb_utils.InferenceResponse for every pb_utils.InferenceRequest in `requests`. If there is an error, you can set the error argument when creating a pb_utils.InferenceResponse. Parameters ---------- requests : list A list of pb_utils.InferenceRequest Returns ------- list A list of pb_utils.InferenceResponse. The length of this list must be the same as `requests` """ responses = [] # print("num:", len(requests), flush=True) for request in requests: data = pb_utils.get_input_tensor_by_name(request, self.input_names[0]) data = data.as_numpy() data = [i[0].decode('utf-8') for i in data] data = self.tokenizer(data, max_length=128, padding=True, truncation=True) input_ids = np.array(data["input_ids"], dtype=self.output_dtype[0]) token_type_ids = np.array(data["token_type_ids"], dtype=self.output_dtype[1]) # print("input_ids:", input_ids) # print("token_type_ids:", token_type_ids) out_tensor1 = pb_utils.Tensor(self.output_names[0], input_ids) out_tensor2 = pb_utils.Tensor(self.output_names[1], token_type_ids) inference_response = pb_utils.InferenceResponse( output_tensors=[out_tensor1, out_tensor2]) responses.append(inference_response) return responses
def test_dlpack_string_tensor(self):
    np_object = np.array(['An Example String'], dtype=np.object_)
    pb_tensor = pb_utils.Tensor('test_tensor', np_object)

    with self.assertRaises(Exception) as e:
        pb_tensor.to_dlpack()

    self.assertEqual(str(e.exception),
                     'TYPE_BYTES tensors cannot be converted to DLPack.')
def test_dlpack_string_tensor(self):
    np_object = np.array(['An Example String'], dtype=np.object_)
    pb_tensor = pb_utils.Tensor('test_tensor', np_object)

    with self.assertRaises(Exception) as e:
        pb_tensor.to_dlpack()

    self.assertEqual(str(e.exception),
                     'DLPack does not have support for string tensors.')
def execute(self, requests):
    output0_dtype = self.output0_dtype

    responses = []
    for request in requests:
        acc_x = pb_utils.get_input_tensor_by_name(request,
                                                  "ACC_X").as_numpy()
        acc_y = pb_utils.get_input_tensor_by_name(request,
                                                  "ACC_Y").as_numpy()
        acc_z = pb_utils.get_input_tensor_by_name(request,
                                                  "ACC_Z").as_numpy()
        gyro_x = pb_utils.get_input_tensor_by_name(request,
                                                   "GYRO_X").as_numpy()
        gyro_y = pb_utils.get_input_tensor_by_name(request,
                                                   "GYRO_Y").as_numpy()
        gyro_z = pb_utils.get_input_tensor_by_name(request,
                                                   "GYRO_Z").as_numpy()
        humidity = pb_utils.get_input_tensor_by_name(request,
                                                     "HUMIDITY").as_numpy()
        pressure = pb_utils.get_input_tensor_by_name(request,
                                                     "PRESSURE").as_numpy()
        temp_hum = pb_utils.get_input_tensor_by_name(request,
                                                     "TEMP_HUM").as_numpy()
        temp_press = pb_utils.get_input_tensor_by_name(
            request, "TEMP_PRESS").as_numpy()

        out_0 = np.array([
            acc_y, acc_x, acc_z, pressure, temp_press, temp_hum, humidity,
            gyro_x, gyro_y, gyro_z
        ]).transpose()

        # Per-feature minimum and maximum used for scaling, in the order:
        # ACC_Y ACC_X ACC_Z PRESSURE TEMP_PRESS TEMP_HUM HUMIDITY
        # GYRO_X GYRO_Y GYRO_Z
        min_vals = np.array([
            -0.132551, -0.049693, 0.759847, 976.001709, 38.724998, 40.220890,
            13.003981, -1.937896, -0.265019, -0.250647
        ])
        max_vals = np.array([
            0.093099, 0.150289, 1.177543, 1007.996338, 46.093750, 48.355824,
            23.506138, 1.923712, 0.219204, 0.671759
        ])

        # Min-max scaling
        out_0_scaled = (out_0 - min_vals) / (max_vals - min_vals)

        # Create output tensor
        out_tensor_0 = pb_utils.Tensor("INPUT0",
                                       out_0_scaled.astype(output0_dtype))
        inference_response = pb_utils.InferenceResponse(
            output_tensors=[out_tensor_0])
        responses.append(inference_response)

    return responses
def execute(self, requests): """ The body of this model doesn't matter. The main purpose of this model is to test correct handling of Python errors in the `finalize` function. """ responses = [] for request in requests: input_tensor = pb_utils.get_input_tensor_by_name(request, "IN") out_tensor = pb_utils.Tensor("OUT", input_tensor.as_numpy()) responses.append(pb_utils.InferenceResponse([out_tensor], error)) return responses
async def execute(self, requests):
    responses = []
    for _ in requests:
        # Run the unittest and store the results in InferenceResponse.
        result = await test_bls_out_of_memory()
        responses.append(
            pb_utils.InferenceResponse([
                pb_utils.Tensor('OUTPUT0', np.array([result],
                                                    dtype=np.float16))
            ]))
    return responses