def requestGenerator(input_name, output_name, c, h, w, format, dtype, FLAGS):
    """Yield a single ModelInferRequest for the image in FLAGS.image_filename.

    Parameters
    ----------
    input_name : str
        Name of the model's input tensor.
    output_name : str
        Name of the model's output tensor to request.
    c, h, w : int
        Channel / height / width of the model input.
    format : model-config format enum
        Input layout; mc.ModelInput.FORMAT_NHWC selects NHWC, anything
        else is treated as NCHW.
    dtype : str
        Triton datatype string for the input tensor (e.g. "FP32").
    FLAGS : argparse.Namespace
        Parsed command-line flags; this function reads model_name,
        model_version, batch_size, image_filename and scaling.

    Yields
    ------
    grpc_service_v2_pb2.ModelInferRequest
        A fully-populated inference request for one batch.
    """
    request = grpc_service_v2_pb2.ModelInferRequest()
    request.model_name = FLAGS.model_name
    request.model_version = FLAGS.model_version

    # Describe the output tensor we want back.
    output = grpc_service_v2_pb2.ModelInferRequest(
    ).InferRequestedOutputTensor()
    output.name = output_name
    request.outputs.extend([output])

    # Describe the input tensor; shape order depends on the model layout.
    # Renamed from `input` so the builtin is not shadowed.
    input_tensor = grpc_service_v2_pb2.ModelInferRequest().InferInputTensor()
    input_tensor.name = input_name
    input_tensor.datatype = dtype
    if format == mc.ModelInput.FORMAT_NHWC:
        input_tensor.shape.extend([FLAGS.batch_size, h, w, c])
    else:
        input_tensor.shape.extend([FLAGS.batch_size, c, h, w])

    # Preprocess image into input data according to model requirements.
    with Image.open(FLAGS.image_filename) as img:
        image_data = preprocess(img, format, dtype, c, h, w, FLAGS.scaling)

    # The batch is the same image repeated batch_size times. bytes
    # multiplication replaces the original `+=` loop, which copied the
    # accumulated buffer on every iteration (quadratic in batch_size);
    # it also yields b"" instead of None when batch_size is 0.
    input_bytes = image_data.tobytes() * FLAGS.batch_size

    input_contents = grpc_service_v2_pb2.InferTensorContents()
    input_contents.raw_contents = input_bytes
    input_tensor.contents.CopyFrom(input_contents)
    request.inputs.extend([input_tensor])

    yield request
def _get_inference_request(self, inputs, outputs, model_name, model_version,
                           request_id, sequence_id):
    """Creates and initializes an inference request.

    Parameters
    ----------
    inputs : list
        A list of InferInput objects, each describing data for a input
        tensor required by the model.
    outputs : list
        A list of InferOutput objects, each describing how the output
        data must be returned. Only the output tensors present in the
        list will be requested from the server.
    model_name : str
        The name of the model to run inference.
    model_version : int
        The version of the model to run inference. If -1 is given the
        server will choose a version based on the model and internal
        policy.
    request_id : str
        Optional identifier for the request. If specified will be
        returned in the response. Default value is None which means no
        request_id will be used.
    sequence_id : int
        The sequence ID of the inference request. Default is 0, which
        indicates that the request is not part of a sequence. The
        sequence ID is used to indicate that two or more inference
        requests are in the same sequence.
    """
    self._request = grpc_service_v2_pb2.ModelInferRequest()
    self._request.model_name = model_name
    self._request.model_version = model_version
    # Identity comparison (`is not None`) is the correct, PEP 8 way to
    # test for None; the original used the equality form `!= None`.
    if request_id is not None:
        self._request.id = request_id
    if sequence_id is not None:
        self._request.sequence_id = sequence_id
    for infer_input in inputs:
        self._request.inputs.extend([infer_input._get_tensor()])
    for infer_output in outputs:
        self._request.outputs.extend([infer_output._get_tensor()])
# NOTE(review): this chunk starts mid-script — `grpc_stub` and the initial
# `request` are created before the visible portion; verify against the full file.

# Server metadata (`request` here is presumably a ServerMetadataRequest
# built earlier — confirm).
response = grpc_stub.ServerMetadata(request)
print("server metadata:\n{}".format(response))

# Model metadata for a fixed model; version=-1 lets the server pick.
request = grpc_service_v2_pb2.ModelMetadataRequest(
    name="resnet_v1_50_graphdef", version=-1)
response = grpc_stub.ModelMetadata(request)
print("model metadata:\n{}".format(response))

# Configuration
request = grpc_service_v2_pb2.ModelConfigRequest(
    name="resnet_v1_50_graphdef", version=-1)
response = grpc_stub.ModelConfig(request)
print("model config:\n{}".format(response))

# Infer
request = grpc_service_v2_pb2.ModelInferRequest()
request.model_name = "resnet_v1_50_graphdef"
request.model_version = -1
request.id = "my request id"
input = grpc_service_v2_pb2.ModelInferRequest().InferInputTensor()
input.name = "input"
input.datatype = "FP32"
# Single NHWC image: batch 1, 224x224, 3 channels.
input.shape.extend([1, 224, 224, 3])
input_contents = grpc_service_v2_pb2.InferTensorContents()
# Dummy payload: 602112 = 1 * 224 * 224 * 3 * 4 bytes (FP32 elements).
input_contents.raw_contents = bytes(602112 * 'a', 'utf-8')
input.contents.CopyFrom(input_contents)
request.inputs.extend([input])
def __init__(self, name, data_format="binary"):
    """Describe one requested output tensor.

    Parameters
    ----------
    name : str
        Name of the output tensor to request from the server.
    data_format : str
        How the output data should be returned; defaults to "binary".
    """
    tensor = grpc_service_v2_pb2.ModelInferRequest(
    ).InferRequestedOutputTensor()
    tensor.name = name
    tensor.data_format = data_format
    self._output = tensor
def __init__(self, name):
    """Describe one model input tensor.

    Parameters
    ----------
    name : str
        Name of the input tensor as declared by the model.
    """
    tensor = grpc_service_v2_pb2.ModelInferRequest().InferInputTensor()
    tensor.name = name
    self._input = tensor