def predict(self, request_data, request_timeout=10):
    self.logger.info('Sending request to tfserving model')
    self.logger.info('Host: {}'.format(self.host))
    self.logger.info('Model name: {}'.format(self.model_name))
    self.logger.info('Model version: {}'.format(self.model_version))

    # Create gRPC client and request
    t = time.time()
    channel = grpc.insecure_channel(self.host)
    self.logger.debug(
        'Establishing insecure channel took: {}'.format(time.time() - t))

    t = time.time()
    stub = PredictionServiceStub(channel)
    self.logger.debug('Creating stub took: {}'.format(time.time() - t))

    t = time.time()
    request = PredictRequest()
    self.logger.debug(
        'Creating request object took: {}'.format(time.time() - t))

    request.model_spec.name = self.model_name
    request.model_spec.signature_name = 'predict_images'
    if self.model_version > 0:
        request.model_spec.version.value = self.model_version

    # Flatten the input into a single-row tensor (TF 1.x contrib helper).
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(request_data,
                                          shape=[1, request_data.size]))

    try:
        t = time.time()
        predict_response = stub.Predict(request, timeout=request_timeout)
        self.logger.debug(
            'Actual request took: {} seconds'.format(time.time() - t))
        predict_response_dict = predict_response_to_dict(predict_response)
        keys = [k for k in predict_response_dict]
        self.logger.info('Got predict_response with keys: {}'.format(keys))
        return predict_response_dict
    except RpcError as e:
        self.logger.error(e)
        self.logger.error('Prediction failed!')
        return {}
def send_prediction_request(host, port, model_name, model_version,
                            model_signatures, signature_name,
                            inputs_to_predict, batch_size):
    """
    Sends a sequence of inputs to a TF-serving model server for prediction

    :param host: (str) TF-serving model server host
    :param port: (int) TF-serving model server port
    :param model_name: (str) name of the model in the TF-serving model server
        that you want to predict with
    :param model_version: (int) version of the model in the TF-serving model
        server that you want to predict with
    :param model_signatures: (dict) model signature definitions, mapping each
        signature name to its input/output specs
    :param signature_name: (str) name of the model signature to predict with
    :param inputs_to_predict: (list(dict(object))) list of inputs (where each
        input is a dict mapping from input name to input object, named
        according to the model_spec)
    :param batch_size: (int) desired batch size with which to batch predictions
    :return: list of model predictions
    """
    # TODO: add support for REST API
    # TODO: handle multiple outputs, not assuming only one
    try:
        # create connection stub to TF serving server
        channel = grpc.insecure_channel(f'{host}:{port}')
        stub = PredictionServiceStub(channel)

        # assume a single output and look up its name in the signature
        model_spec_output_name = list(
            model_signatures[signature_name]['output'].keys())[0]

        # split the inputs into batches of the requested size
        input_batches = [
            inputs_to_predict[i:i + batch_size]
            for i in range(0, len(inputs_to_predict), batch_size)
        ]

        # format a request for each batch & collect results from the server
        predictions = []
        for input_batch in input_batches:
            grpc_request = format_grpc_request(model_name, model_version,
                                               model_signatures,
                                               signature_name, input_batch)
            result = stub.Predict(grpc_request)
            batch_predictions = list(
                result.outputs[model_spec_output_name].float_val)
            predictions += batch_predictions
        return predictions
    except Exception as e:
        print(e)
        return False
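# A hedged sketch of the format_grpc_request helper that the function above
# relies on but does not define. The layout of model_signatures (an 'input'
# key mirroring the 'output' key indexed above) is an assumption for
# illustration, as is batching by stacking each named input.
def format_grpc_request(model_name, model_version, model_signatures,
                        signature_name, input_batch):
    request = PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.signature_name = signature_name
    if model_version > 0:
        request.model_spec.version.value = model_version
    # Stack each named input across the batch into a single tensor proto.
    for input_name in model_signatures[signature_name]['input']:  # assumed key
        batch_values = np.stack([sample[input_name] for sample in input_batch])
        request.inputs[input_name].CopyFrom(
            tf.make_tensor_proto(batch_values, shape=batch_values.shape))
    return request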
def grpc_request(
    stub: prediction_service_pb2_grpc.PredictionServiceStub,
    data_sample: Any,
    input_name: str,
    model_name: str,
    signature_name: str,
    callback: Optional[Callable] = None,
    grpc_timeout: int = 20,
    async_: bool = False,
):
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.signature_name = signature_name
    request.inputs[input_name].CopyFrom(
        tf.make_tensor_proto(data_sample, shape=data_sample.shape))
    if async_:
        # Non-blocking call: returns a future that resolves to the response.
        result_future = stub.Predict.future(request, grpc_timeout)
    else:
        result_future = stub.Predict(request, grpc_timeout)
    if callback is not None:
        return callback(result_future)
    return result_future
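# A minimal usage sketch for grpc_request above. The host, model name,
# signature name and input name are assumptions for illustration.
import grpc
import numpy as np

channel = grpc.insecure_channel('localhost:8500')
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
sample = np.random.rand(1, 28, 28).astype(np.float32)

# Synchronous call: blocks until the response arrives or the timeout fires.
response = grpc_request(stub, sample, input_name='images',
                        model_name='my_model',
                        signature_name='serving_default')

# Asynchronous call: Predict.future returns immediately; the callback
# receives the future and extracts the result once it resolves.
response = grpc_request(stub, sample, input_name='images',
                        model_name='my_model',
                        signature_name='serving_default',
                        async_=True, callback=lambda fut: fut.result())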
def send_recurrent_inference_request(
    hidden_state: np.ndarray, action: np.ndarray,
    predict_service: prediction_service_pb2_grpc.PredictionServiceStub
) -> core.NetworkOutput:
    """Recurrent inference for the agent, used during MCTS."""
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.recurrent_inference_model_name
    request.model_spec.signature_name = 'recurrent_inference'
    request.inputs['hidden_state'].CopyFrom(
        tf.make_tensor_proto(values=tf.expand_dims(hidden_state, axis=0)))
    request.inputs['action'].CopyFrom(
        tf.make_tensor_proto(
            values=np.expand_dims(action, axis=0).astype(np.int32)))
    response = predict_service.Predict(request)

    # Parse and `unbatch` the response.
    map_names = {
        f'output_{i}': v for (i, v) in enumerate([
            'value', 'value_logits', 'reward', 'reward_logits',
            'policy_logits', 'hidden_state'
        ])
    }
    outputs = {
        map_names[k]: tf.make_ndarray(v).squeeze()
        for k, v in response.outputs.items()
    }
    return core.NetworkOutput(**outputs)
def create_prediction_service_stub(ip: str, port: int) -> PredictionServiceStub:
    """Creates a gRPC stub for the TF Serving prediction service.

    :param ip: host or IP address of the TF Serving server
    :param port: gRPC port of the TF Serving server
    :return: PredictionServiceStub
    """
    channel = grpc.insecure_channel('{}:{}'.format(ip, port))
    return PredictionServiceStub(channel)
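# Usage sketch: build the stub once and reuse it across requests. gRPC
# channels are comparatively expensive to establish, so per-process reuse
# is preferable to the channel-per-request pattern some snippets here use.
# Host and port are assumptions for illustration.
stub = create_prediction_service_stub('localhost', 8500)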
def send_initial_inference_request(
    predict_service: prediction_service_pb2_grpc.PredictionServiceStub,
    inputs: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
) -> core.NetworkOutput:
    """Initial inference for the agent, used at the beginning of MCTS."""
    input_ids, input_type_ids, input_features, action_history = inputs
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.initial_inference_model_name
    request.model_spec.signature_name = 'initial_inference'
    request.inputs['input_ids'].CopyFrom(
        tf.make_tensor_proto(values=np.expand_dims(input_ids, axis=0)))
    request.inputs['segment_ids'].CopyFrom(
        tf.make_tensor_proto(values=np.expand_dims(input_type_ids, axis=0)))
    request.inputs['features'].CopyFrom(
        tf.make_tensor_proto(values=np.expand_dims(input_features, axis=0)))
    request.inputs['action_history'].CopyFrom(
        tf.make_tensor_proto(values=np.expand_dims(action_history, axis=0)))
    response = predict_service.Predict(request)

    # Parse and `unbatch` the response.
    map_names = {
        f'output_{i}': v for (i, v) in enumerate([
            'value', 'value_logits', 'reward', 'reward_logits',
            'policy_logits', 'hidden_state'
        ])
    }
    outputs = {
        map_names[k]: tf.make_ndarray(v).squeeze()
        for k, v in response.outputs.items()
    }
    return core.NetworkOutput(**outputs)
def _make_inference_request(
    self,
    model_name: str,
    input_dict: Dict[str, np.ndarray],
    request_pb: RequestTypes,
    timeout: int,
    model_version: Optional[int],
) -> ResponseTypes:
    stub = PredictionServiceStub(self._channel)
    request = request_pb()
    request.model_spec.name = model_name
    if model_version is not None:
        request.model_spec.version.value = model_version
    for k, v in input_dict.items():
        request.inputs[k].CopyFrom(ndarray_to_tensor_proto(v))
    return stub.Predict(request, timeout)
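# A hedged sketch of the ndarray_to_tensor_proto helper assumed above but
# not defined in the snippet; tf.make_tensor_proto is the standard way to
# build a TensorProto from a NumPy array.
import numpy as np
import tensorflow as tf
from tensorflow.core.framework.tensor_pb2 import TensorProto

def ndarray_to_tensor_proto(arr: np.ndarray) -> TensorProto:
    return tf.make_tensor_proto(arr, shape=arr.shape)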
class GRPCPredictionAPI:
    """Class for interacting with a TensorFlow Serving server using gRPC"""

    def __init__(self, host: str = "localhost", port: int = 8500):
        self.host = host
        self.port = port
        self.url = f"{self.host}:{self.port}"
        channel = grpc.insecure_channel(self.url)
        self.stub = PredictionServiceStub(channel)

    def get_prediction(
        self,
        model_name: str,
        model_version: int,
        inputs: np.ndarray,
        input_layer_name: str,
        output_layer_name: str,
        input_shape: Tuple[int],
        output_shape: Tuple[int] = None
    ) -> np.ndarray:
        """Get predictions from a TensorFlow Serving server, for the
        specified model, version and input.

        Args:
            model_name (str): Model name
            model_version (int): Version of the model
            inputs (np.ndarray): Input as a NumPy array, in the shape expected
                by the model. This may require an extra axis for the number
                of instances of the input, e.g. (1, 224, 224, 3)
            input_layer_name (str): Input layer name in the model
            output_layer_name (str): Output layer name in the model
            input_shape (Tuple[int]): Shape of the input. Depending on the
                model, an extra first axis may be required which encodes the
                number of instances of the input, e.g. (1, 224, 224, 3)
            output_shape (Tuple[int]): Shape of the model output, where
                typically the first axis is the number of instances of the
                input provided.

        Returns:
            np.ndarray: Predictions from the model
        """
        request = PredictRequest()
        request.model_spec.name = model_name
        # Pin the requested model version (otherwise the latest is served).
        request.model_spec.version.value = model_version
        request.model_spec.signature_name = "serving_default"
        request.inputs[input_layer_name].CopyFrom(
            tf.make_tensor_proto(
                inputs.astype(np.float32), shape=input_shape
            )
        )
        result = self.stub.Predict(request)
        return np.array(
            result.outputs[output_layer_name].float_val
        ).reshape(output_shape)
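# Usage sketch for GRPCPredictionAPI. The model name, layer names and
# shapes below are assumptions for illustration, not values from the source.
api = GRPCPredictionAPI(host="localhost", port=8500)
image = np.zeros((1, 224, 224, 3), dtype=np.float32)
predictions = api.get_prediction(
    model_name="resnet",
    model_version=1,
    inputs=image,
    input_layer_name="input_1",
    output_layer_name="predictions",
    input_shape=(1, 224, 224, 3),
    output_shape=(1, 1000),
)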
def doTest(host, port):
    from tensorflow_serving.apis.predict_pb2 import PredictRequest
    from tensorflow_serving.apis.prediction_service_pb2_grpc import PredictionServiceStub
    from grpc import insecure_channel, StatusCode
    from tensorflow.contrib.util import make_tensor_proto, make_ndarray
    from tensorflow import float32

    target = "%s:%s" % (host, port)
    print("Sending prediction request to", target, "\n")

    channel = insecure_channel(target)
    stub = PredictionServiceStub(channel)

    request = PredictRequest()
    request.model_spec.name = "campaign"
    request.model_spec.signature_name = ""
    request.inputs["hour"].CopyFrom(make_tensor_proto(6, shape=[1], dtype=float32))
    request.inputs["week"].CopyFrom(make_tensor_proto(5, shape=[1], dtype=float32))
    request.inputs["sid"].CopyFrom(make_tensor_proto("47320", shape=[1]))
    request.inputs["sspid"].CopyFrom(make_tensor_proto("3", shape=[1]))
    request.inputs["country"].CopyFrom(make_tensor_proto("DK", shape=[1]))
    request.inputs["os"].CopyFrom(make_tensor_proto("6", shape=[1]))
    request.inputs["domain"].CopyFrom(make_tensor_proto("video9.in", shape=[1]))
    request.inputs["isp"].CopyFrom(make_tensor_proto("Tele Danmark", shape=[1]))
    request.inputs["browser"].CopyFrom(make_tensor_proto("4", shape=[1]))
    request.inputs["type"].CopyFrom(make_tensor_proto("site", shape=[1]))
    request.inputs["lat"].CopyFrom(make_tensor_proto(35000, shape=[1], dtype=float32))
    request.inputs["lon"].CopyFrom(make_tensor_proto(105000, shape=[1], dtype=float32))
    request.inputs["connectiontype"].CopyFrom(make_tensor_proto("2", shape=[1]))
    request.inputs["devicetype"].CopyFrom(make_tensor_proto("1", shape=[1]))
    request.inputs["donottrack"].CopyFrom(make_tensor_proto("0", shape=[1]))
    request.inputs["userid"].CopyFrom(make_tensor_proto("984273063", shape=[1]))
    request.inputs["ua"].CopyFrom(make_tensor_proto(
        "Mozilla/5.0 (Linux; U; Android 5.1.1; en-US; Redmi Note 3 Build/LMY47V) "
        "AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 UCBrowser/11.0.8.855 "
        "U3/0.8.0 Mobile Safari/534.30", shape=[1]))

    # with_call returns both the response and the call object, which carries
    # the status code.
    (result, status) = stub.Predict.with_call(request)
    if status.code() != StatusCode.OK:
        print("call failed", status)
        return

    predictions = make_ndarray(result.outputs["classes"])
    if predictions.size == 0:
        print("no prediction replied")
        return

    cidIndex = predictions[0]
    print("Server predict with index", cidIndex)
def get_model_version(
        model_name: str,
        stub: prediction_service_pb2_grpc.PredictionServiceStub) -> str:
    """Returns the version of the model.

    Parameters
    ----------
    model_name : str
        Name of the model to query.
    stub : prediction_service_pb2_grpc.PredictionServiceStub
        Prediction API.

    Returns
    -------
    str
        Version of the model.
    """
    request = get_model_metadata_pb2.GetModelMetadataRequest()
    request.model_spec.name = model_name
    request.metadata_field.append("signature_def")
    response = stub.GetModelMetadata(request, 10)  # 10-second timeout
    # model_spec.version is an int64 wrapper value; convert to str to match
    # the annotated return type.
    return str(response.model_spec.version.value)
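# Usage sketch combining the stub factory above with the metadata query.
# Host, port and model name are assumptions for illustration.
stub = create_prediction_service_stub('localhost', 8500)
print(get_model_version('my_model', stub))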
def _initialize_worker(server_address):
    global _worker_channel_singleton  # pylint: disable=global-statement
    global _worker_stub_singleton  # pylint: disable=global-statement
    logger.info('Initializing worker process.')
    _worker_channel_singleton = grpc.insecure_channel(server_address)
    _worker_stub_singleton = PredictionServiceStub(_worker_channel_singleton)
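# A minimal sketch of how a worker initializer like this is typically used:
# each process in a multiprocessing.Pool builds its own channel and stub,
# since gRPC channels must not be shared across forked worker processes.
# _run_one_request and the `requests` iterable are assumptions for
# illustration.
import multiprocessing

def _run_one_request(request):
    # Uses the per-process stub created by _initialize_worker.
    return _worker_stub_singleton.Predict(request, 10)

with multiprocessing.Pool(processes=4,
                          initializer=_initialize_worker,
                          initargs=('localhost:8500',)) as pool:
    results = pool.map(_run_one_request, requests)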
def predict(self, request_data, request_timeout=10):
    self.logger.info('Sending request to tfserving model')
    self.logger.info('Host: {}'.format(self.host))
    self.logger.info('Model name: {}'.format(self.model_name))
    self.logger.info('Model version: {}'.format(self.model_version))

    # Load and preprocess the image into the shape the model expects.
    image = Image.open(request_data)
    image = image.resize((224, 224), Image.NEAREST)
    image = np.asarray(image).reshape((1, 224, 224, 3))

    # Create gRPC client and request
    t = time.time()
    channel = grpc.insecure_channel(self.host)
    self.logger.debug(
        'Establishing insecure channel took: {}'.format(time.time() - t))

    t = time.time()
    stub = PredictionServiceStub(channel)
    self.logger.debug('Creating stub took: {}'.format(time.time() - t))

    t = time.time()
    request = PredictRequest()
    self.logger.debug(
        'Creating request object took: {}'.format(time.time() - t))

    request.model_spec.name = self.model_name
    request.model_spec.signature_name = 'predict_images'
    if self.model_version > 0:
        request.model_spec.version.value = self.model_version

    self.logger.debug('Image shape: {}'.format(image.shape))
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(image.astype(dtype=np.float32),
                                          shape=[1, 224, 224, 3]))

    try:
        t = time.time()
        predict_response = stub.Predict(request, timeout=request_timeout)
        self.logger.debug(
            'Actual request took: {} seconds'.format(time.time() - t))
        predict_response_dict = predict_response_to_dict(predict_response)
        keys = [k for k in predict_response_dict]
        self.logger.info('Got predict_response with keys: {}'.format(keys))
        return predict_response_dict
    except RpcError as e:
        self.logger.error(e)
        self.logger.error('Prediction failed!')
        return {}
#!/usr/bin/env python
import cv2
import numpy as np
import tensorflow as tf
from tensorflow_serving.apis.prediction_service_pb2_grpc import PredictionServiceStub
from tensorflow_serving.apis.predict_pb2 import PredictRequest
import grpc
import time

# Read the image and add a batch axis.
image_data = cv2.imread("image.jpeg")
inputs = np.array([image_data])

channel = grpc.insecure_channel("localhost:8900")
stub = PredictionServiceStub(channel)

request = PredictRequest()
request.model_spec.name = "retinanet"
request.inputs["inputs"].CopyFrom(
    tf.contrib.util.make_tensor_proto(inputs, shape=inputs.shape))

# 60-second timeout for the prediction call.
result = stub.Predict(request, 60)

boxes = tf.make_ndarray(result.outputs["detection_boxes"])
scores = tf.make_ndarray(result.outputs["detection_scores"])
labels = tf.make_ndarray(result.outputs["detection_classes"])
num_detections = tf.make_ndarray(result.outputs["num_detections"])


def box_normal_to_pixel(box, dim, scalefactor=1):
def _retry_grpc(self, request, request_timeout):
    request_name = request.__class__.__name__
    self.logger.info('Sending %s to %s.', request_name, self.host)

    true_failures, count = 0, 0
    retrying = True
    while retrying:
        with self.insecure_channel() as channel:  # pylint: disable=E1101
            try:
                t = timeit.default_timer()
                stub = PredictionServiceStub(channel)
                api_endpoint_name = self.stub_lookup.get(request.__class__)
                api_call = getattr(stub, api_endpoint_name)
                response = api_call(request, timeout=request_timeout)
                self.logger.debug(
                    '%s finished in %s seconds (%s retries).',
                    request_name, timeit.default_timer() - t, true_failures)
                return response
            except grpc.RpcError as err:
                if true_failures > settings.MAX_RETRY > 0:
                    retrying = False
                    self.logger.error(
                        '%s has failed %s times due to err %s',
                        request_name, count, err)
                    raise err

                if err.code() in self.retry_status_codes:
                    count += 1
                    # UNAVAILABLE errors do not count as true failures;
                    # other retryable status codes do.
                    is_true_failure = err.code() != grpc.StatusCode.UNAVAILABLE
                    true_failures += int(is_true_failure)

                    self.logger.warning(
                        '%sException `%s: %s` during %s %s to model %s:%s. '
                        'Waiting %s seconds before retrying.',
                        type(err).__name__, err.code().name, err.details(),
                        self.__class__.__name__, request_name,
                        self.model_name, self.model_version,
                        settings.GRPC_BACKOFF)

                    time.sleep(settings.GRPC_BACKOFF)  # sleep before retry
                    retrying = True  # Unnecessary but explicit
                else:
                    retrying = False
                    raise err
            except Exception as err:
                retrying = False
                self.logger.error(
                    'Encountered %s during %s to model %s:%s: %s',
                    type(err).__name__, request_name,
                    self.model_name, self.model_version, err)
                raise err