Example #1
    def predict(self, request_data, request_timeout=10):

        self.logger.info('Sending request to tfserving model')
        self.logger.info('Host: {}'.format(self.host))
        self.logger.info('Model name: {}'.format(self.model_name))
        self.logger.info('Model version: {}'.format(self.model_version))

        # Create gRPC client and request
        t = time.time()
        channel = grpc.insecure_channel(self.host)
        self.logger.debug(
            'Establishing insecure channel took: {}'.format(time.time() - t))

        t = time.time()
        stub = PredictionServiceStub(channel)
        self.logger.debug('Creating stub took: {}'.format(time.time() - t))

        t = time.time()
        request = PredictRequest()
        self.logger.debug(
            'Creating request object took: {}'.format(time.time() - t))

        request.model_spec.name = self.model_name
        request.model_spec.signature_name = 'predict_images'

        if self.model_version > 0:
            request.model_spec.version.value = self.model_version

        request.inputs['images'].CopyFrom(
            tf.contrib.util.make_tensor_proto(request_data,
                                              shape=[1, request_data.size]))

        try:
            t = time.time()
            predict_response = stub.Predict(request, timeout=request_timeout)

            self.logger.debug(
                'Actual request took: {} seconds'.format(time.time() - t))

            predict_response_dict = predict_response_to_dict(predict_response)

            keys = [k for k in predict_response_dict]
            self.logger.info('Got predict_response with keys: {}'.format(keys))

            return predict_response_dict

        except RpcError as e:
            self.logger.error(e)
            self.logger.error('Prediction failed!')

        return {}
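A minimal usage sketch for this method, assuming a hypothetical TFServingClient wrapper class that owns predict() and sets self.host, self.model_name, self.model_version, and self.logger in its constructor:

import numpy as np

# TFServingClient is a hypothetical wrapper around the predict() method above.
client = TFServingClient(host='localhost:8500', model_name='mnist', model_version=1)
flat_image = np.random.rand(784).astype(np.float32)  # reshaped to [1, N] inside predict()
response_dict = client.predict(flat_image, request_timeout=5)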
Example #2
def send_prediction_request(host, port, model_name, model_version,
                            model_signatures, signature_name,
                            inputs_to_predict, batch_size):
    """ Sends sequence of inputs to a TF-serving model server for prediction

    :param host: (str) TF-serving model server host
    :param port: (int) TF-serving model server port
    :param model_name: (str) name of model in TF-serving model server that you want to predict with
    :param model_version: (int) version of model in TF-serving model server that you want to predict with
    :param model_signatures: (dict) mapping from signature name to the model's input/output specs
    :param signature_name: (str) name of the signature to use for prediction
    :param inputs_to_predict: (list(dict(object))) list of inputs (where each input is a dict mapping from input name
                                                    to input object, named according to the model_spec)
    :param batch_size: (int) desired batch size with which to batch predictions
    :return: list of model predictions
    """

    # TODO: add support for REST API
    # TODO: handle multiple outputs, not assuming only one

    try:
        # create connection stub to TF serving server
        channel = grpc.insecure_channel(f'{host}:{port}')
        stub = PredictionServiceStub(channel)

    # look up the output tensor name and split the inputs into batches
        model_spec_output_name = list(
            model_signatures[signature_name]['output'].keys())[0]
        input_batches = [
            inputs_to_predict[i:i + batch_size]
            for i in range(0, len(inputs_to_predict), batch_size)
        ]

        # format request for each batch & get result from server
        predictions = []
        for input_batch in input_batches:
            grpc_request = format_grpc_request(model_name, model_version,
                                               model_signatures,
                                               signature_name, input_batch)
            result = stub.Predict(grpc_request)
            batch_predictions = list(
                result.outputs[model_spec_output_name].float_val)
            predictions += batch_predictions

        return predictions
    except Exception as e:
        print(e)
        return False
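The batching idiom above is a standard list-slicing pattern; the same comprehension in isolation, runnable on its own:

inputs = list(range(10))
batch_size = 4
# Slice the list into consecutive chunks of at most batch_size items.
batches = [inputs[i:i + batch_size] for i in range(0, len(inputs), batch_size)]
print(batches)  # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]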
Example #3
def grpc_request(
    stub: prediction_service_pb2_grpc.PredictionServiceStub,
    data_sample: Any,
    input_name: str,
    model_name: str,
    signature_name: str,
    callback: Optional[Callable] = None,
    grpc_timeout: int = 20,
    async_: bool = False,
):
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.signature_name = signature_name

    request.inputs[input_name].CopyFrom(tf.make_tensor_proto(data_sample, shape=data_sample.shape))

    if async_:
        result = stub.Predict.future(request, grpc_timeout)
    else:
        result = stub.Predict(request, grpc_timeout)

    if callback is not None:
        return callback(result)

    return result
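A hedged sketch of the asynchronous path, assuming a stub and a suitably shaped NumPy array x already exist (the model name is a placeholder):

def on_done(future):
    # future.result() blocks until the server replies, then returns the PredictResponse.
    return future.result()

response = grpc_request(stub, data_sample=x, input_name='images',
                        model_name='my_model',           # placeholder model name
                        signature_name='serving_default',
                        callback=on_done, async_=True)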
예제 #4
0
def send_recurrent_inference_request(
    hidden_state: np.ndarray, action: np.ndarray,
    predict_service: prediction_service_pb2_grpc.PredictionServiceStub
) -> core.NetworkOutput:
  """Recurrent inference for the agent, used during MCTS."""
  request = predict_pb2.PredictRequest()
  request.model_spec.name = FLAGS.recurrent_inference_model_name
  request.model_spec.signature_name = 'recurrent_inference'

  request.inputs['hidden_state'].CopyFrom(
      tf.make_tensor_proto(values=tf.expand_dims(hidden_state, axis=0)))
  request.inputs['action'].CopyFrom(
      tf.make_tensor_proto(
          values=np.expand_dims(action, axis=0).astype(np.int32)))
  response = predict_service.Predict(request)

  # Parse and `unbatch` the response.
  map_names = {
      f'output_{i}': v for (i, v) in enumerate([
          'value', 'value_logits', 'reward', 'reward_logits', 'policy_logits',
          'hidden_state'
      ])
  }
  outputs = {
      map_names[k]: tf.make_ndarray(v).squeeze()
      for k, v in response.outputs.items()
  }

  return core.NetworkOutput(**outputs)
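The remapping above relies on TF-serving naming otherwise-unnamed outputs output_0, output_1, ... in signature order; the same dict-comprehension pattern in isolation, with fake arrays purely for illustration:

import numpy as np

fake_outputs = {'output_0': np.zeros((1, 1)), 'output_1': np.ones((1, 3))}
map_names = {f'output_{i}': v for i, v in enumerate(['value', 'policy_logits'])}
# Rename each output and drop the leading batch dimension.
unbatched = {map_names[k]: v.squeeze() for k, v in fake_outputs.items()}
# unbatched == {'value': array(0.), 'policy_logits': array([1., 1., 1.])}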
예제 #5
0
def create_prediction_service_stub(ip: str,
                                   port: int) -> PredictionServiceStub:
    """
    :return: PredictionServiceStub
    """
    channel = grpc.insecure_channel('{}:{}'.format(ip, port))
    return PredictionServiceStub(channel)
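Usage is a one-liner (the address values are placeholders); for a TLS-secured endpoint you would build the channel with grpc.secure_channel and credentials instead:

stub = create_prediction_service_stub('localhost', 8500)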
예제 #6
0
def send_initial_inference_request(
    predict_service: prediction_service_pb2_grpc.PredictionServiceStub,
    inputs: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
) -> core.NetworkOutput:
  """Initial inference for the agent, used at the beginning of MCTS."""
  input_ids, input_type_ids, input_features, action_history = inputs

  request = predict_pb2.PredictRequest()
  request.model_spec.name = FLAGS.initial_inference_model_name
  request.model_spec.signature_name = 'initial_inference'

  request.inputs['input_ids'].CopyFrom(
      tf.make_tensor_proto(values=np.expand_dims(input_ids, axis=0)))
  request.inputs['segment_ids'].CopyFrom(
      tf.make_tensor_proto(values=np.expand_dims(input_type_ids, axis=0)))
  request.inputs['features'].CopyFrom(
      tf.make_tensor_proto(values=np.expand_dims(input_features, axis=0)))
  request.inputs['action_history'].CopyFrom(
      tf.make_tensor_proto(values=np.expand_dims(action_history, axis=0)))
  response = predict_service.Predict(request)

  # Parse and `unbatch` the response.
  map_names = {
      f'output_{i}': v for (i, v) in enumerate([
          'value', 'value_logits', 'reward', 'reward_logits', 'policy_logits',
          'hidden_state'
      ])
  }
  outputs = {
      map_names[k]: tf.make_ndarray(v).squeeze()
      for k, v in response.outputs.items()
  }

  return core.NetworkOutput(**outputs)
예제 #7
0
    def _make_inference_request(
        self,
        model_name: str,
        input_dict: Dict[str, np.ndarray],
        request_pb: RequestTypes,
        timeout: int,
        model_version: Optional[int],
    ) -> ResponseTypes:
        stub = PredictionServiceStub(self._channel)
        request = request_pb()
        request.model_spec.name = model_name

        if model_version is not None:
            request.model_spec.version.value = model_version

        for k, v in input_dict.items():
            request.inputs[k].CopyFrom(ndarray_to_tensor_proto(v))
        return stub.Predict(request, timeout)
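The ndarray_to_tensor_proto helper is not shown here; a minimal stand-in, assuming it simply wraps tf.make_tensor_proto:

import numpy as np
import tensorflow as tf

def ndarray_to_tensor_proto(arr: np.ndarray):
    # Assumed behavior: convert a NumPy array to a TensorProto,
    # preserving its shape and dtype.
    return tf.make_tensor_proto(arr, shape=arr.shape)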
예제 #8
0
class GRPCPredictionAPI:
    """Class for interacting with TensorFlow Serving server using gRPC"""

    def __init__(self, host: str = "localhost", port: int = 8500):
        self.host = host
        self.port = port
        self.url = f"{self.host}:{self.port}"
        channel = grpc.insecure_channel(self.url)
        self.stub = PredictionServiceStub(channel)

    def get_prediction(
        self,
        model_name: str,
        model_version: int,
        inputs: np.ndarray,
        input_layer_name: str,
        output_layer_name: str,
        input_shape: Tuple[int, ...],
        output_shape: Optional[Tuple[int, ...]] = None
    ) -> np.ndarray:
        """Get predictions from TensorFlow Serving server, from the specified
        model, version and input.

        Args:
            model_name (str): Model name
            model_version (int): Version of model
            inputs (np.ndarray): Input as a NumPy array, in the correct shape
                as expected by the model. This may require an extra axis for
                number of instances of the input e.g. (1, 224, 224, 3)
            input_layer_name (str): Input layer name in model
            output_layer_name (str): Output layer in model
            input_shape (Tuple[int]): Shape of the input. Depending on the
                model, an extra first axis may be required which encodes
                the number of instances of the input e.g. (1, 224, 224, 3)
            output_shape (Tuple[int]): Shape of the model output, where
                typically the first axis is the number of instances of the
                input provided.

        Returns:
            np.ndarray: Predictions from model
        """
        request = PredictRequest()
        request.model_spec.name = model_name
        request.model_spec.version.value = model_version
        request.model_spec.signature_name = "serving_default"
        request.inputs[input_layer_name].CopyFrom(
            tf.make_tensor_proto(
                inputs.astype(np.float32), shape=input_shape
            )
        )
        result = self.stub.Predict(request)
        return np.array(result.outputs[output_layer_name].float_val).reshape(output_shape)
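A usage sketch, where the model name, layer names, and shapes are placeholders for whatever your SavedModel actually exposes:

import numpy as np

api = GRPCPredictionAPI(host='localhost', port=8500)
preds = api.get_prediction(
    model_name='resnet', model_version=1,
    inputs=np.zeros((1, 224, 224, 3), dtype=np.float32),
    input_layer_name='input_1', output_layer_name='predictions',
    input_shape=(1, 224, 224, 3), output_shape=(1, 1000))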
예제 #9
0
def doTest(host, port):
    from tensorflow_serving.apis.predict_pb2 import PredictRequest
    from tensorflow_serving.apis.prediction_service_pb2_grpc import PredictionServiceStub
    from grpc import insecure_channel, StatusCode
    from tensorflow.contrib.util import make_tensor_proto, make_ndarray
    from tensorflow import float32

    target = "%s:%s" % (host, port)

    print("Sending prediction request to", target, "\n")

    channel = insecure_channel(target)
    stub = PredictionServiceStub(channel)

    request = PredictRequest()
    request.model_spec.name = "campaign"
    request.model_spec.signature_name = ""

    request.inputs["hour"].CopyFrom(make_tensor_proto(6, shape=[1], dtype=float32))
    request.inputs["week"].CopyFrom(make_tensor_proto(5, shape=[1], dtype=float32))
    request.inputs["sid"].CopyFrom(make_tensor_proto("47320", shape=[1]))
    request.inputs["sspid"].CopyFrom(make_tensor_proto("3", shape=[1]))
    request.inputs["country"].CopyFrom(make_tensor_proto("DK", shape=[1]))
    request.inputs["os"].CopyFrom(make_tensor_proto("6", shape=[1]))
    request.inputs["domain"].CopyFrom(make_tensor_proto("video9.in", shape=[1]))
    request.inputs["isp"].CopyFrom(make_tensor_proto("Tele Danmark", shape=[1]))
    request.inputs["browser"].CopyFrom(make_tensor_proto("4", shape=[1]))
    request.inputs["type"].CopyFrom(make_tensor_proto("site", shape=[1]))
    request.inputs["lat"].CopyFrom(make_tensor_proto(35000, shape=[1], dtype=float32))
    request.inputs["lon"].CopyFrom(make_tensor_proto(105000, shape=[1], dtype=float32))
    request.inputs["connectiontype"].CopyFrom(make_tensor_proto("2", shape=[1]))
    request.inputs["devicetype"].CopyFrom(make_tensor_proto("1", shape=[1]))
    request.inputs["donottrack"].CopyFrom(make_tensor_proto("0", shape=[1]))
    request.inputs["userid"].CopyFrom(make_tensor_proto("984273063", shape=[1]))
    request.inputs["ua"].CopyFrom(make_tensor_proto("Mozilla/5.0 (Linux; U; Android 5.1.1; en-US; Redmi Note 3 Build/LMY47V) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 UCBrowser/11.0.8.855 U3/0.8.0 Mobile Safari/534.30", shape=[1]))

    (result, call) = stub.Predict.with_call(request)

    if call.code() != StatusCode.OK:
        print("call failed", call)
        return

    predictions = make_ndarray(result.outputs["classes"])

    if predictions.size == 0:
        print("no prediction in the reply")
        return

    cidIndex = predictions[0]
    print("Server predicted index", cidIndex)
def get_model_version(
        model_name: str,
        stub: prediction_service_pb2_grpc.PredictionServiceStub) -> str:
    """Returns the version of the model.

    Parameters
    ----------
    model_name : str
    stub : prediction_service_pb2_grpc.PredictionServiceStub
        Prediction API.

    Returns
    -------
    str
        Version of the model.

    """
    request = get_model_metadata_pb2.GetModelMetadataRequest()
    request.model_spec.name = model_name
    request.metadata_field.append("signature_def")
    response = stub.GetModelMetadata(request, 10)
    return str(response.model_spec.version.value)
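Usage sketch, reusing a stub built as in Example #5 (the model name is a placeholder):

version = get_model_version('my_model', stub)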
Example #11
def _initialize_worker(server_address):
    global _worker_channel_singleton  # pylint: disable=global-statement
    global _worker_stub_singleton  # pylint: disable=global-statement
    logger.info('Initializing worker process.')
    _worker_channel_singleton = grpc.insecure_channel(server_address)
    _worker_stub_singleton = PredictionServiceStub(_worker_channel_singleton)
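This initializer pattern is typically paired with a multiprocessing pool so each worker process keeps one long-lived channel; a hedged sketch (the address is a placeholder):

import multiprocessing

pool = multiprocessing.Pool(processes=4,
                            initializer=_initialize_worker,
                            initargs=('localhost:8500',))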
Example #12
    def predict(self, request_data, request_timeout=10):

        self.logger.info('Sending request to tfserving model')
        self.logger.info('Host: {}'.format(self.host))
        self.logger.info('Model name: {}'.format(self.model_name))
        self.logger.info('Model version: {}'.format(self.model_version))

        image = Image.open(request_data)
        image = image.resize((224, 224), Image.NEAREST)

        image = np.asarray(image).reshape((1, 224, 224, 3))

        # Create gRPC client and request
        t = time.time()
        channel = grpc.insecure_channel(self.host)
        self.logger.debug(
            'Establishing insecure channel took: {}'.format(time.time() - t))

        t = time.time()
        stub = PredictionServiceStub(channel)
        self.logger.debug('Creating stub took: {}'.format(time.time() - t))

        t = time.time()
        request = PredictRequest()
        self.logger.debug(
            'Creating request object took: {}'.format(time.time() - t))

        request.model_spec.name = self.model_name
        request.model_spec.signature_name = 'predict_images'

        if self.model_version > 0:
            request.model_spec.version.value = self.model_version

        self.logger.debug('Image shape: {}'.format(image.shape))

        request.inputs['images'].CopyFrom(
            tf.contrib.util.make_tensor_proto(image.astype(dtype=np.float32),
                                              shape=[1, 224, 224, 3]))

        try:
            t = time.time()
            predict_response = stub.Predict(request, timeout=request_timeout)

            self.logger.debug(
                'Actual request took: {} seconds'.format(time.time() - t))

            predict_response_dict = predict_response_to_dict(predict_response)

            keys = [k for k in predict_response_dict]
            self.logger.info('Got predict_response with keys: {}'.format(keys))

            return predict_response_dict

        except RpcError as e:
            self.logger.error(e)
            self.logger.error('Prediction failed!')

        return {}
Example #13
#!/usr/bin/env python

import cv2
import numpy as np
import tensorflow as tf
from tensorflow_serving.apis.prediction_service_pb2_grpc import PredictionServiceStub
from tensorflow_serving.apis.predict_pb2 import PredictRequest
import grpc
import time

image_data = cv2.imread("image.jpeg")
inputs = np.array([image_data])

channel = grpc.insecure_channel("localhost:8900")
stub = PredictionServiceStub(channel)

request = PredictRequest()
request.model_spec.name = "retinanet"

request.inputs["inputs"].CopyFrom(
    tf.contrib.util.make_tensor_proto(inputs, shape=inputs.shape))

result = stub.Predict(request, 60)

boxes = tf.make_ndarray(result.outputs["detection_boxes"])
scores = tf.make_ndarray(result.outputs["detection_scores"])
labels = tf.make_ndarray(result.outputs["detection_classes"])
num_detections = tf.make_ndarray(result.outputs["num_detections"])


def box_normal_to_pixel(box, dim, scalefactor=1):
    # Convert a normalized [ymin, xmin, ymax, xmax] box into pixel
    # coordinates for an image of size dim = (height, width).
    height, width = dim[0] * scalefactor, dim[1] * scalefactor
    ymin, xmin, ymax, xmax = box
    return np.array([int(xmin * width), int(ymin * height),
                     int(xmax * width), int(ymax * height)])
Example #14
    def _retry_grpc(self, request, request_timeout):
        request_name = request.__class__.__name__
        self.logger.info('Sending %s to %s.', request_name, self.host)

        true_failures, count = 0, 0

        retrying = True
        while retrying:
            with self.insecure_channel() as channel:
                # pylint: disable=E1101
                try:
                    t = timeit.default_timer()

                    stub = PredictionServiceStub(channel)

                    api_endpoint_name = self.stub_lookup.get(request.__class__)
                    api_call = getattr(stub, api_endpoint_name)
                    response = api_call(request, timeout=request_timeout)

                    self.logger.debug(
                        '%s finished in %s seconds (%s retries).',
                        request_name,
                        timeit.default_timer() - t, true_failures)
                    return response

                except grpc.RpcError as err:
                    if true_failures > settings.MAX_RETRY > 0:
                        retrying = False
                        self.logger.error(
                            '%s has failed %s times due to err '
                            '%s', request_name, count, err)
                        raise err

                    if err.code() in self.retry_status_codes:
                        count += 1
                        is_true_failure = (err.code() !=
                                           grpc.StatusCode.UNAVAILABLE)
                        true_failures += int(is_true_failure)

                        self.logger.warning(
                            '%sException `%s: %s` during '
                            '%s %s to model %s:%s. Waiting %s '
                            'seconds before retrying.',
                            type(err).__name__,
                            err.code().name, err.details(),
                            self.__class__.__name__, request_name,
                            self.model_name, self.model_version,
                            settings.GRPC_BACKOFF)

                        time.sleep(settings.GRPC_BACKOFF)  # sleep before retry
                        retrying = True  # Unnecessary but explicit
                    else:
                        retrying = False
                        raise err
                except Exception as err:
                    retrying = False
                    self.logger.error(
                        'Encountered %s during %s to model '
                        '%s:%s: %s',
                        type(err).__name__, request_name, self.model_name,
                        self.model_version, err)
                    raise err
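The settings module referenced above is not shown; a minimal stand-in with assumed values, just to make the retry control flow concrete:

class settings:  # stand-in for the real configuration module
    MAX_RETRY = 5      # assumed: give up after this many non-UNAVAILABLE failures
    GRPC_BACKOFF = 3   # assumed: seconds to sleep between retries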