def do_inference(hostport, work_dir, concurrency, num_tests):
    """Tests PredictionService with concurrent requests.
  Args:
    hostport: Host:port address of the PredictionService.
    work_dir: The full path of working directory for test data set.
    concurrency: Maximum number of concurrent requests.
    num_tests: Number of test images to use.
  Returns:
    The classification error rate.
  Raises:
    IOError: An error occurred processing test data set.
  """
    test_data_set = input_data.read_data_sets(work_dir).test
    host, port = hostport.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    result_counter = _ResultCounter(num_tests, concurrency)
    for _ in range(num_tests):
        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'mnist'
        image, label = test_data_set.next_batch(1)
        request.inputs['images'].CopyFrom(
            tf.contrib.util.make_tensor_proto(image[0],
                                              shape=[1, image[0].size]))
        result_counter.throttle()
        result_future = stub.Predict.future(request, 5.0)  # 5 seconds
        result_future.add_done_callback(
            _create_rpc_callback(label[0], result_counter))
    return result_counter.get_error_rate()
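# _ResultCounter and _create_rpc_callback are not shown in this snippet; a
# minimal sketch along the lines of TensorFlow Serving's mnist_client.py
# tutorial (the 'scores' output key is an assumption):
import threading

import numpy


class _ResultCounter(object):
    """Counts completed RPCs and errors while throttling in-flight requests."""

    def __init__(self, num_tests, concurrency):
        self._num_tests = num_tests
        self._concurrency = concurrency
        self._error = 0
        self._done = 0
        self._active = 0
        self._condition = threading.Condition()

    def inc_error(self):
        with self._condition:
            self._error += 1

    def inc_done(self):
        with self._condition:
            self._done += 1
            self._condition.notify()

    def dec_active(self):
        with self._condition:
            self._active -= 1
            self._condition.notify()

    def get_error_rate(self):
        with self._condition:
            while self._done != self._num_tests:
                self._condition.wait()
            return self._error / float(self._num_tests)

    def throttle(self):
        with self._condition:
            while self._active == self._concurrency:
                self._condition.wait()
            self._active += 1


def _create_rpc_callback(label, result_counter):
    """Returns a callback that scores one async prediction against its label."""
    def _callback(result_future):
        exception = result_future.exception()
        if exception:
            result_counter.inc_error()
            print(exception)
        else:
            response = numpy.array(
                result_future.result().outputs['scores'].float_val)
            if label != numpy.argmax(response):
                result_counter.inc_error()
        result_counter.inc_done()
        result_counter.dec_active()
    return _callback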
def main():
    host = FLAGS.host
    port = FLAGS.port
    model_name = FLAGS.model_name
    model_version = FLAGS.model_version
    request_timeout = FLAGS.request_timeout

    # Create gRPC client and request
    channel = implementations.insecure_channel(host, port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    if model_version > 0:
        request.model_spec.version.value = model_version

    inputs_np = numpy.asarray([sys.argv[1]])
    inputs_tensor_proto = tf.contrib.util.make_tensor_proto(inputs_np,
                                                            dtype=tf.float32)
    request.inputs['x_observed'].CopyFrom(inputs_tensor_proto)

    # Send request
    result = stub.Predict(request, request_timeout)
    print(result)

    result_np = tf.contrib.util.make_ndarray(result.outputs['y_pred'])
    print('\n%s\n' % result_np)
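# All of these clients assume roughly the same preamble for the since-removed
# gRPC beta API and the pre-2.x TensorFlow Serving Python modules; a typical
# set of imports:
import sys

import numpy
import tensorflow as tf
from grpc.beta import implementations
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2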
Example #3
def do_inference(process_num, hostport, num_tests, image, label):
    #print("Begin process: {}".format(process_num))
    host, port = hostport.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    events = []
    for _ in range(num_tests):
        event = threading.Event()
        request = predict_pb2.PredictRequest()
        request.model_spec.name = FLAGS.model_name
        request.model_spec.signature_name = 'predict_images'
        request.inputs['images'].CopyFrom(
            tf.contrib.util.make_tensor_proto(image[0],
                                              shape=[1, image[0].size]))
        result_future = stub.Predict.future(request, FLAGS.request_delay)
        result_future.add_done_callback(_create_rpc_callback(label[0], event))
        events.append(event)

    for event in events:
        event.wait()
    global lock, counter, real_test_num, start_time, finish_time
    with lock:
        if real_test_num.value == 0:
            real_test_num.value = counter.value
            finish_time.value = time.time()
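# lock, counter, real_test_num, start_time and finish_time above are
# module-level multiprocessing primitives shared across worker processes; a
# plausible setup matching that global statement (an assumption, not part of
# the original snippet):
import multiprocessing
import time

lock = multiprocessing.Lock()
counter = multiprocessing.Value('i', 0)        # completed requests
real_test_num = multiprocessing.Value('i', 0)  # request count at finish time
start_time = multiprocessing.Value('d', time.time())
finish_time = multiprocessing.Value('d', 0.0)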
def main():
  host = FLAGS.host
  port = FLAGS.port
  model_name = FLAGS.model_name
  model_version = FLAGS.model_version
  request_timeout = FLAGS.request_timeout

  # Generate inference data
  keys = numpy.asarray([1, 2, 3])
  keys_tensor_proto = tf.contrib.util.make_tensor_proto(keys, dtype=tf.int32)
  features = numpy.asarray(
      [[1, 2, 3, 4, 5, 6, 7, 8, 9], [1, 1, 1, 1, 1, 1, 1, 1, 1],
       [9, 8, 7, 6, 5, 4, 3, 2, 1], [9, 9, 9, 9, 9, 9, 9, 9, 9]])
  features_tensor_proto = tf.contrib.util.make_tensor_proto(features,
                                                            dtype=tf.float32)

  # Create gRPC client and request
  channel = implementations.insecure_channel(host, port)
  stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
  request = predict_pb2.PredictRequest()
  request.model_spec.name = model_name
  if model_version > 0:
    request.model_spec.version.value = model_version
  request.inputs['keys'].CopyFrom(keys_tensor_proto)
  request.inputs['features'].CopyFrom(features_tensor_proto)

  # Send request
  result = stub.Predict(request, request_timeout)
  print(result)
def main():
  host = "127.0.0.1" 
  port = os.environ['PIO_MODEL_SERVER_PORT']
  namespace = os.environ['PIO_MODEL_NAMESPACE'] 
  model_name = os.environ['PIO_MODEL_NAME'] 
  model_version = os.environ['PIO_MODEL_VERSION']
  request_timeout = 5.0

  # Create gRPC client and request
  channel = implementations.insecure_channel(host, port)
  stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
  request = predict_pb2.PredictRequest()
  request.model_spec.name = model_name
  if model_version > 0:
    request.model_spec.version.value = model_version

  inputs_raw = sys.argv[1]
  # TODO:  convert raw json request.body into np array
  inputs_np = numpy.asarray([inputs_raw])
  inputs_tensor_proto = tf.contrib.util.make_tensor_proto(inputs_np,
                                                          dtype=tf.float32)
  request.inputs['x_observed'].CopyFrom(inputs_tensor_proto)

  # Send request
  result = stub.Predict(request, request_timeout)
  print(result)
  
  result_np = tf.contrib.util.make_ndarray(result.outputs['y_pred'])
  print('\n%s\n' % result_np) 
Example #6
  def run(self):
    # Convert json input to tensor
    
    input_str = self.inputs.decode('utf-8')
    input_json = json.loads(input_str)
    inputs_np = np.asarray([input_json['x_observed']])
    inputs_tensor_proto = tf.contrib.util.make_tensor_proto(inputs_np,
                                                            dtype=tf.float32)
    # Build the PredictRequest from inputs
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    if model_version > 0:
      request.model_spec.version.value = model_version
    request.inputs['x_observed'].CopyFrom(inputs_tensor_proto)

    # Create gRPC client and request
    grpc_port = int(sys.argv[2])
    channel = implementations.insecure_channel(grpc_host, grpc_port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    # Send request
    result = stub.Predict(request, request_timeout)

    # Convert PredictResult into np array
    result_np = tf.contrib.util.make_ndarray(result.outputs['y_pred'])

    # Convert np array into json
    result_json = json.dumps({"y_pred": result_np.tolist()[0]})

    return result_json
Example #8
def predict(server, model, data, timeout=10.0):
    """Request generic gRPC server with specified data.
 
  Args:
    server: The address of server. Example: "localhost:9000".
    model: The name of the model. Example: "mnist".
    data: The json data to request. Example: {"keys_dtype": "int32", "keys": [[1], [2]]}.

  Returns:
    The predict result in dictionary format. Example: {"keys": [1, 2]}.
  """
    host, port = server.split(":")
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = model
    for k, v in data.items():
        if k.endswith("_dtype") == False:
            numpy_data = np.array(v)
            dtype = data[k + "_dtype"]
            request.inputs[k].CopyFrom(
                tensor_util.make_tensor_proto(numpy_data, dtype=dtype))

    result = stub.Predict(request, timeout)
    result_dict = {}
    for k, v in result.outputs.items():
        result_dict[k] = get_tensor_values(v)
    return result_dict
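# Hypothetical usage of predict(); each "<name>_dtype" key tells the client
# which dtype to use for the tensor passed under "<name>":
if __name__ == "__main__":
    data = {
        "keys_dtype": "int32", "keys": [[1], [2]],
        "features_dtype": "float32", "features": [[1.0, 2.0], [3.0, 4.0]],
    }
    print(predict("localhost:9000", "mnist", data))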
Example #9
def test_one_process(i):
    host = FLAGS.host
    port = FLAGS.port
    model_name = FLAGS.model_name
    model_version = FLAGS.model_version
    request_timeout = FLAGS.request_timeout

    request_batch = FLAGS.benchmark_batch_size
    request_data = [i for i in range(request_batch)]
    # Generate inference data
    features = numpy.asarray(request_data)
    features_tensor_proto = tf.contrib.util.make_tensor_proto(features,
                                                              dtype=tf.float32)

    # Create gRPC client and request
    channel = implementations.insecure_channel(host, port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.version.value = model_version
    request.inputs['features'].CopyFrom(features_tensor_proto)

    # Send request
    request_number = FLAGS.benchmark_test_number
    #start_time = time.time()

    for i in range(request_number):
        result_future = stub.Predict.future(request, request_timeout)
        #result_future = stub.Predict.future(request, 0.00000001)
        result_future.add_done_callback(_create_rpc_callback())
def main():
  host = FLAGS.host
  port = FLAGS.port
  model_name = FLAGS.model_name
  model_version = FLAGS.model_version
  request_timeout = FLAGS.request_timeout

  request_batch = FLAGS.benchmark_batch_size
  request_data = [i for i in range(request_batch)]
  # Generate inference data
  features = numpy.asarray(request_data)
  features_tensor_proto = tf.contrib.util.make_tensor_proto(features,
                                                            dtype=tf.float32)

  # Create gRPC client and request
  channel = implementations.insecure_channel(host, port)
  stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
  request = predict_pb2.PredictRequest()
  request.model_spec.name = model_name
  request.model_spec.version.value = model_version
  request.inputs['features'].CopyFrom(features_tensor_proto)

  # Send request

  request_number = FLAGS.benchmark_test_number
  start_time = time.time()
  for i in range(request_number):
    result = stub.Predict(request, request_timeout)

  end_time = time.time()
  print("Average latency is: {} ms".format((end_time - start_time) * 1000 / request_number))
Example #11
    def do_post(self, inputs):
        # Create gRPC client and request
        grpc_port = int(sys.argv[2])
        channel = implementations.insecure_channel(grpc_host, grpc_port)
        stub = prediction_service_pb2.beta_create_PredictionService_stub(
            channel)
        request = predict_pb2.PredictRequest()
        request.model_spec.name = model_name
        if model_version > 0:
            request.model_spec.version.value = model_version

        # TODO:  don't hard code this!
        inputs_np = np.asarray([1.0])
        #print(inputs_np)
        inputs_tensor_proto = tf.contrib.util.make_tensor_proto(
            inputs_np, dtype=tf.float32)
        request.inputs['x_observed'].CopyFrom(inputs_tensor_proto)

        # Send request
        result = stub.Predict(request, request_timeout)
        #print(result)

        result_np = tf.contrib.util.make_ndarray(result.outputs['y_pred'])
        #print(result_np)

        return result_np
    def post(self, model_type, model_namespace, model_name, model_version):
        model = self.get_model_assets(model_type, model_namespace, model_name,
                                      model_version)

        # TODO:  Reuse instead of creating this channel everytime
        channel = implementations.insecure_channel(
            self.settings['model_server_tensorflow_serving_host'],
            int(self.settings['model_server_tensorflow_serving_port']))
        stub = prediction_service_pb2.beta_create_PredictionService_stub(
            channel)

        # Transform raw inputs to TensorFlow PredictRequest
        transformed_inputs_request = model.request_transformer.transform_request(
            self.request.body)
        transformed_inputs_request.model_spec.name = model_name
        transformed_inputs_request.model_spec.version.value = int(
            model_version)

        # Transform TensorFlow PredictResponse into output
        outputs = stub.Predict(transformed_inputs_request,
                               self.settings['request_timeout'])
        transformed_outputs = model.response_transformer.transform_response(
            outputs)
        self.write(transformed_outputs)
        self.finish()
def main():
  host = FLAGS.host
  port = FLAGS.port
  model_name = FLAGS.model_name
  model_version = FLAGS.model_version
  request_timeout = FLAGS.request_timeout
  '''
  Example data:
    0 5:1 6:1 17:1 21:1 35:1 40:1 53:1 63:1 71:1 73:1 74:1 76:1 80:1 83:1
    1 5:1 7:1 17:1 22:1 36:1 40:1 51:1 63:1 67:1 73:1 74:1 76:1 81:1 83:1
  '''

  # Generate keys TensorProto
  keys = numpy.asarray([1, 2])
  keys_tensor_proto = tf.contrib.util.make_tensor_proto(keys, dtype=tf.int32)

  # Generate indexs TensorProto
  indexs = numpy.asarray([[0, 0], [0, 1], [0, 2], [0, 3], [0, 4], [0, 5],
                          [0, 6], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11],
                          [0, 12], [0, 13], [1, 0], [1, 1], [1, 2], [1, 3],
                          [1, 4], [1, 5], [1, 6], [1, 7], [1, 8], [1, 9],
                          [1, 10], [1, 11], [1, 12], [1, 13]])
  indexs_tensor_proto = tf.contrib.util.make_tensor_proto(indexs,
                                                          dtype=tf.int64)

  # Generate ids TensorProto
  ids = numpy.asarray([5, 6, 17, 21, 35, 40, 53, 63, 71, 73, 74, 76, 80, 83, 5,
                       7, 17, 22, 36, 40, 51, 63, 67, 73, 74, 76, 81, 83])
  ids_tensor_proto = tf.contrib.util.make_tensor_proto(ids, dtype=tf.int64)

  # Generate values TensorProto
  values = numpy.asarray([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
  values_tensor_proto = tf.contrib.util.make_tensor_proto(values,
                                                          dtype=tf.float32)

  # Generate shape TensorProto
  shape = numpy.asarray([2, 124])
  shape_tensor_proto = tf.contrib.util.make_tensor_proto(shape, dtype=tf.int64)

  # Create gRPC client and request
  channel = implementations.insecure_channel(host, port)
  stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
  request = predict_pb2.PredictRequest()
  request.model_spec.name = model_name
  if model_version > 0:
    request.model_spec.version.value = model_version

  request.inputs["keys"].CopyFrom(keys_tensor_proto)
  request.inputs["indexs"].CopyFrom(indexs_tensor_proto)
  request.inputs["ids"].CopyFrom(ids_tensor_proto)
  request.inputs["values"].CopyFrom(values_tensor_proto)
  request.inputs["shape"].CopyFrom(shape_tensor_proto)

  # Send request
  result = stub.Predict(request, request_timeout)
  print(result)
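# The keys/indexs/ids/values/shape inputs are the flattened pieces of the
# sparse features a linear model typically consumes; on the serving side they
# would be reassembled roughly like this (a sketch, assuming the exported
# signature uses these names):
sparse_ids = tf.SparseTensor(indices=indexs, values=ids, dense_shape=shape)
sparse_values = tf.SparseTensor(indices=indexs, values=values,
                                dense_shape=shape)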
  def get_outputs(self, input_data):
    # Step 0: check the type of input parameters
    if not isinstance(self.ns.host, str):
      print("The type of \"host\" must be str (string)!")
      raise IllegalArgumentException

    if not re_match("^[0-9localhost.:/]+$", self.ns.host):
      print("host does not match the preset character set!")
      raise IllegalArgumentException

    if not isinstance(self.ns.port, int):
      print("The type of \"port\" must be int!")
      raise IllegalArgumentException

    if not isinstance(self.ns.model_name, str):
      print("The type of \"model_name\" must be str (string)!")
      raise IllegalArgumentException

    if not re_match("^[0-9A-Za-z_. \-/]+$", self.ns.model_name):
      print("model_name does not match the preset character set!")
      raise IllegalArgumentException

    if not isinstance(input_data, dict):
      print("The type of \"input_data\" must be dict!")
      raise IllegalArgumentException

    if not isinstance(MAX_RESPONSE_TIME, (int, float)):
      print("The type of \"max_response_time\" must be int or float!")
      raise IllegalArgumentException

    # Setup connection
    channel = implementations.insecure_channel(self.ns.host, self.ns.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    
    # Initialize the request
    request = predict_pb2.PredictRequest()
    request.model_spec.name = self.ns.model_name
    request.model_spec.signature_name = self.ns.model_signature_name
    #request.model_spec.version = self.ns.model_version_num
    # Set the input variables of the request
    for key, value in input_data.items():
      if not re_match("^[0-9A-Za-z_. \-/]+$", key):
        print("input key does not match the preset character set!")
        raise IllegalArgumentException
      if isinstance(value, numpy_ndarray):
        request.inputs[key].CopyFrom(
            make_tensor_proto(value, shape=list(value.shape)))
      elif isinstance(value, (int, float)):
        request.inputs[key].CopyFrom(make_tensor_proto(value))
      else:
        # Let make_tensor_proto infer the shape; non-ndarray values such as
        # lists or strings have no .shape attribute.
        request.inputs[key].CopyFrom(make_tensor_proto(value))
    
    # Obtain the result of prediction
    response = stub.Predict(request, MAX_RESPONSE_TIME)
    responseDict = None
    if PRINT_RESPONSE:
      responseDict = self.print_response(response)

    return responseDict
Example #18
def main(host, port):
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    start = time.time()
    request = create_request()
    print('Predicting...')
    result = stub.Predict(request, 10.0)  # 10 secs timeout
    duration = time.time() - start
    print("Computed result in %s" % duration)
    print(result)
Example #19
def do_inference(hostport, num_tests, image, label):
    host, port = hostport.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    for _ in range(num_tests):
        request = predict_pb2.PredictRequest()
        request.model_spec.name = FLAGS.model_name
        request.model_spec.signature_name = 'predict_images'
        request.inputs['images'].CopyFrom(
            tf.contrib.util.make_tensor_proto(image[0],
                                              shape=[1, image[0].size]))
        result_future = stub.Predict.future(request, FLAGS.request_delay)
        result_future.add_done_callback(_create_rpc_callback(label[0]))
Example #20
    def __init__(self,
                 tfserving_host=ner_server.split(':')[0],
                 tfserving_port=int(ner_server.split(':')[1])):
        self.tfserving_host = tfserving_host
        self.tfserving_port = tfserving_port

        # set up the gRPC channel
        self.channel = implementations.insecure_channel(
            self.tfserving_host, self.tfserving_port)

        # setup grpc prediction stub for tfserving
        self.stub = prediction_service_pb2.beta_create_PredictionService_stub(
            self.channel)
        self.char_vob = _get_vob(ner_char2vec_path)
Example #21
def main(_):
    host, port = FLAGS.server.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    # Send request
    with open(FLAGS.image, 'rb') as f:
        # See prediction_service.proto for gRPC request/response details.
        data = f.read()
        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'inception'
        request.inputs['images'].CopyFrom(
            tf.contrib.util.make_tensor_proto(data, shape=[1]))
        result = stub.Predict(request, 10.0)  # 10 secs timeout
        print(result)
    def run(self):
        # Create gRPC client and request
        channel = implementations.insecure_channel(self.grpc_host, self.grpc_port)
        stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
        # Transform raw inputs (i.e. JSON) into a TensorFlow Serving request,
        # then attach the model spec to the request that is actually sent
        request = input_transformer(self.raw_inputs)
        request.model_spec.name = self.model_name
        if self.model_version > 0:
            request.model_spec.version.value = self.model_version

        # Send request
        output = stub.Predict(request, self.request_timeout)

        # Transform the TensorFlow Serving response to raw output (i.e. JSON)
        return output_transformer(output)
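# input_transformer and output_transformer used by run() above are not shown;
# a plausible sketch, assuming a JSON payload and the 'x_observed'/'y_pred'
# tensor names used throughout these examples:
import json

import numpy as np


def input_transformer(raw_inputs):
    """Builds a PredictRequest from a raw JSON body like {"x_observed": [...]}."""
    inputs_np = np.asarray([json.loads(raw_inputs)['x_observed']])
    request = predict_pb2.PredictRequest()
    request.inputs['x_observed'].CopyFrom(
        tf.contrib.util.make_tensor_proto(inputs_np, dtype=tf.float32))
    return request


def output_transformer(response):
    """Converts a PredictResponse back into a JSON string."""
    result_np = tf.contrib.util.make_ndarray(response.outputs['y_pred'])
    return json.dumps({"y_pred": result_np.tolist()[0]})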
    def post(self, model_namespace, model_name, model_version):
        model_key_list = [
            'tensorflow', model_namespace, model_name, model_version
        ]

        try:
            REQUESTS_COUNT.labels('predict', *model_key_list).inc()
            model = self.get_model_assets(model_key_list)
            with REQUEST_LATENCY_BUCKETS.labels('predict',
                                                *model_key_list).time():
                # TODO:  Reuse instead of creating this channel everytime
                channel = implementations.insecure_channel(
                    self.settings['model_server_tensorflow_serving_host'],
                    int(self.settings['model_server_tensorflow_serving_port']))
                stub = prediction_service_pb2.beta_create_PredictionService_stub(
                    channel)

                # Transform raw inputs to TensorFlow PredictRequest
                transformed_inputs_request = model.transform_request(
                    self.request.body)
                inputs_tensor_proto = tf.make_tensor_proto(
                    transformed_inputs_request, dtype=tf.float32)
                tf_request = predict_pb2.PredictRequest()
                tf_request.inputs['x_observed'].CopyFrom(inputs_tensor_proto)

                tf_request.model_spec.name = model_name
                tf_request.model_spec.version.value = int(model_version)

                # Transform TensorFlow PredictResponse into output
                response = stub.Predict(tf_request,
                                        self.settings['request_timeout'])
                response_np = tf.contrib.util.make_ndarray(
                    response.outputs['y_pred'])

                transformed_response_np = model.transform_response(response_np)
                self.write(transformed_response_np)
            self.finish()
        except Exception as e:
            message = 'MainHandler.post: Exception - {0} Error {1}'.format(
                '/'.join(model_key_list), str(e))
            LOGGER.info(message)
            logging.exception(message)
def test_one_process(i):
    host = FLAGS.host
    port = FLAGS.port
    model_name = FLAGS.model_name
    model_version = FLAGS.model_version
    request_timeout = FLAGS.request_timeout

    request_batch = FLAGS.benchmark_batch_size
    request_data = [i for i in range(request_batch)]
    # Generate inference data
    features = numpy.asarray(request_data)
    features_tensor_proto = tf.contrib.util.make_tensor_proto(features,
                                                              dtype=tf.float32)

    # Create gRPC client and request
    channel = implementations.insecure_channel(host, port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.version.value = model_version
    request.inputs['features'].CopyFrom(features_tensor_proto)

    # Send request
    request_number = FLAGS.benchmark_test_number
    #start_time = time.time()

    events = []
    for i in range(request_number):
        event = threading.Event()
        result_future = stub.Predict.future(request, request_timeout)
        #result_future = stub.Predict.future(request, 0.00000001)
        result_future.add_done_callback(_create_rpc_callback(event))
        events.append(event)
        #result = stub.Predict(request, request_timeout)

        #end_time = time.time()
        #print("Average latency is: {} ms".format((end_time - start_time) * 1000 / request_number))
        #print("Average qps is: {}".format(request_number / (end_time - start_time)))

    for event in events:
        event.wait()
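# A plausible sketch of the callback factory this benchmark assumes: it marks
# the request's event once the RPC completes, unblocking event.wait() above:
def _create_rpc_callback(event):
    def _callback(result_future):
        if result_future.exception() is not None:
            print(result_future.exception())
        event.set()
    return _callback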
Example #26
def main():
    host = FLAGS.host
    port = FLAGS.port
    model_name = FLAGS.model_name
    model_version = FLAGS.model_version
    request_timeout = FLAGS.request_timeout

    # Create gRPC client and request
    channel = implementations.insecure_channel(host, port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    if model_version > 0:
        request.model_spec.version.value = model_version

    env = gym.make(FLAGS.gym_env)
    state = env.reset()
    total_reward = 0

    while True:
        if FLAGS.render_game:
            time.sleep(0.1)
            env.render()

        # Generate inference data
        features = numpy.asarray([state])
        features_tensor_proto = tf.contrib.util.make_tensor_proto(
            features, dtype=tf.float32)
        request.inputs['states'].CopyFrom(features_tensor_proto)

        # Send request
        result = stub.Predict(request, request_timeout)
        action = int(result.outputs.get("actions").int64_val[0])

        next_state, reward, done, info = env.step(action)
        total_reward += reward
        state = next_state

        if done:
            print("End of the game, reward: {}".format(total_reward))
            break
def model_prediction():
    host = "localhost"
    port = 9000
    model_name = "resnet"
    payload = flask.request.get_json()
    url_input = payload['input']

    image = urllib.urlopen(url_input).read()

    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'resnet'
    request.model_spec.signature_name = 'predict_images'
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(image, shape=[1]))
    result = stub.Predict(request, 10.0)  # 10 secs timeout
    print(result)

    return result
def main():
  host = FLAGS.host
  port = FLAGS.port
  model_name = FLAGS.model_name
  model_version = FLAGS.model_version
  request_timeout = FLAGS.request_timeout

  image_filepaths = ["test-image.jpg"]

  for index, image_filepath in enumerate(image_filepaths):
    image_ndarray = image.img_to_array(
        image.load_img(image_filepath, target_size=(224, 224)))
    image_ndarray = image_ndarray / 255.

  # Create gRPC client and request
  channel = implementations.insecure_channel(host, port)
  stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
  request = predict_pb2.PredictRequest()
  request.model_spec.name = model_name
  request.model_spec.version.value = model_version
  request.inputs['input_image'].CopyFrom(
      tf.contrib.util.make_tensor_proto(image_ndarray,
                                        shape=[1] + list(image_ndarray.shape)))

  # Send request and parse the printed proto text (fragile; reading the
  # response's float_val fields directly would be more robust)
  result = str(stub.Predict(request, request_timeout))
  mylist = result.split('\n')[-8:-3]
  finallist = []
  for element in mylist:
      element = element.split(':')[1]
      finallist.append(float("{:.6f}".format(float(element))))

  index = finallist.index(max(finallist))
  CLASSES = ['Daisy', 'Dandelion', 'Rosa', 'Girasol', 'Tulipán']

  ClassPred = CLASSES[index]
  ClassProb = finallist[index]

  print(finallist)
  print(ClassPred)
  print(ClassProb)
Example #29
def main():
    host = FLAGS.host
    port = FLAGS.port
    model_name = FLAGS.model_name
    model_version = FLAGS.model_version
    request_timeout = FLAGS.request_timeout

    # Generate inference data
    keys = np.asarray([1, 2, 3, 4, 5])
    keys_tensor_proto = tf.contrib.util.make_tensor_proto(keys, dtype=tf.int32)

    features = np.ndarray(shape=(5, 32, 32, 3), dtype=np.float32)

    image_filepaths = [
        "../data/inference/Blastoise.png", "../data/inference/Charizard.png",
        "../data/inference/Mew.png", "../data/inference/Pikachu.png",
        "../data/inference/Venusaur.png"
    ]

    for index, image_filepath in enumerate(image_filepaths):
        image_ndarray = ndimage.imread(image_filepath, mode="RGB")
        features[index] = image_ndarray

    features_tensor_proto = tf.contrib.util.make_tensor_proto(features,
                                                              dtype=tf.float32)

    # Create gRPC client and request
    channel = implementations.insecure_channel(host, port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.version.value = model_version
    request.inputs['keys'].CopyFrom(keys_tensor_proto)
    request.inputs['features'].CopyFrom(features_tensor_proto)

    # Send request
    result = stub.Predict(request, request_timeout)
    print(result)
Example #30
def main(_):
  host, port = FLAGS.server.split(':')
  channel = implementations.insecure_channel(host, int(port))
  stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
  # Send request
  with open(FLAGS.image, 'rb') as f:
    # See prediction_service.proto for gRPC request/response details.
    data = f.read()
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'inception'
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(data, shape=[1]))


    request_number = FLAGS.benchmark_test_number
    start_time = time.time()

    for i in range(request_number):
      result = stub.Predict(request, 10.0)  # 10 secs timeout
      # print(result)

    end_time = time.time()
    print("Average latency is: {} ms".format((end_time - start_time) * 1000 / request_number))
Example #31
    def __init__(self, host, port):
        serv_host = FLAGS.host
        serv_port = FLAGS.port
        model_name = FLAGS.model_name
        model_version = FLAGS.model_version
        self.request_timeout = FLAGS.request_timeout

        # Create gRPC client and request
        channel = implementations.insecure_channel(serv_host, serv_port)
        self.stub = prediction_service_pb2.beta_create_PredictionService_stub(
            channel)
        self.request = predict_pb2.PredictRequest()
        self.request.model_spec.name = model_name
        self.request.model_spec.signature_name = 'predict_images'

        if model_version > 0:
            self.request.model_spec.version.value = model_version

        self._host = host
        self._port = port
        bottle.BaseRequest.MEMFILE_MAX = 1000000
        self._app = bottle.Bottle()
        self._route()
Example #32
    def infer(self, data, shape):
        def get_output_shape(r):
            dim = r['outputs']['outputs']['tensorShape']['dim']
            return (int(dim[0]['size']), int(dim[1]['size']))

        def get_output_data(r):
            return r.get(
                'outputs', {}
            ).get('outputs', {}).get('floatVal')

        channel = implementations.insecure_channel(
            self.server_host, self.server_port
        )
        stub = prediction_service_pb2.beta_create_PredictionService_stub(
            channel
        )
        # Send request
        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'main_model'
        request.model_spec.signature_name = 'predict'

        request.inputs['inputs'].CopyFrom(
            tf.contrib.util.make_tensor_proto(
                data, shape=shape
            )
        )

        result = stub.Predict(request, 10.0)  # 10 secs timeout
        print("Type:", type(result))
        print(result)
        result = json.loads(MessageToJson(result))

        data = get_output_data(result)
        shape = get_output_shape(result)

        return dict(data=data, shape=shape)
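# MessageToJson camel-cases proto field names, which is why the helpers above
# look up 'floatVal' and 'tensorShape'. The same values can be read straight
# off the raw PredictResponse, skipping the JSON round-trip (a sketch;
# 'response' stands for the unconverted stub.Predict(...) result):
tensor = response.outputs['outputs']
data = list(tensor.float_val)
shape = tuple(d.size for d in tensor.tensor_shape.dim)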
Example #33
def predict(model_name):
    from flask import request
    from flask import jsonify
    from flask import redirect
    print("...... calling predict ......")
    data = {"success": False}
    if request.method == "POST":
        # check if the POST request has a file part
        if 'file' not in request.files:
            print('No file part')
        file = request.files['file']
        # if the user did not select a file, the browser submits an
        # empty part without a filename
        if file.filename == '':
            print('No selected file')
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))

            #loading image
            filename = UPLOAD_FOLDER + '/' + filename
            print("\nfilename:", filename)

            host = "127.0.0.1"
            port = 8500
            model_name = model_name
            model_version = 1
            request_timeout = 10.0

            image_filepaths = [filename]

            for index, image_filepath in enumerate(image_filepaths):
                image_ndarray = image.img_to_array(
                    image.load_img(image_filepath, target_size=(224, 224)))
                image_ndarray = image_ndarray / 255.

            # Create gRPC client and request
            channel = implementations.insecure_channel(host, port)
            stub = prediction_service_pb2.beta_create_PredictionService_stub(
                channel)
            request = predict_pb2.PredictRequest()
            request.model_spec.name = model_name
            request.model_spec.version.value = model_version
            request.inputs['input_image'].CopyFrom(
                tf.contrib.util.make_tensor_proto(image_ndarray,
                                                  shape=[1] +
                                                  list(image_ndarray.shape)))

            # Send request
            result = str(stub.Predict(request, request_timeout))
            mylist = result.split('\n')[-8:-3]
            finallist = []
            for element in mylist:
                element = element.split(':')[1]
                finallist.append(float("{:.6f}".format(float(element))))

            index = finallist.index(max(finallist))
            CLASSES = ['Daisy', 'Dandelion', 'Rosa', 'Girasol', 'Tulipán']

            ClassPred = CLASSES[index]
            ClassProb = finallist[index]

            print(finallist)
            print(ClassPred)
            print(ClassProb)

            label = ClassPred
            score = ClassProb

            # Results as JSON
            data["predictions"] = []
            r = {"label": label, "score": float(score)}
            data["predictions"].append(r)

            # Success
            data["success"] = True

    return jsonify(data)
Example #34
    """获取文本向量
    Args:
        text: 待检测文本
        wv: 词向量模型
    Returns:
        [[[ 3.80905056   1.94315064  -0.20703495  -1.31589055   1.9627794
           ...
           2.16935492   2.95426321  -4.71534014  -3.25034237 -11.28901672]]]
    """
    text = tr.extractWords(text)
    words = jieba.cut(text.strip())
    text_sequence = []
    for word in words:
        try:
            text_sequence.append(wv[word])
        except KeyError:
            text_sequence.append(wv['UNK'])
    text_sequence = np.asarray(text_sequence)
    sample = text_sequence.reshape(1, len(text_sequence), 200)
    return sample


print(" ".join(jieba.cut('分词初始化')))
wv = tl.files.load_npy_to_any(name='../word2vec/output/model_word2vec_200.npy')

host, port = ('localhost', '9000')
channel = implementations.insecure_channel(host, int(port))
stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
request = predict_pb2.PredictRequest()
request.model_spec.name = 'antispam'