def do_inference(hostport, work_dir, concurrency, num_tests):
    """Tests PredictionService with concurrent requests.

    Args:
        hostport: Host:port address of the PredictionService.
        work_dir: The full path of working directory for test data set.
        concurrency: Maximum number of concurrent requests.
        num_tests: Number of test images to use.

    Returns:
        The classification error rate.

    Raises:
        IOError: An error occurred processing test data set.
    """
    test_data_set = input_data.read_data_sets(work_dir).test
    server_host, server_port = hostport.split(':')
    grpc_channel = implementations.insecure_channel(server_host, int(server_port))
    service_stub = prediction_service_pb2.beta_create_PredictionService_stub(
        grpc_channel)
    result_counter = _ResultCounter(num_tests, concurrency)
    for _ in range(num_tests):
        image, label = test_data_set.next_batch(1)
        req = predict_pb2.PredictRequest()
        req.model_spec.name = 'mnist'
        req.inputs['images'].CopyFrom(
            tf.contrib.util.make_tensor_proto(image[0], shape=[1, image[0].size]))
        # Block until the number of outstanding RPCs drops below `concurrency`.
        result_counter.throttle()
        future = service_stub.Predict.future(req, 5.0)  # 5 seconds
        future.add_done_callback(_create_rpc_callback(label[0], result_counter))
    return result_counter.get_error_rate()
def main():
    """Sends one Predict RPC built from the first CLI argument.

    Reads connection/model settings from FLAGS, converts sys.argv[1] into a
    float32 'x_observed' tensor, and prints both the raw response and the
    decoded 'y_pred' output.
    """
    host = FLAGS.host
    port = FLAGS.port
    model_name = FLAGS.model_name
    model_version = FLAGS.model_version
    request_timeout = FLAGS.request_timeout

    # Create gRPC client and request
    channel = implementations.insecure_channel(host, port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    if model_version > 0:
        request.model_spec.version.value = model_version

    # BUG FIX: sys.argv[1] is a string; make_tensor_proto(..., dtype=tf.float32)
    # cannot build a float tensor from a string array, so convert first.
    inputs_np = numpy.asarray([float(sys.argv[1])])
    inputs_tensor_proto = tf.contrib.util.make_tensor_proto(inputs_np,
                                                            dtype=tf.float32)
    request.inputs['x_observed'].CopyFrom(inputs_tensor_proto)

    # Send request
    result = stub.Predict(request, request_timeout)
    print(result)
    result_np = tf.contrib.util.make_ndarray(result.outputs['y_pred'])
    print('\n%s\n' % result_np)
def do_inference(process_num, hostport, num_tests, image, label):
    # Fire `num_tests` asynchronous Predict RPCs at a TF-Serving instance,
    # wait for every callback to signal its Event, then record benchmark
    # state shared across worker processes.
    #print("Begin process: {}".format(process_num))
    host, port = hostport.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    events = []
    for _ in range(num_tests):
        event = threading.Event()
        request = predict_pb2.PredictRequest()
        request.model_spec.name = FLAGS.model_name
        request.model_spec.signature_name = 'predict_images'
        request.inputs['images'].CopyFrom(
            tf.contrib.util.make_tensor_proto(image[0], shape=[1, image[0].size]))
        result_future = stub.Predict.future(request, FLAGS.request_delay)
        # _create_rpc_callback is expected to set `event` when the RPC finishes.
        result_future.add_done_callback(_create_rpc_callback(label[0], event))
        events.append(event)
    # Block until every outstanding RPC has completed.
    for event in events:
        event.wait()
    # Shared multiprocessing state (Value/Lock objects defined at module level).
    global lock, counter, real_test_num, start_time, finish_time
    with lock:
        if real_test_num.value == 0:
            # NOTE(review): original indentation was lost in this source; it is
            # assumed that finish_time is recorded together with real_test_num
            # by the first process to get here — confirm against the driver.
            real_test_num.value = counter.value
            finish_time.value = time.time()
def main():
    """Issues one Predict RPC with fixed demo keys/features and prints the result."""
    request_timeout = FLAGS.request_timeout

    # Demo inference payload: three int32 keys, four float32 feature rows.
    keys_proto = tf.contrib.util.make_tensor_proto(
        numpy.asarray([1, 2, 3]), dtype=tf.int32)
    feature_rows = numpy.asarray([[1, 2, 3, 4, 5, 6, 7, 8, 9],
                                  [1, 1, 1, 1, 1, 1, 1, 1, 1],
                                  [9, 8, 7, 6, 5, 4, 3, 2, 1],
                                  [9, 9, 9, 9, 9, 9, 9, 9, 9]])
    features_proto = tf.contrib.util.make_tensor_proto(feature_rows,
                                                       dtype=tf.float32)

    # Create gRPC client and request
    channel = implementations.insecure_channel(FLAGS.host, FLAGS.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model_name
    if FLAGS.model_version > 0:
        request.model_spec.version.value = FLAGS.model_version
    request.inputs['keys'].CopyFrom(keys_proto)
    request.inputs['features'].CopyFrom(features_proto)

    # Send request
    print(stub.Predict(request, request_timeout))
def main():
    """Predicts 'y_pred' for the value in sys.argv[1] against a PIO-hosted model.

    Connection and model identity come from PIO_* environment variables.
    """
    host = "127.0.0.1"
    # BUG FIX: environment variables are strings; the gRPC channel needs an
    # int port, and comparing a str version with `> 0` is a TypeError on
    # Python 3 — convert both to int.
    port = int(os.environ['PIO_MODEL_SERVER_PORT'])
    namespace = os.environ['PIO_MODEL_NAMESPACE']
    model_name = os.environ['PIO_MODEL_NAME']
    model_version = int(os.environ['PIO_MODEL_VERSION'])
    request_timeout = 5.0

    # Create gRPC client and request
    channel = implementations.insecure_channel(host, port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    if model_version > 0:
        request.model_spec.version.value = model_version

    inputs_raw = sys.argv[1]
    # TODO: convert raw json request.body into np array
    # BUG FIX: make_tensor_proto(..., dtype=tf.float32) cannot take a string
    # array; convert the CLI argument to float first.
    inputs_np = numpy.asarray([float(inputs_raw)])
    inputs_tensor_proto = tf.contrib.util.make_tensor_proto(inputs_np,
                                                            dtype=tf.float32)
    request.inputs['x_observed'].CopyFrom(inputs_tensor_proto)

    # Send request
    result = stub.Predict(request, request_timeout)
    print(result)
    result_np = tf.contrib.util.make_ndarray(result.outputs['y_pred'])
    print('\n%s\n' % result_np)
def run(self):
    """Decodes the JSON payload in self.inputs, queries TF-Serving, and
    returns the prediction as a JSON string."""
    # json bytes -> tensor proto
    payload = json.loads(self.inputs.decode('utf-8'))
    tensor_proto = tf.contrib.util.make_tensor_proto(
        np.asarray([payload['x_observed']]), dtype=tf.float32)

    # Build the PredictRequest (model_name/model_version are module globals).
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    if model_version > 0:
        request.model_spec.version.value = model_version
    request.inputs['x_observed'].CopyFrom(tensor_proto)

    # gRPC client; the serving port comes from the command line.
    channel = implementations.insecure_channel(grpc_host, int(sys.argv[2]))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    # Send request
    result = stub.Predict(request, request_timeout)

    # PredictResponse -> np array -> json string
    y_pred = tf.contrib.util.make_ndarray(result.outputs['y_pred'])
    return json.dumps({"y_pred": y_pred.tolist()[0]})
def run(self):
    """Parses self.inputs (UTF-8 JSON), calls TF-Serving, and returns the
    'y_pred' prediction serialized as JSON."""
    # Convert json input to tensor
    observed = json.loads(self.inputs.decode('utf-8'))['x_observed']
    observed_proto = tf.contrib.util.make_tensor_proto(
        np.asarray([observed]), dtype=tf.float32)

    # Build the PredictRequest from inputs
    predict_request = predict_pb2.PredictRequest()
    predict_request.model_spec.name = model_name
    if model_version > 0:
        predict_request.model_spec.version.value = model_version
    predict_request.inputs['x_observed'].CopyFrom(observed_proto)

    # Create gRPC client and send the request (port from the command line).
    channel = implementations.insecure_channel(grpc_host, int(sys.argv[2]))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    response = stub.Predict(predict_request, request_timeout)

    # PredictResponse -> np array -> json
    y_pred = tf.contrib.util.make_ndarray(response.outputs['y_pred'])
    return json.dumps({"y_pred": y_pred.tolist()[0]})
def predict(server, model, data, timeout=10.0):
    """Request generic gRPC server with specified data.

    Args:
        server: The address of server. Example: "localhost:9000".
        model: The name of the model. Example: "mnist".
        data: The json data to request. Example:
            {"keys_dtype": "int32", "keys": [[1], [2]]}.
        timeout: RPC deadline in seconds.

    Returns:
        The predict result in dictionary format. Example: {"keys": [1, 2]}.
    """
    host, port = server.split(":")
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model
    for k, v in data.items():
        # "<name>_dtype" entries only describe their "<name>" sibling.
        # FIX: idiomatic truthiness guard instead of `== False`.
        if k.endswith("_dtype"):
            continue
        numpy_data = np.array(v)
        dtype = data[k + "_dtype"]
        request.inputs[k].CopyFrom(
            tensor_util.make_tensor_proto(numpy_data, dtype=dtype))
    result = stub.Predict(request, timeout)
    # Decode every output tensor into plain Python values.
    return {k: get_tensor_values(v) for k, v in result.outputs.items()}
def test_one_process(i):
    """Fires FLAGS.benchmark_test_number asynchronous Predict RPCs
    (fire-and-forget; completion is handled by _create_rpc_callback).

    Args:
        i: Worker index identifying this benchmark process.
    """
    request_timeout = FLAGS.request_timeout
    request_batch = FLAGS.benchmark_batch_size

    # Generate inference data: [0 .. batch-1] as float32 features.
    features = numpy.asarray(list(range(request_batch)))
    features_tensor_proto = tf.contrib.util.make_tensor_proto(features,
                                                              dtype=tf.float32)

    # Create gRPC client and request
    channel = implementations.insecure_channel(FLAGS.host, FLAGS.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model_name
    request.model_spec.version.value = FLAGS.model_version
    request.inputs['features'].CopyFrom(features_tensor_proto)

    # Send requests asynchronously.
    # BUG FIX: the loop previously rebound `i`, clobbering the worker-index
    # parameter; use a throwaway name instead.
    for _ in range(FLAGS.benchmark_test_number):
        result_future = stub.Predict.future(request, request_timeout)
        result_future.add_done_callback(_create_rpc_callback())
def main():
    """Benchmarks synchronous Predict latency and prints the mean in ms."""
    timeout = FLAGS.request_timeout
    batch_size = FLAGS.benchmark_batch_size

    # Inference payload: [0 .. batch_size-1] as a float32 vector.
    payload = numpy.asarray([n for n in range(batch_size)])
    payload_proto = tf.contrib.util.make_tensor_proto(payload, dtype=tf.float32)

    # Create gRPC client and request
    channel = implementations.insecure_channel(FLAGS.host, FLAGS.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model_name
    request.model_spec.version.value = FLAGS.model_version
    request.inputs['features'].CopyFrom(payload_proto)

    # Time `benchmark_test_number` blocking calls end to end.
    total = FLAGS.benchmark_test_number
    started = time.time()
    for _ in range(total):
        result = stub.Predict(request, timeout)
    elapsed = time.time() - started
    print("Average latency is: {} ms".format(elapsed * 1000 / total))
def do_post(self, inputs):
    """Sends a hard-coded observation to TF-Serving and returns the 'y_pred'
    output as an ndarray. `inputs` is currently unused (see TODO)."""
    # gRPC plumbing; the serving port comes from the command line.
    channel = implementations.insecure_channel(grpc_host, int(sys.argv[2]))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    if model_version > 0:
        request.model_spec.version.value = model_version

    # TODO: don't hard code this!
    tensor_proto = tf.contrib.util.make_tensor_proto(np.asarray([1.0]),
                                                     dtype=tf.float32)
    request.inputs['x_observed'].CopyFrom(tensor_proto)

    # Send request and decode the response tensor.
    result = stub.Predict(request, request_timeout)
    return tf.contrib.util.make_ndarray(result.outputs['y_pred'])
def post(self, model_type, model_namespace, model_name, model_version):
    """Proxies a prediction request to TF-Serving for the addressed model,
    using the model's own request/response transformers."""
    model = self.get_model_assets(model_type, model_namespace, model_name,
                                  model_version)

    # TODO: Reuse instead of creating this channel everytime
    serving_host = self.settings['model_server_tensorflow_serving_host']
    serving_port = int(self.settings['model_server_tensorflow_serving_port'])
    channel = implementations.insecure_channel(serving_host, serving_port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    # Raw request body -> TensorFlow PredictRequest
    predict_request = model.request_transformer.transform_request(
        self.request.body)
    predict_request.model_spec.name = model_name
    predict_request.model_spec.version.value = int(model_version)

    # TensorFlow PredictResponse -> raw output
    outputs = stub.Predict(predict_request, self.settings['request_timeout'])
    self.write(model.response_transformer.transform_response(outputs))
    self.finish()
def main():
    """Sends demo keys/features to the configured model and prints the response."""
    host = FLAGS.host
    port = FLAGS.port
    model_name = FLAGS.model_name
    model_version = FLAGS.model_version
    request_timeout = FLAGS.request_timeout

    # Demo payload: three int32 keys and a 4x9 float32 feature matrix.
    keys_proto = tf.contrib.util.make_tensor_proto(
        numpy.asarray([1, 2, 3]), dtype=tf.int32)
    features_proto = tf.contrib.util.make_tensor_proto(
        numpy.asarray([[1, 2, 3, 4, 5, 6, 7, 8, 9],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [9, 8, 7, 6, 5, 4, 3, 2, 1],
                       [9, 9, 9, 9, 9, 9, 9, 9, 9]]),
        dtype=tf.float32)

    # Create gRPC client and request
    channel = implementations.insecure_channel(host, port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    if model_version > 0:
        request.model_spec.version.value = model_version
    request.inputs['keys'].CopyFrom(keys_proto)
    request.inputs['features'].CopyFrom(features_proto)

    # Send request
    print(stub.Predict(request, request_timeout))
def main():
    """Measures mean blocking Predict latency over FLAGS.benchmark_test_number
    calls and prints it in milliseconds."""
    timeout = FLAGS.request_timeout

    # Payload: [0 .. benchmark_batch_size-1] as float32 features.
    features_proto = tf.contrib.util.make_tensor_proto(
        numpy.asarray([n for n in range(FLAGS.benchmark_batch_size)]),
        dtype=tf.float32)

    # Create gRPC client and request
    channel = implementations.insecure_channel(FLAGS.host, FLAGS.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model_name
    request.model_spec.version.value = FLAGS.model_version
    request.inputs['features'].CopyFrom(features_proto)

    # Send request repeatedly and time the whole run.
    total = FLAGS.benchmark_test_number
    begin = time.time()
    for _ in range(total):
        result = stub.Predict(request, timeout)
    print("Average latency is: {} ms".format(
        (time.time() - begin) * 1000 / total))
def main():
    """Sends one sparse-tensor Predict RPC (keys/indexs/ids/values/shape)
    and prints the response.

    Example data:
    0 5:1 6:1 17:1 21:1 35:1 40:1 53:1 63:1 71:1 73:1 74:1 76:1 80:1 83:1
    1 5:1 7:1 17:1 22:1 36:1 40:1 51:1 63:1 67:1 73:1 74:1 76:1 81:1 83:1
    """
    request_timeout = FLAGS.request_timeout

    # keys: one id per example.
    keys_tensor_proto = tf.contrib.util.make_tensor_proto(
        numpy.asarray([1, 2]), dtype=tf.int32)

    # indexs: (example, position) coordinate of every non-zero entry.
    indexs = numpy.asarray(
        [[0, 0], [0, 1], [0, 2], [0, 3], [0, 4], [0, 5], [0, 6], [0, 7],
         [0, 8], [0, 9], [0, 10], [0, 11], [0, 12], [0, 13],
         [1, 0], [1, 1], [1, 2], [1, 3], [1, 4], [1, 5], [1, 6], [1, 7],
         [1, 8], [1, 9], [1, 10], [1, 11], [1, 12], [1, 13]])
    indexs_tensor_proto = tf.contrib.util.make_tensor_proto(indexs,
                                                            dtype=tf.int64)

    # ids: the feature ids of both examples, concatenated.
    ids = numpy.asarray([5, 6, 17, 21, 35, 40, 53, 63, 71, 73, 74, 76, 80, 83,
                         5, 7, 17, 22, 36, 40, 51, 63, 67, 73, 74, 76, 81, 83])
    ids_tensor_proto = tf.contrib.util.make_tensor_proto(ids, dtype=tf.int64)

    # values: all ones (binary features, 14 per example).
    values = numpy.asarray([1.0] * 28)
    values_tensor_proto = tf.contrib.util.make_tensor_proto(values,
                                                            dtype=tf.float32)

    # shape: dense shape of the sparse matrix (2 examples x 124 features).
    shape_tensor_proto = tf.contrib.util.make_tensor_proto(
        numpy.asarray([2, 124]), dtype=tf.int64)

    # Create gRPC client and request
    channel = implementations.insecure_channel(FLAGS.host, FLAGS.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model_name
    if FLAGS.model_version > 0:
        request.model_spec.version.value = FLAGS.model_version
    for input_name, proto in (("keys", keys_tensor_proto),
                              ("indexs", indexs_tensor_proto),
                              ("ids", ids_tensor_proto),
                              ("values", values_tensor_proto),
                              ("shape", shape_tensor_proto)):
        request.inputs[input_name].CopyFrom(proto)

    # Send request
    print(stub.Predict(request, request_timeout))
def get_outputs(self, input_data):
    """Validates arguments, sends input_data to TF-Serving, and returns the
    parsed response.

    Args:
        input_data: dict mapping input names to numpy arrays or scalars.

    Returns:
        The dict produced by self.print_response(response) when
        PRINT_RESPONSE is set; otherwise None.

    Raises:
        IllegalArgumentException: If any argument fails validation.
    """
    # Step 0: check the type of input parameters
    if not isinstance(self.ns.host, str):
        print("The type of \"host\" must be str (string)!")
        raise IllegalArgumentException
    if not re_match("^[0-9localhost.:/]+$", self.ns.host):
        print("hostport does not match preseted character-set")
        raise IllegalArgumentException
    if not isinstance(self.ns.port, int):
        # FIX: repaired the garbled quote in this error message (was \"port\*).
        print("The type of \"port\" must be int!")
        raise IllegalArgumentException
    if not isinstance(self.ns.model_name, str):
        print("the type of \"model_name\" must be str (string)!")
        raise IllegalArgumentException
    if not re_match("^[0-9A-Za-z_. \-/]+$", self.ns.model_name):
        print("model_name does not match preseted character-set")
        raise IllegalArgumentException
    if not isinstance(input_data, dict):
        print("the type of \"input_data\" must be dict!")
        raise IllegalArgumentException
    if not isinstance(MAX_RESPONSE_TIME, (int, float)):
        print("the type of \"max_response_time\" must be int or float!")
        raise IllegalArgumentException

    # Setup connection
    channel = implementations.insecure_channel(self.ns.host, self.ns.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    # Initialize the request
    request = predict_pb2.PredictRequest()
    request.model_spec.name = self.ns.model_name
    request.model_spec.signature_name = self.ns.model_signature_name
    #request.model_spec.version = self.ns.model_version_num

    # Set the input variables of the request
    for key, value in input_data.items():
        if not re_match("^[0-9A-Za-z_. \-/]+$", key):
            # FIX: this message previously said "model_name" (copy-paste).
            print("input key does not match preseted character-set")
            raise IllegalArgumentException
        if isinstance(value, numpy_ndarray):
            request.inputs[key].CopyFrom(
                make_tensor_proto(value, shape=list(value.shape)))
        elif isinstance(value, (int, float)):
            request.inputs[key].CopyFrom(make_tensor_proto(value))
        else:
            # FIX: the fallback branch used value.shape, which non-ndarray
            # values do not have; let make_tensor_proto infer the shape.
            request.inputs[key].CopyFrom(make_tensor_proto(value))

    # Obtain the result of prediction
    response = stub.Predict(request, MAX_RESPONSE_TIME)
    # FIX: previously `responseDict` could be returned unassigned (NameError)
    # when PRINT_RESPONSE was false; return None explicitly instead.
    if PRINT_RESPONSE:
        return self.print_response(response)
    return None
def main(host, port):
    """Runs a single timed Predict call against host:port and prints the result."""
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    t0 = time.time()
    request = create_request()
    print('Predicting...')
    result = stub.Predict(request, 10.0)  # 10 secs timeout
    print("Computed result in %s" % (time.time() - t0))
    print(result)
def do_inference(hostport, num_tests, image, label):
    """Fires num_tests asynchronous Predict RPCs for one (image, label) pair;
    completion is handled by _create_rpc_callback."""
    server_host, server_port = hostport.split(':')
    channel = implementations.insecure_channel(server_host, int(server_port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    for _ in range(num_tests):
        req = predict_pb2.PredictRequest()
        req.model_spec.name = FLAGS.model_name
        req.model_spec.signature_name = 'predict_images'
        req.inputs['images'].CopyFrom(
            tf.contrib.util.make_tensor_proto(image[0], shape=[1, image[0].size]))
        future = stub.Predict.future(req, FLAGS.request_delay)
        future.add_done_callback(_create_rpc_callback(label[0]))
def __init__(self,
             tfserving_host=ner_server.split(':')[0],
             tfserving_port=int(ner_server.split(':')[1])):
    # Caches the TF-Serving address, builds the gRPC prediction stub, and
    # loads the character vocabulary for the NER model.
    # NOTE: the defaults are evaluated once at class-definition time from the
    # module-level `ner_server` ("host:port") value.
    self.tfserving_host = tfserving_host
    self.tfserving_port = tfserving_port
    # setup grcp channel
    self.channel = implementations.insecure_channel(
        self.tfserving_host, self.tfserving_port)
    # setup grpc prediction stub for tfserving
    self.stub = prediction_service_pb2.beta_create_PredictionService_stub(
        self.channel)
    # character -> vector vocabulary loaded from ner_char2vec_path
    self.char_vob = _get_vob(ner_char2vec_path)
def main(_):
    """Classifies FLAGS.image via an 'inception' model served at FLAGS.server."""
    server_host, server_port = FLAGS.server.split(':')
    channel = implementations.insecure_channel(server_host, int(server_port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    # See prediction_service.proto for gRPC request/response details.
    with open(FLAGS.image, 'rb') as f:
        image_bytes = f.read()
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'inception'
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(image_bytes, shape=[1]))
    # Send request
    result = stub.Predict(request, 10.0)  # 10 secs timeout
    print(result)
def run(self):
    """Transforms raw inputs, calls TF-Serving, and returns the transformed output."""
    # Create gRPC client and request
    channel = implementations.insecure_channel(self.grpc_host, self.grpc_port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = self.model_name
    if self.model_version > 0:
        request.model_spec.version.value = self.model_version
    # NOTE(review): `request` above is built but never sent — the transformed
    # request below is what goes over the wire; confirm whether model_spec
    # was meant to be set on it instead.

    # Raw inputs (ie. json) -> TensorFlow Serving request -> raw output
    serving_request = input_transformer(self.raw_inputs)
    serving_response = stub.Predict(serving_request, self.request_timeout)
    return output_transformer(serving_response)
def post(self, model_namespace, model_name, model_version):
    """Handles a predict POST: records Prometheus metrics, forwards the
    transformed request body to TF-Serving as an 'x_observed' float32 tensor,
    and writes the transformed 'y_pred' response back to the client.

    Any failure is logged (request counter/latency labels identify the model)
    rather than propagated.
    """
    model_key_list = [
        'tensorflow', model_namespace, model_name, model_version
    ]
    try:
        # Count every predict attempt, successful or not, before doing work.
        REQUESTS_COUNT.labels('predict', *model_key_list).inc()
        model = self.get_model_assets(model_key_list)
        # Everything inside this context is measured into the latency buckets.
        with REQUEST_LATENCY_BUCKETS.labels('predict',
                                            *model_key_list).time():
            # TODO: Reuse instead of creating this channel everytime
            channel = implementations.insecure_channel(
                self.settings['model_server_tensorflow_serving_host'],
                int(self.settings['model_server_tensorflow_serving_port']))
            stub = prediction_service_pb2.beta_create_PredictionService_stub(
                channel)
            # Transform raw inputs to TensorFlow PredictRequest
            transformed_inputs_request = model.transform_request(
                self.request.body)
            inputs_tensor_proto = tf.make_tensor_proto(
                transformed_inputs_request, dtype=tf.float32)
            tf_request = predict_pb2.PredictRequest()
            tf_request.inputs['x_observed'].CopyFrom(inputs_tensor_proto)
            tf_request.model_spec.name = model_name
            tf_request.model_spec.version.value = int(model_version)
            # Transform TensorFlow PredictResponse into output
            response = stub.Predict(tf_request,
                                    self.settings['request_timeout'])
            response_np = tf.contrib.util.make_ndarray(
                response.outputs['y_pred'])
            transformed_response_np = model.transform_response(response_np)
            self.write(transformed_response_np)
            self.finish()
    except Exception as e:
        # Swallow-and-log: the handler never raises to the web framework.
        message = 'MainHandler.post: Exception - {0} Error {1}'.format(
            '/'.join(model_key_list), str(e))
        LOGGER.info(message)
        logging.exception(message)
def test_one_process(i):
    """Fires FLAGS.benchmark_test_number asynchronous Predict RPCs and waits
    for every callback to signal completion.

    Args:
        i: Worker index identifying this benchmark process.
    """
    request_timeout = FLAGS.request_timeout
    request_batch = FLAGS.benchmark_batch_size

    # Generate inference data: [0 .. batch-1] as float32 features.
    features = numpy.asarray(list(range(request_batch)))
    features_tensor_proto = tf.contrib.util.make_tensor_proto(features,
                                                              dtype=tf.float32)

    # Create gRPC client and request
    channel = implementations.insecure_channel(FLAGS.host, FLAGS.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model_name
    request.model_spec.version.value = FLAGS.model_version
    request.inputs['features'].CopyFrom(features_tensor_proto)

    # Send requests asynchronously; each callback sets its Event when done.
    # BUG FIX: the loop previously rebound `i`, clobbering the worker-index
    # parameter; use a throwaway name instead.
    events = []
    for _ in range(FLAGS.benchmark_test_number):
        event = threading.Event()
        result_future = stub.Predict.future(request, request_timeout)
        result_future.add_done_callback(_create_rpc_callback(event))
        events.append(event)

    # Block until every RPC has completed.
    for event in events:
        event.wait()
def main():
    """Plays one episode of FLAGS.gym_env, querying TF-Serving for each action,
    and prints the total reward when the episode ends."""
    request_timeout = FLAGS.request_timeout

    # Create gRPC client and request
    channel = implementations.insecure_channel(FLAGS.host, FLAGS.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model_name
    if FLAGS.model_version > 0:
        request.model_spec.version.value = FLAGS.model_version

    env = gym.make(FLAGS.gym_env)
    state = env.reset()
    total_reward = 0
    while True:
        if FLAGS.render_game:
            time.sleep(0.1)
            env.render()

        # Ask the served policy for the next action given the current state.
        state_proto = tf.contrib.util.make_tensor_proto(
            numpy.asarray([state]), dtype=tf.float32)
        request.inputs['states'].CopyFrom(state_proto)
        result = stub.Predict(request, request_timeout)
        action = int(result.outputs.get("actions").int64_val[0])

        state, reward, done, info = env.step(action)
        total_reward += reward
        if done:
            print("End of the game, reward: {}".format(total_reward))
            break
def model_prediction():
    """Flask handler: downloads the image URL from the JSON body and classifies
    it with the 'resnet' model served on localhost:9000.

    Returns:
        The raw PredictResponse from TF-Serving.
    """
    host = "localhost"
    port = 9000
    model_name = "resnet"
    # FIX: renamed the local so it no longer shadows the `json` module.
    payload = flask.request.get_json()
    url_input = payload['input']
    # NOTE(review): urllib.urlopen is Python 2 only; on Python 3 this must be
    # urllib.request.urlopen — confirm the target interpreter.
    image = urllib.urlopen(url_input).read()
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    # Consistency fix: use the model_name local instead of repeating 'resnet'.
    request.model_spec.name = model_name
    request.model_spec.signature_name = 'predict_images'
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(image, shape=[1]))
    result = stub.Predict(request, 10.0)  # 10 secs timeout
    print(result)
    return result
def main():
    """Classifies test-image.jpg with a served Keras flower model and prints
    the predicted class and probability.

    The class scores are scraped out of the textual form of the
    PredictResponse, which is fragile but preserved from the original design.
    """
    host = FLAGS.host
    port = FLAGS.port
    model_name = FLAGS.model_name
    model_version = FLAGS.model_version
    request_timeout = FLAGS.request_timeout

    image_filepaths = ["test-image.jpg"]
    for index, image_filepath in enumerate(image_filepaths):
        # BUG FIX: load the current file; the loop previously always loaded
        # image_filepaths[0].
        image_ndarray = image.img_to_array(
            image.load_img(image_filepath, target_size=(224, 224)))
        image_ndarray = image_ndarray / 255.

    # Create gRPC client and request
    channel = implementations.insecure_channel(host, port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.version.value = model_version
    request.inputs['input_image'].CopyFrom(
        tf.contrib.util.make_tensor_proto(
            image_ndarray, shape=[1] + list(image_ndarray.shape)))

    # Send request; parse the five class scores from the response text.
    result = str(stub.Predict(request, request_timeout))
    mylist = result.split('\n')[-8:-3]
    finallist = []
    for element in mylist:
        element = element.split(':')[1]
        finallist.append(float("{:.6f}".format(float(element))))
    index = finallist.index(max(finallist))
    CLASSES = ['Daisy', 'Dandelion', 'Rosa', 'Girasol', 'Tulipán']
    ClassPred = CLASSES[index]
    ClassProb = finallist[index]
    print(finallist)
    print(ClassPred)
    print(ClassProb)
def main():
    """Sends five Pokemon images (plus int32 keys) to the served model and
    prints the raw PredictResponse."""
    host = FLAGS.host
    port = FLAGS.port
    model_name = FLAGS.model_name
    model_version = FLAGS.model_version
    request_timeout = FLAGS.request_timeout

    # Generate inference data
    keys = np.asarray([1, 2, 3, 4, 5])
    keys_tensor_proto = tf.contrib.util.make_tensor_proto(keys, dtype=tf.int32)
    features = np.ndarray(shape=(5, 32, 32, 3), dtype=np.float32)
    image_filepaths = [
        "../data/inference/Blastoise.png",
        "../data/inference/Charizard.png",
        "../data/inference/Mew.png",
        "../data/inference/Pikachu.png",
        "../data/inference/Venusaur.png"
    ]
    for index, image_filepath in enumerate(image_filepaths):
        # BUG FIX: read the current file; the loop previously always loaded
        # image_filepaths[0], so all five feature rows held the same image.
        image_ndarray = ndimage.imread(image_filepath, mode="RGB")
        features[index] = image_ndarray
    features_tensor_proto = tf.contrib.util.make_tensor_proto(features,
                                                              dtype=tf.float32)

    # Create gRPC client and request
    channel = implementations.insecure_channel(host, port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.version.value = model_version
    request.inputs['keys'].CopyFrom(keys_tensor_proto)
    request.inputs['features'].CopyFrom(features_tensor_proto)

    # Send request
    result = stub.Predict(request, request_timeout)
    print(result)
def main(_):
    """Benchmarks the served 'inception' model: sends the same image
    FLAGS.benchmark_test_number times and prints the mean latency in ms."""
    server_host, server_port = FLAGS.server.split(':')
    channel = implementations.insecure_channel(server_host, int(server_port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    # See prediction_service.proto for gRPC request/response details.
    with open(FLAGS.image, 'rb') as f:
        image_bytes = f.read()
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'inception'
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(image_bytes, shape=[1]))

    # Send the same request repeatedly and time the whole run.
    total = FLAGS.benchmark_test_number
    started = time.time()
    for _i in range(total):
        result = stub.Predict(request, 10.0)  # 10 secs timeout
        # print(result)
    elapsed = time.time() - started
    print("Average latency is: {} ms".format(elapsed * 1000 / total))
def __init__(self, host, port):
    """Builds the TF-Serving stub from FLAGS and sets up the bottle web app.

    Args:
        host: Address for this web app to bind to.
        port: Port for this web app to listen on.
    """
    self.request_timeout = FLAGS.request_timeout

    # Create gRPC client and request (the serving address comes from FLAGS,
    # not from the host/port this web app binds to).
    channel = implementations.insecure_channel(FLAGS.host, FLAGS.port)
    self.stub = prediction_service_pb2.beta_create_PredictionService_stub(
        channel)

    self.request = predict_pb2.PredictRequest()
    self.request.model_spec.name = FLAGS.model_name
    self.request.model_spec.signature_name = 'predict_images'
    if FLAGS.model_version > 0:
        self.request.model_spec.version.value = FLAGS.model_version

    # Web-server side configuration.
    self._host = host
    self._port = port
    bottle.BaseRequest.MEMFILE_MAX = 1000000
    self._app = bottle.Bottle()
    self._route()
def infer(self, data, shape):
    """Runs the 'main_model'/'predict' signature on `data` and returns
    dict(data=..., shape=...) decoded from the JSON form of the response."""

    def _out_shape(resp):
        # Tensor shape comes back as a list of {'size': ...} dims.
        dims = resp['outputs']['outputs']['tensorShape']['dim']
        return (int(dims[0]['size']), int(dims[1]['size']))

    def _out_data(resp):
        return resp.get('outputs', {}).get('outputs', {}).get('floatVal')

    channel = implementations.insecure_channel(self.server_host,
                                               self.server_port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    # Send request
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'main_model'
    request.model_spec.signature_name = 'predict'
    request.inputs['inputs'].CopyFrom(
        tf.contrib.util.make_tensor_proto(data, shape=shape))
    result = stub.Predict(request, 10.0)  # 10 secs timeout
    print("Type:", type(result))
    print(result)

    decoded = json.loads(MessageToJson(result))
    return dict(data=_out_data(decoded), shape=_out_shape(decoded))
def predict(model_name):
    """Flask handler: accepts an uploaded image file, classifies it via a
    TF-Serving model on 127.0.0.1:8500, and returns JSON of the form
    {"success": bool, "predictions": [{"label": str, "score": float}]}.

    Args:
        model_name: Name of the served model to query.
    """
    from flask import request
    from flask import jsonify
    from flask import redirect
    print("...... calling predict ......")
    data = {"success": False}
    if request.method == "POST":
        # check if the post request has the file part
        if 'file' not in request.files:
            print('No file part')
        file = request.files['file']
        # if user does not select file, browser also submit a empty part
        # without filename
        if file.filename == '':
            print('No selected file')
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))

            # loading image
            filename = UPLOAD_FOLDER + '/' + filename
            print("\nfilename:", filename)

            host = "127.0.0.1"
            port = 8500
            model_version = 1
            request_timeout = 10.0

            image_filepaths = [filename]
            for index, image_filepath in enumerate(image_filepaths):
                # BUG FIX: load the current file; the loop previously always
                # loaded image_filepaths[0].
                image_ndarray = image.img_to_array(
                    image.load_img(image_filepath, target_size=(224, 224)))
                image_ndarray = image_ndarray / 255.

            # Create gRPC client and request
            # FIX: renamed to tf_request so it no longer shadows flask's
            # `request`; also dropped the no-op `model_name = model_name`.
            channel = implementations.insecure_channel(host, port)
            stub = prediction_service_pb2.beta_create_PredictionService_stub(
                channel)
            tf_request = predict_pb2.PredictRequest()
            tf_request.model_spec.name = model_name
            tf_request.model_spec.version.value = model_version
            tf_request.inputs['input_image'].CopyFrom(
                tf.contrib.util.make_tensor_proto(
                    image_ndarray, shape=[1] + list(image_ndarray.shape)))

            # Send request; scores are scraped from the response's text form.
            result = str(stub.Predict(tf_request, request_timeout))
            mylist = result.split('\n')[-8:-3]
            finallist = []
            for element in mylist:
                element = element.split(':')[1]
                finallist.append(float("{:.6f}".format(float(element))))
            index = finallist.index(max(finallist))
            CLASSES = ['Daisy', 'Dandelion', 'Rosa', 'Girasol', 'Tulipán']
            ClassPred = CLASSES[index]
            ClassProb = finallist[index]
            print(finallist)
            print(ClassPred)
            print(ClassProb)

            label = ClassPred
            score = ClassProb

            # Results as Json
            data["predictions"] = []
            r = {"label": label, "score": float(score)}
            data["predictions"].append(r)

            # Success
            data["success"] = True
    # NOTE(review): original indentation was lost; the return is placed at
    # function level so non-POST requests get {"success": false} — confirm.
    return jsonify(data)
"""获取文本向量 Args: text: 待检测文本 wv: 词向量模型 Returns: [[[ 3.80905056 1.94315064 -0.20703495 -1.31589055 1.9627794 ... 2.16935492 2.95426321 -4.71534014 -3.25034237 -11.28901672]]] """ text = tr.extractWords(text) words = jieba.cut(text.strip()) text_sequence = [] for word in words: try: text_sequence.append(wv[word]) except KeyError: text_sequence.append(wv['UNK']) text_sequence = np.asarray(text_sequence) sample = text_sequence.reshape(1, len(text_sequence), 200) return sample print(" ".join(jieba.cut('分词初始化'))) wv = tl.files.load_npy_to_any(name='../word2vec/output/model_word2vec_200.npy') host, port = ('localhost', '9000') channel = implementations.insecure_channel(host, int(port)) stub = prediction_service_pb2.beta_create_PredictionService_stub(channel) request = predict_pb2.PredictRequest() request.model_spec.name = 'antispam'