def create_request():
    """Build a PredictRequest for the 'resnet' model from a fixed image set.

    Downloads each JPEG over HTTP and packs the raw bytes into a single
    string tensor of shape [num_images].

    Returns:
        A predict_pb2.PredictRequest ready to send to TensorFlow Serving.
    """
    # FIX: the original repeated the download/append block five times;
    # a loop over the URL list is equivalent and removes the duplication.
    image_urls = [
        "https://cammyeu.blob.core.windows.net/2017-07-13/3e961e944d7c9631f64648f3f639e06924c191edG.jpg",
        "https://cammyau.blob.core.windows.net/2017-07-13/24df8d25d4e1282bc45590c919586882dae2c24fE.jpg",
        "https://cammyeu.blob.core.windows.net/2017-07-13/323245bab1d12e47f616f48b39548c8ec20295bbG.jpg",
        "https://cammyus.blob.core.windows.net/2017-07-27/6bd2d1e93c4d81f247b11d3b97e73774f8c3ae7dF.jpg",
        "https://cammyus.blob.core.windows.net/2017-07-27/72107e8b981151bf806cdf7b35837403bbb592e1F.jpg",
    ]
    print('Downloading image')
    image_data = [urllib.urlopen(url).read() for url in image_urls]

    print('Make prediction request')
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'resnet'
    request.model_spec.signature_name = 'predict_images'
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(image_data,
                                          shape=[len(image_data)]))
    return request
def main():
    """Send sys.argv[1] as the 'x_observed' input to the served model and print the result."""
    # Create gRPC client and request
    channel = implementations.insecure_channel(FLAGS.host, FLAGS.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model_name
    # A non-positive version lets the server pick the model version.
    if FLAGS.model_version > 0:
        request.model_spec.version.value = FLAGS.model_version

    observed = numpy.asarray([sys.argv[1]])
    request.inputs['x_observed'].CopyFrom(
        tf.contrib.util.make_tensor_proto(observed, dtype=tf.float32))

    # Send request
    result = stub.Predict(request, FLAGS.request_timeout)
    print(result)
    result_np = tf.contrib.util.make_ndarray(result.outputs['y_pred'])
    print('\n%s\n' % result_np)
def main():
    """Query the local model server configured via PIO_* environment variables."""
    host = "127.0.0.1"
    # BUG FIX: os.environ values are strings.  insecure_channel needs an int
    # port, and `model_version > 0` below raises TypeError on Python 3 (and
    # is always True on Python 2) for a str; cast both to int.
    port = int(os.environ['PIO_MODEL_SERVER_PORT'])
    namespace = os.environ['PIO_MODEL_NAMESPACE']
    model_name = os.environ['PIO_MODEL_NAME']
    model_version = int(os.environ['PIO_MODEL_VERSION'])
    request_timeout = 5.0

    # Create gRPC client and request
    channel = implementations.insecure_channel(host, port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    if model_version > 0:
        request.model_spec.version.value = model_version

    inputs_raw = sys.argv[1]
    # TODO: convert raw json request.body into np array
    inputs_np = numpy.asarray([inputs_raw])
    inputs_tensor_proto = tf.contrib.util.make_tensor_proto(inputs_np,
                                                            dtype=tf.float32)
    request.inputs['x_observed'].CopyFrom(inputs_tensor_proto)

    # Send request
    result = stub.Predict(request, request_timeout)
    print(result)
    result_np = tf.contrib.util.make_ndarray(result.outputs['y_pred'])
    print('\n%s\n' % result_np)
def do_inference(process_num, hostport, num_tests, image, label):
    """Fire num_tests async Predict RPCs from one process and wait for every callback.

    After all RPCs complete, the first process to reach the lock records the
    shared benchmark counters (real_test_num, finish_time).
    """
    #print("Begin process: {}".format(process_num))
    host, port = hostport.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    pending = []
    for _ in range(num_tests):
        done_event = threading.Event()
        request = predict_pb2.PredictRequest()
        request.model_spec.name = FLAGS.model_name
        request.model_spec.signature_name = 'predict_images'
        request.inputs['images'].CopyFrom(
            tf.contrib.util.make_tensor_proto(
                image[0], shape=[1, image[0].size]))
        future = stub.Predict.future(request, FLAGS.request_delay)
        future.add_done_callback(_create_rpc_callback(label[0], done_event))
        pending.append(done_event)

    for done_event in pending:
        done_event.wait()

    global lock, counter, real_test_num, start_time, finish_time
    with lock:
        # Only the first process to finish records the totals.
        if real_test_num.value == 0:
            real_test_num.value = counter.value
            finish_time.value = time.time()
def recognise(request): try: f = request.FILES['image'] #Return a k length list of unique elements chosen from the population sequence get_pic_name = f.name + ''.join( random.sample(string.ascii_letters + string.digits, 3)) # store the file in disk with open('static/img/' + get_pic_name, 'wb+') as destination: for chunk in f.chunks(): destination.write(chunk) print '[INFO]:', request.method print '[INFO]:get from upload' except: get_pic_name = request.GET['image_name'] print '[INFO]:', request.method print '[INFO]:get from click image' finally: print '[INFO]:Get', get_pic_name, '\n' pic_data = open('static/img/' + get_pic_name, 'rb').read() request = predict_pb2.PredictRequest() request.model_spec.name = 'inception' request.inputs['images'].CopyFrom( tf.contrib.util.make_tensor_proto(pic_data, shape=[1])) result = stub.Predict(request, 10.0) # 10 secs timeout feat = result.outputs['feats'].float_val #TODO annoy get result idxs = t.get_nns_by_vector(feat, 8) data = {} data["image_name"] = get_pic_name data['result'] = names[idxs] return render(request, 'search.html', data)
def main():
    """Send a two-sample predict request to a local gRPC server and print decoded outputs."""
    # Connect with the gRPC server
    channel = grpc.insecure_channel("127.0.0.1:50051")
    stub = predict_pb2.PredictionServiceStub(channel)
    timeout_seconds = 5.0

    # Make request data
    request = predict_pb2.PredictRequest()
    feature_batch = np.array([[10, 10, 10, 8, 6, 1, 8, 9, 1],
                              [10, 10, 10, 8, 6, 1, 8, 9, 1]])
    key_batch = np.array([1, 2])

    # Convert numpy to TensorProto
    request.inputs["features"].CopyFrom(
        tensor_util.make_tensor_proto(feature_batch))
    request.inputs["key"].CopyFrom(tensor_util.make_tensor_proto(key_batch))

    # Invoke gRPC request
    response = stub.Predict(request, timeout_seconds)

    # Convert every output TensorProto back into a numpy array.
    result = {name: tensor_util.MakeNdarray(proto)
              for name, proto in response.outputs.items()}
    print(result)
def do_post(self, inputs):
    """Issue one blocking Predict call and return the 'y_pred' output as a numpy array.

    NOTE(review): `inputs` is currently ignored -- the request always sends
    the hard-coded value [1.0] (see TODO below).  Confirm intent.
    """
    # Create gRPC client and request
    grpc_port = int(sys.argv[2])
    channel = implementations.insecure_channel(grpc_host, grpc_port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(
        channel)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    if model_version > 0:
        request.model_spec.version.value = model_version

    # TODO: don't hard code this!
    observed_proto = tf.contrib.util.make_tensor_proto(
        np.asarray([1.0]), dtype=tf.float32)
    request.inputs['x_observed'].CopyFrom(observed_proto)

    # Send request
    result = stub.Predict(request, request_timeout)
    return tf.contrib.util.make_ndarray(result.outputs['y_pred'])
def main():
    """Classify one local MNIST digit image via the gRPC prediction service."""
    # Connect with the gRPC server
    channel = grpc.insecure_channel("127.0.0.1:50051")
    stub = predict_pb2.PredictionServiceStub(channel)
    timeout_seconds = 5.0

    # Load the image and scale pixel values into [0, 1].
    image = Image.open('../mnist_jpgs/4/pic_test1010.png')
    pixels = np.array(image) / (255 * 1.0)
    flattened = pixels.reshape([-1, 784])

    # Convert numpy to TensorProto
    request = predict_pb2.PredictRequest()
    request.inputs["features"].CopyFrom(
        tensor_util.make_tensor_proto(flattened))
    request.inputs["key"].CopyFrom(
        tensor_util.make_tensor_proto(np.array([1])))

    # Invoke gRPC request
    response = stub.Predict(request, timeout_seconds)

    # Convert each output TensorProto back to numpy.
    result = {name: tensor_util.MakeNdarray(proto)
              for name, proto in response.outputs.items()}
    print(result)
def test_one_process(i):
    """Fire FLAGS.benchmark_test_number async Predict RPCs (results handled in callbacks)."""
    batch_size = FLAGS.benchmark_batch_size
    # Generate inference data: one float per batch slot, 0..batch_size-1.
    features_tensor_proto = tf.contrib.util.make_tensor_proto(
        numpy.asarray([j for j in range(batch_size)]), dtype=tf.float32)

    # Create gRPC client and request
    channel = implementations.insecure_channel(FLAGS.host, FLAGS.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model_name
    request.model_spec.version.value = FLAGS.model_version
    request.inputs['features'].CopyFrom(features_tensor_proto)

    # Send request
    for _ in range(FLAGS.benchmark_test_number):
        result_future = stub.Predict.future(request, FLAGS.request_timeout)
        result_future.add_done_callback(_create_rpc_callback())
def main():
    """Send one predict request with hard-coded keys/features and print the raw response."""
    # Generate inference data
    keys_proto = tf.contrib.util.make_tensor_proto(
        numpy.asarray([1, 2, 3]), dtype=tf.int32)
    feature_rows = [[1, 2, 3, 4, 5, 6, 7, 8, 9],
                    [1, 1, 1, 1, 1, 1, 1, 1, 1],
                    [9, 8, 7, 6, 5, 4, 3, 2, 1],
                    [9, 9, 9, 9, 9, 9, 9, 9, 9]]
    features_proto = tf.contrib.util.make_tensor_proto(
        numpy.asarray(feature_rows), dtype=tf.float32)

    # Create gRPC client and request
    channel = implementations.insecure_channel(FLAGS.host, FLAGS.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model_name
    # A non-positive version lets the server pick the model version.
    if FLAGS.model_version > 0:
        request.model_spec.version.value = FLAGS.model_version
    request.inputs['keys'].CopyFrom(keys_proto)
    request.inputs['features'].CopyFrom(features_proto)

    # Send request
    result = stub.Predict(request, FLAGS.request_timeout)
    print(result)
def test_mnist_happy_path(self):
    """Round-trip one recorded MNIST request and compare against the recorded response."""
    input_data_file = os.path.join(self.test_data_path,
                                   'mnist_test_data_set_0_input.pb')
    output_data_file = os.path.join(self.test_data_path,
                                    'mnist_test_data_set_0_output.pb')

    request = predict_pb2.PredictRequest()
    with open(input_data_file, 'rb') as f:
        request.ParseFromString(f.read())

    uri = "{}:{}".format(self.server_ip, self.server_port)
    test_util.test_log(uri)
    with grpc.insecure_channel(uri) as channel:
        stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
        actual_result = stub.Predict(request)

    expected_result = predict_pb2.PredictResponse()
    with open(output_data_file, 'rb') as f:
        expected_result.ParseFromString(f.read())

    # Every expected output tensor must come back with the same dtype.
    for name in expected_result.outputs.keys():
        self.assertEqual(actual_result.outputs[name].data_type,
                         expected_result.outputs[name].data_type)

    # Dims must match; accumulate the total element count while checking.
    actual_dims = actual_result.outputs['Plus214_Output_0'].dims
    expected_dims = expected_result.outputs['Plus214_Output_0'].dims
    count = 1
    for i in range(0, len(expected_dims)):
        self.assertEqual(actual_dims[i], expected_dims[i])
        count = count * int(actual_dims[i])

    actual_array = numpy.frombuffer(
        actual_result.outputs['Plus214_Output_0'].raw_data,
        dtype=numpy.float32)
    expected_array = numpy.frombuffer(
        expected_result.outputs['Plus214_Output_0'].raw_data,
        dtype=numpy.float32)
    self.assertEqual(len(actual_array), len(expected_array))
    self.assertEqual(len(actual_array), count)
    for i in range(0, count):
        self.assertTrue(test_util.compare_floats(actual_array[i],
                                                 expected_array[i],
                                                 rel_tol=0.001))
def run(self):
    """Decode the JSON payload in self.inputs, call the model server, and return JSON."""
    # Convert json input to tensor
    payload = json.loads(self.inputs.decode('utf-8'))
    observed = np.asarray([payload['x_observed']])
    observed_proto = tf.contrib.util.make_tensor_proto(observed,
                                                       dtype=tf.float32)

    # Build the PredictRequest from inputs
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    if model_version > 0:
        request.model_spec.version.value = model_version
    request.inputs['x_observed'].CopyFrom(observed_proto)

    # Create gRPC client and request
    channel = implementations.insecure_channel(grpc_host, int(sys.argv[2]))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(
        channel)

    # Send request
    result = stub.Predict(request, request_timeout)

    # Convert PredictResult into np array, then into json.
    result_np = tf.contrib.util.make_ndarray(result.outputs['y_pred'])
    return json.dumps({"y_pred": result_np.tolist()[0]})
def _predict_request(self, chari):
    """Build a PredictRequest for the 'ner' model from character ids `chari`."""
    req = predict_pb2.PredictRequest()
    req.model_spec.name = 'ner'
    req.model_spec.signature_name = 'predict_sentence'
    req.inputs['words'].CopyFrom(
        _predict_tensor_proto(chari, self._predict_shape))
    return req
def main():
    """Run FLAGS.benchmark_test_number blocking Predict calls and report mean latency."""
    batch_size = FLAGS.benchmark_batch_size
    # Generate inference data: one float per batch slot, 0..batch_size-1.
    features = numpy.asarray([j for j in range(batch_size)])
    features_tensor_proto = tf.contrib.util.make_tensor_proto(
        features, dtype=tf.float32)

    # Create gRPC client and request
    channel = implementations.insecure_channel(FLAGS.host, FLAGS.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model_name
    request.model_spec.version.value = FLAGS.model_version
    request.inputs['features'].CopyFrom(features_tensor_proto)

    # Send request, timing the full run of sequential calls.
    request_number = FLAGS.benchmark_test_number
    start_time = time.time()
    for _ in range(request_number):
        result = stub.Predict(request, FLAGS.request_timeout)
    end_time = time.time()
    print("Average latency is: {} ms".format(
        (end_time - start_time) * 1000 / request_number))
def do_inference(hostport, work_dir, concurrency, num_tests):
    """Exercise the PredictionService with throttled concurrent MNIST requests.

    Args:
      hostport: Host:port address of the PredictionService.
      work_dir: The full path of working directory for test data set.
      concurrency: Maximum number of concurrent requests.
      num_tests: Number of test images to use.

    Returns:
      The classification error rate.

    Raises:
      IOError: An error occurred processing test data set.
    """
    test_data_set = input_data.read_data_sets(work_dir).test
    host, port = hostport.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    result_counter = _ResultCounter(num_tests, concurrency)
    for _ in range(num_tests):
        image, label = test_data_set.next_batch(1)
        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'mnist'
        request.inputs['images'].CopyFrom(
            tf.contrib.util.make_tensor_proto(
                image[0], shape=[1, image[0].size]))
        # Block until the counter allows another in-flight request.
        result_counter.throttle()
        result_future = stub.Predict.future(request, 5.0)  # 5 seconds
        result_future.add_done_callback(
            _create_rpc_callback(label[0], result_counter))
    return result_counter.get_error_rate()
def predict(server, model, data, timeout=10.0):
    """Request generic gRPC server with specified data.

    Args:
      server: The address of server. Example: "localhost:9000".
      model: The name of the model. Example: "mnist".
      data: The json data to request. Example:
          {"keys_dtype": "int32", "keys": [[1], [2]]}.
      timeout: Seconds to wait for the RPC before failing.

    Returns:
      The predict result in dictionary format. Example: {"keys": [1, 2]}.
    """
    host, port = server.split(":")
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model
    for k, v in data.items():
        # Keys ending in "_dtype" describe the dtype of the matching tensor
        # entry; only the non-"_dtype" keys are tensors themselves.
        # FIX: idiomatic truth test (was `k.endswith("_dtype") == False`).
        if not k.endswith("_dtype"):
            numpy_data = np.array(v)
            dtype = data[k + "_dtype"]
            request.inputs[k].CopyFrom(
                tensor_util.make_tensor_proto(numpy_data, dtype=dtype))
    result = stub.Predict(request, timeout)
    result_dict = {}
    for k, v in result.outputs.items():
        result_dict[k] = get_tensor_values(v)
    return result_dict
def main():
    """Send one sparse predict request (keys/indexs/ids/values/shape) and print the response.

    Example data:
    0 5:1 6:1 17:1 21:1 35:1 40:1 53:1 63:1 71:1 73:1 74:1 76:1 80:1 83:1
    1 5:1 7:1 17:1 22:1 36:1 40:1 51:1 63:1 67:1 73:1 74:1 76:1 81:1 83:1
    """
    # Generate keys TensorProto
    keys_tensor_proto = tf.contrib.util.make_tensor_proto(
        numpy.asarray([1, 2]), dtype=tf.int32)

    # Sparse coordinates (row, position) of every non-zero entry.
    indexs = numpy.asarray(
        [[0, 0], [0, 1], [0, 2], [0, 3], [0, 4], [0, 5], [0, 6], [0, 7],
         [0, 8], [0, 9], [0, 10], [0, 11], [0, 12], [0, 13], [1, 0], [1, 1],
         [1, 2], [1, 3], [1, 4], [1, 5], [1, 6], [1, 7], [1, 8], [1, 9],
         [1, 10], [1, 11], [1, 12], [1, 13]])
    indexs_tensor_proto = tf.contrib.util.make_tensor_proto(indexs,
                                                            dtype=tf.int64)

    # Feature ids of the non-zero entries (matches the example data above).
    ids = numpy.asarray([5, 6, 17, 21, 35, 40, 53, 63, 71, 73, 74, 76, 80, 83,
                         5, 7, 17, 22, 36, 40, 51, 63, 67, 73, 74, 76, 81, 83])
    ids_tensor_proto = tf.contrib.util.make_tensor_proto(ids, dtype=tf.int64)

    # Values of the non-zero entries -- all 1.0 here (28 entries).
    values = numpy.asarray([1.0] * 28)
    values_tensor_proto = tf.contrib.util.make_tensor_proto(values,
                                                            dtype=tf.float32)

    # Dense shape of the sparse batch: [2, 124].
    shape_tensor_proto = tf.contrib.util.make_tensor_proto(
        numpy.asarray([2, 124]), dtype=tf.int64)

    # Create gRPC client and request
    channel = implementations.insecure_channel(FLAGS.host, FLAGS.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model_name
    if FLAGS.model_version > 0:
        request.model_spec.version.value = FLAGS.model_version
    request.inputs["keys"].CopyFrom(keys_tensor_proto)
    request.inputs["indexs"].CopyFrom(indexs_tensor_proto)
    request.inputs["ids"].CopyFrom(ids_tensor_proto)
    request.inputs["values"].CopyFrom(values_tensor_proto)
    request.inputs["shape"].CopyFrom(shape_tensor_proto)

    # Send request
    result = stub.Predict(request, FLAGS.request_timeout)
    print(result)
def get_outputs(self, input_data):
    """Validate arguments, send `input_data` to the prediction service, and return the response.

    Args:
      input_data: dict mapping input-tensor names to numpy arrays or scalars.

    Raises:
      IllegalArgumentException: if any argument fails validation.
    """
    # Step 0: check the type of input parameters
    if not isinstance(self.ns.host, str):
        print("The type of \"host\" must be str (string)!")
        raise IllegalArgumentException
    if not re_match(r"^[0-9localhost.:/]+$", self.ns.host):
        print("hostport does not match preset character-set")
        raise IllegalArgumentException
    if not isinstance(self.ns.port, int):
        # BUG FIX: message previously read `\"port\*` (typo for `\"port\"`).
        print("The type of \"port\" must be int!")
        raise IllegalArgumentException
    if not isinstance(self.ns.model_name, str):
        print("the type of \"model_name\" must be str (string)!")
        raise IllegalArgumentException
    if not re_match(r"^[0-9A-Za-z_. \-/]+$", self.ns.model_name):
        print("model_name does not match preset character-set")
        raise IllegalArgumentException
    if not isinstance(input_data, dict):
        print("the type of \"input_data\" must be dict!")
        raise IllegalArgumentException
    if (not isinstance(MAX_RESPONSE_TIME, int)) and (not isinstance(
            MAX_RESPONSE_TIME, float)):
        print("the type of \"max_response_time\" must be int or float!")
        raise IllegalArgumentException

    # Setup connection
    channel = implementations.insecure_channel(self.ns.host, self.ns.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    # Initialize the request
    request = predict_pb2.PredictRequest()
    request.model_spec.name = self.ns.model_name
    request.model_spec.signature_name = self.ns.model_signature_name
    #request.model_spec.version = self.ns.model_version_num

    # Set the input variables of the request
    for key, value in input_data.items():
        if not re_match(r"^[0-9A-Za-z_. \-/]+$", key):
            # BUG FIX: the message previously blamed "model_name"; it is the
            # input key that failed validation in this branch.
            print("input key does not match preset character-set")
            raise IllegalArgumentException
        if isinstance(value, numpy_ndarray):
            request.inputs[key].CopyFrom(
                make_tensor_proto(value, shape=list(value.shape)))
        elif isinstance(value, int) or isinstance(value, float):
            request.inputs[key].CopyFrom(make_tensor_proto(value))
        else:
            # NOTE(review): this fallback assumes `value` has a .shape
            # attribute; non-array, non-scalar inputs will raise here.
            request.inputs[key].CopyFrom(
                make_tensor_proto(value, shape=list(value.shape)))

    # Obtain the result of prediction
    response = stub.Predict(request, MAX_RESPONSE_TIME)
    if PRINT_RESPONSE:
        responseDict = self.print_response(response)
        return responseDict
    # NOTE(review): when PRINT_RESPONSE is falsy this returns None.
def gen_input_pb(pb_full_path, input_name, output_name, request_file_path):
    """Wrap a serialized TensorProto file into a serialized PredictRequest file.

    Args:
      pb_full_path: path of the serialized input TensorProto.
      input_name: name to register the tensor under in the request.
      output_name: output to request via the output filter.
      request_file_path: where to write the serialized PredictRequest.
    """
    tensor = onnx_ml_pb2.TensorProto()
    with open(pb_full_path, 'rb') as fin:
        tensor.ParseFromString(fin.read())

    request = predict_pb2.PredictRequest()
    request.inputs[input_name].CopyFrom(tensor)
    request.output_filter.append(output_name)

    with open(request_file_path, "wb") as fout:
        fout.write(request.SerializeToString())
def _predict_request(self, wordi, chari):
    """Build a PredictRequest for the 'clfier' model from word and char id sequences."""
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'clfier'
    request.model_spec.signature_name = 'predict_sentence'

    word_shape = self._create_predict_shape([1, C_MAX_SENTENCE_LEN])
    char_shape = self._create_predict_shape(
        [1, C_MAX_SENTENCE_LEN * C_MAX_WORD_LEN])
    request.inputs['words'].CopyFrom(_predict_tensor_proto(wordi, word_shape))
    request.inputs['chars'].CopyFrom(_predict_tensor_proto(chari, char_shape))
    return request
def do_inference(hostport, num_tests, image, label):
    """Issue num_tests async Predict RPCs for one image (results handled in callbacks)."""
    host, port = hostport.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    for _ in range(num_tests):
        request = predict_pb2.PredictRequest()
        request.model_spec.name = FLAGS.model_name
        request.model_spec.signature_name = 'predict_images'
        request.inputs['images'].CopyFrom(
            tf.contrib.util.make_tensor_proto(
                image[0], shape=[1, image[0].size]))
        future = stub.Predict.future(request, FLAGS.request_delay)
        future.add_done_callback(_create_rpc_callback(label[0]))
def main(_):
    """Send FLAGS.image to the served 'inception' model and print the raw prediction."""
    host, port = FLAGS.server.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    # Send request
    # See prediction_service.proto for gRPC request/response details.
    with open(FLAGS.image, 'rb') as f:
        image_bytes = f.read()
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'inception'
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(image_bytes, shape=[1]))
    result = stub.Predict(request, 10.0)  # 10 secs timeout
    print(result)
def __get_request_message(data: np.ndarray) -> predict_pb2.PredictRequest:
    """Pack `data` as a single-example float32 tensor under input name 'float_input'.

    A batch dimension is prepended, so an input of shape (d1, ...) is sent
    as (1, d1, ...).

    FIX: the parameter was annotated `np.array`, which is a function rather
    than a type; `np.ndarray` is the correct annotation.
    """
    input_np_array = np.array(data, dtype=np.float32)
    input_np_array = np.expand_dims(input_np_array, axis=0)

    input_tensor = onnx_ml_pb2.TensorProto()
    input_tensor.dims.extend(input_np_array.shape)
    input_tensor.data_type = 1  # float
    input_tensor.raw_data = input_np_array.tobytes()

    request_message = predict_pb2.PredictRequest()
    request_message.inputs['float_input'].data_type = input_tensor.data_type
    request_message.inputs['float_input'].dims.extend(input_np_array.shape)
    request_message.inputs['float_input'].raw_data = input_tensor.raw_data
    return request_message
def run(self):
    """Transform raw inputs, call TensorFlow Serving, and return the transformed output."""
    # Create gRPC client and request
    channel = implementations.insecure_channel(self.grpc_host, self.grpc_port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    # NOTE(review): this request is built but never sent -- the call below
    # sends `transformed_input` instead.  Confirm whether the model_spec
    # settings here are dead code or should be merged into the sent request.
    request = predict_pb2.PredictRequest()
    request.model_spec.name = self.model_name
    if self.model_version > 0:
        request.model_spec.version.value = self.model_version

    # Transform raw inputs (ie. json) to TensorFlow Serving Request
    transformed_input = input_transformer(self.raw_inputs)

    # Send request
    output = stub.Predict(transformed_input, self.request_timeout)

    # Transform TensorFlow Serving Response to raw output (ie. json)
    return output_transformer(output)
def post(self, model_namespace, model_name, model_version):
    """Handle a predict POST: call TensorFlow Serving for the addressed model and write the result."""
    model_key_list = [
        'tensorflow', model_namespace, model_name, model_version
    ]
    try:
        REQUESTS_COUNT.labels('predict', *model_key_list).inc()
        model = self.get_model_assets(model_key_list)
        with REQUEST_LATENCY_BUCKETS.labels('predict',
                                            *model_key_list).time():
            # TODO: Reuse instead of creating this channel everytime
            serving_channel = implementations.insecure_channel(
                self.settings['model_server_tensorflow_serving_host'],
                int(self.settings['model_server_tensorflow_serving_port']))
            serving_stub = prediction_service_pb2.beta_create_PredictionService_stub(
                serving_channel)

            # Transform raw inputs to TensorFlow PredictRequest
            transformed_inputs = model.transform_request(self.request.body)
            serving_request = predict_pb2.PredictRequest()
            serving_request.model_spec.name = model_name
            serving_request.model_spec.version.value = int(model_version)
            serving_request.inputs['x_observed'].CopyFrom(
                tf.make_tensor_proto(transformed_inputs, dtype=tf.float32))

            # Transform TensorFlow PredictResponse into output
            serving_response = serving_stub.Predict(
                serving_request, self.settings['request_timeout'])
            response_np = tf.contrib.util.make_ndarray(
                serving_response.outputs['y_pred'])
            self.write(model.transform_response(response_np))
            self.finish()
    except Exception as e:
        message = 'MainHandler.post: Exception - {0} Error {1}'.format(
            '/'.join(model_key_list), str(e))
        LOGGER.info(message)
        logging.exception(message)
def main():
    """Play one gym episode, choosing actions from the served model, and print total reward."""
    # Create gRPC client and request
    channel = implementations.insecure_channel(FLAGS.host, FLAGS.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model_name
    if FLAGS.model_version > 0:
        request.model_spec.version.value = FLAGS.model_version

    env = gym.make(FLAGS.gym_env)
    state = env.reset()
    total_reward = 0
    while True:
        if FLAGS.render_game:
            time.sleep(0.1)
            env.render()

        # Ask the model for the next action given the current state.
        states_proto = tf.contrib.util.make_tensor_proto(
            numpy.asarray([state]), dtype=tf.float32)
        request.inputs['states'].CopyFrom(states_proto)
        result = stub.Predict(request, FLAGS.request_timeout)
        action = int(result.outputs.get("actions").int64_val[0])

        state, reward, done, info = env.step(action)
        total_reward += reward
        if done:
            print("End of the game, reward: {}".format(total_reward))
            break
def test_one_process(i):
    """Send FLAGS.benchmark_test_number async Predict RPCs and block until every callback fires."""
    batch_size = FLAGS.benchmark_batch_size
    # Generate inference data: one float per batch slot, 0..batch_size-1.
    features = numpy.asarray([j for j in range(batch_size)])
    features_tensor_proto = tf.contrib.util.make_tensor_proto(
        features, dtype=tf.float32)

    # Create gRPC client and request
    channel = implementations.insecure_channel(FLAGS.host, FLAGS.port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model_name
    request.model_spec.version.value = FLAGS.model_version
    request.inputs['features'].CopyFrom(features_tensor_proto)

    # Send request: one Event per RPC, signalled by the done-callback.
    completion_events = []
    for _ in range(FLAGS.benchmark_test_number):
        done = threading.Event()
        future = stub.Predict.future(request, FLAGS.request_timeout)
        future.add_done_callback(_create_rpc_callback(done))
        completion_events.append(done)

    # Wait for all in-flight RPCs to complete.
    for done in completion_events:
        done.wait()
def model_prediction():
    """Flask endpoint: fetch the image at json['input'] and run it through 'resnet'."""
    payload = flask.request.get_json()
    image = urllib.urlopen(payload['input']).read()

    channel = implementations.insecure_channel("localhost", 9000)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'resnet'
    request.model_spec.signature_name = 'predict_images'
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(image, shape=[1]))
    result = stub.Predict(request, 10.0)  # 10 secs timeout
    print(result)
    return result
def main():
    """Classify each image in image_filepaths with the served model and print the top class."""
    host = FLAGS.host
    port = FLAGS.port
    model_name = FLAGS.model_name
    model_version = FLAGS.model_version
    request_timeout = FLAGS.request_timeout

    image_filepaths = ["test-image.jpg"]
    for index, image_filepath in enumerate(image_filepaths):
        # BUG FIX: was image_filepaths[0], which always loaded the first
        # image regardless of the loop variable.
        image_ndarray = image.img_to_array(
            image.load_img(image_filepath, target_size=(224, 224)))
        image_ndarray = image_ndarray / 255.

        # Create gRPC client and request
        channel = implementations.insecure_channel(host, port)
        stub = prediction_service_pb2.beta_create_PredictionService_stub(
            channel)
        request = predict_pb2.PredictRequest()
        request.model_spec.name = model_name
        request.model_spec.version.value = model_version
        request.inputs['input_image'].CopyFrom(
            tf.contrib.util.make_tensor_proto(
                image_ndarray, shape=[1] + list(image_ndarray.shape)))

        # Send request
        # NOTE(review): parsing str(response) is fragile; prefer decoding
        # result.outputs with tf.contrib.util.make_ndarray.
        result = str(stub.Predict(request, request_timeout))
        mylist = result.split('\n')[-8:-3]
        finallist = []
        for element in mylist:
            element = element.split(':')[1]
            finallist.append(float("{:.6f}".format(float(element))))
        index = finallist.index(max(finallist))
        CLASSES = ['Daisy', 'Dandelion', 'Rosa', 'Girasol', 'Tulipán']
        ClassPred = CLASSES[index]
        ClassProb = finallist[index]
        print(finallist)
        print(ClassPred)
        print(ClassProb)
def main():
    """Send a batch of five 32x32 RGB images to the served model and print the response."""
    host = FLAGS.host
    port = FLAGS.port
    model_name = FLAGS.model_name
    model_version = FLAGS.model_version
    request_timeout = FLAGS.request_timeout

    # Generate inference data
    keys = np.asarray([1, 2, 3, 4, 5])
    keys_tensor_proto = tf.contrib.util.make_tensor_proto(keys,
                                                          dtype=tf.int32)
    features = np.ndarray(shape=(5, 32, 32, 3), dtype=np.float32)
    image_filepaths = [
        "../data/inference/Blastoise.png",
        "../data/inference/Charizard.png",
        "../data/inference/Mew.png",
        "../data/inference/Pikachu.png",
        "../data/inference/Venusaur.png"
    ]
    for index, image_filepath in enumerate(image_filepaths):
        # BUG FIX: was image_filepaths[0], which loaded Blastoise.png into
        # every batch slot instead of the image for this index.
        features[index] = ndimage.imread(image_filepath, mode="RGB")
    features_tensor_proto = tf.contrib.util.make_tensor_proto(
        features, dtype=tf.float32)

    # Create gRPC client and request
    channel = implementations.insecure_channel(host, port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.version.value = model_version
    request.inputs['keys'].CopyFrom(keys_tensor_proto)
    request.inputs['features'].CopyFrom(features_tensor_proto)

    # Send request
    result = stub.Predict(request, request_timeout)
    print(result)