from functools import partial

import grpc
# Generated gRPC bindings for the TensorRT Inference Server protos; in the
# tensorrtserver client library they are importable from tensorrtserver.api.
from tensorrtserver.api import grpc_service_pb2
from tensorrtserver.api import grpc_service_pb2_grpc

# parse_model, requestGenerator, and postprocess are helpers adapted from
# NVIDIA's image_client example and are defined elsewhere in this module.


def get_prediction(image_filename, server_host='localhost', server_port=8001,
                   model_name="bolt", model_version=None):
    """
    Retrieve a prediction from a TensorRT inference server

    :param image_filename: path to a bolt image
    :param server_host: the address of the TensorRT inference server
    :param server_port: the port used by the server
    :param model_name: the name of the model
    :param model_version: the version of the model (None selects the latest)
    :return 0: the integer predicted in the bolt image
    :return 1: the confidence scores for all classes
    """
    channel = grpc.insecure_channel(server_host + ':' + str(server_port))
    grpc_stub = grpc_service_pb2_grpc.GRPCServiceStub(channel)

    # Prepare request for Status gRPC
    request = grpc_service_pb2.StatusRequest(model_name=model_name)
    # Call and receive response from Status gRPC
    response = grpc_stub.Status(request)

    # Make sure the model matches our requirements, and get some
    # properties of the model that we need for preprocessing
    batch_size = 1
    verbose = False
    input_name, output_name, c, h, w, format, dtype = parse_model(
        response, model_name, batch_size, verbose)

    filledRequestGenerator = partial(requestGenerator, input_name, output_name,
                                     c, h, w, format, dtype, model_name,
                                     model_version, image_filename)

    # Send requests of batch_size images. If the number of
    # images isn't an exact multiple of batch_size then just
    # start over with the first images until the batch is filled.
    result_filenames = []
    responses = []

    # Send each request synchronously and collect the responses in order
    for request in filledRequestGenerator(result_filenames):
        responses.append(grpc_stub.Infer(request))

    idx = 0
    for response in responses:
        print("Request {}, batch size {}".format(idx, batch_size))
        label, score = postprocess(response.meta_data.output,
                                   result_filenames[idx], batch_size)
        idx += 1

    return label, score
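# A quick usage sketch for get_prediction above; the image path here is
# hypothetical, and the server is assumed to be listening on localhost:8001.
label, scores = get_prediction('images/bolt_0.png', model_name='bolt')
print('predicted label: {}, scores: {}'.format(label, scores))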
# Command-line entry point: parse the remaining flags (the preceding
# add_argument calls for --model-name, --batch-size, and --verbose are not
# shown in this excerpt).
parser.add_argument('-u', '--url', type=str, required=False,
                    default='localhost:8001',
                    help='Inference server URL. Default is localhost:8001.')
parser.add_argument('image_filename', type=str, nargs='?', default=None,
                    help='Input image.')
FLAGS = parser.parse_args()

# Create gRPC stub for communicating with the server
channel = grpc.insecure_channel(FLAGS.url)
grpc_stub = grpc_service_pb2_grpc.GRPCServiceStub(channel)

# Prepare request for Status gRPC
request = grpc_service_pb2.StatusRequest(model_name=FLAGS.model_name)
# Call and receive response from Status gRPC
response = grpc_stub.Status(request)

# Make sure the model matches our requirements, and get some
# properties of the model that we need for preprocessing
input_name, output_name, c, h, w, format, dtype = parse_model(
    response, FLAGS.model_name, FLAGS.batch_size, FLAGS.verbose)

filledRequestGenerator = partial(requestGenerator, input_name, output_name,
                                 c, h, w, format, dtype, FLAGS)

# Send requests of FLAGS.batch_size images. If the number of
# images isn't an exact multiple of FLAGS.batch_size then just
# start over with the first images until the batch is filled.
result_filenames = []
requests = []
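# The excerpt above ends right after the bookkeeping lists are created. A
# minimal sketch of how the send/receive loop typically continues, mirroring
# the synchronous path in get_prediction above (the async mode of the
# original NVIDIA example is omitted here):
responses = []
for request in filledRequestGenerator(result_filenames):
    responses.append(grpc_stub.Infer(request))

for idx, response in enumerate(responses):
    print("Request {}, batch size {}".format(idx, FLAGS.batch_size))
    postprocess(response.meta_data.output, result_filenames[idx],
                FLAGS.batch_size)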
import logging


def get_prediction(image_filename, server_host='localhost', server_port=8001,
                   model_name="end2end-demo", model_version=None):
    """
    Retrieve a prediction from a TensorRT inference server

    :param image_filename: path to an end2end-demo image
    :param server_host: the address of the TensorRT inference server
    :param server_port: the port used by the server
    :param model_name: the name of the model
    :param model_version: the version of the model (None selects the latest)
    :return 0: the integer predicted in the end2end-demo image
    :return 1: the confidence scores for all classes
    """
    channel = grpc.insecure_channel(server_host + ':' + str(server_port))
    grpc_stub = grpc_service_pb2_grpc.GRPCServiceStub(channel)

    # https://docs.nvidia.com/deeplearning/sdk/tensorrt-inference-server-guide/docs/http_grpc_api.html#section-api-status
    # Prepare request for Status gRPC
    request = grpc_service_pb2.StatusRequest(model_name=model_name)
    # Call and receive response from Status gRPC
    response = grpc_stub.Status(request)
    print('response:', response)

    # Make sure the model matches our requirements, and get some
    # properties of the model that we need for preprocessing
    batch_size = 1
    verbose = False
    input_name, output_name, c, h, w, format, dtype = parse_model(
        response, model_name, batch_size, verbose)

    status_full = ("input_name=%s, output_name=%s, c=%s, h=%s, w=%s, "
                   "format=%s, dtype=%s" %
                   (input_name, output_name, c, h, w, format, dtype))
    logging.info("Got status for model %s: %s", model_name, status_full)
    print("Got status for model " + model_name)
    print(status_full)

    filledRequestGenerator = partial(requestGenerator, input_name, output_name,
                                     c, h, w, format, dtype, model_name,
                                     model_version, image_filename)

    # Send requests of batch_size images. If the number of
    # images isn't an exact multiple of batch_size then just
    # start over with the first images until the batch is filled.
    result_filenames = []
    responses = []

    # Send each request synchronously and collect the responses in order
    for request in filledRequestGenerator(result_filenames):
        responses.append(grpc_stub.Infer(request))

    logging.info('responses size: %d', len(responses))
    idx = 0
    for response in responses:
        print("Request {}, batch size {}".format(idx, batch_size))
        print("result_filenames={}".format(result_filenames))
        print("response={}".format(response))
        count = len(response.meta_data.output)
        if count != 1:
            err = "expected 1 result, got {}: {}".format(
                count, response.request_status.msg)
            raise Exception(err)
        label, score = postprocess(response.meta_data.output,
                                   result_filenames[idx], batch_size)
        idx += 1

    return label, score
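# For reference, a minimal sketch of what the postprocess helper called above
# might look like. The helper name and its (label, scores) return convention
# come from the calls above; the proto layout (batch_classes entries whose
# cls items carry idx, value, and label fields) is an assumption based on the
# TensorRT Inference Server InferResponseHeader classification output, which
# lists classes in descending score order.
def postprocess(results, filenames, batch_size):
    if len(results) != 1:
        raise Exception("expected 1 result, got {}".format(len(results)))
    batched_result = results[0].batch_classes
    if len(batched_result) != batch_size:
        raise Exception("expected {} batched results, got {}".format(
            batch_size, len(batched_result)))
    label = None
    scores = []
    for index, result in enumerate(batched_result):
        print("Image '{}':".format(filenames[index]))
        for cls in result.cls:
            print("    {} ({}) = {}".format(cls.idx, cls.label, cls.value))
        # Top-1 class index and the full score list for this image
        label = result.cls[0].idx
        scores = [cls.value for cls in result.cls]
    return label, scores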
def get_status(self, grpc_stub):
    """Fetch the status of this object's model from the server via the Status gRPC."""
    request = grpc_service_pb2.StatusRequest(model_name=self.model_name)
    status = grpc_stub.Status(request)
    return status
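# A quick usage sketch: get_status only needs an object exposing a model_name
# attribute, so a namedtuple stands in for the real owner class here (the
# Owner type is hypothetical, for illustration only).
from collections import namedtuple

Owner = namedtuple('Owner', ['model_name'])
channel = grpc.insecure_channel('localhost:8001')
grpc_stub = grpc_service_pb2_grpc.GRPCServiceStub(channel)
status = get_status(Owner('end2end-demo'), grpc_stub)
print(status)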