import argparse
import logging
from functools import partial

import grpc
# The module path below assumes the TensorRT Inference Server Python client
# package; the helper functions parse_model, requestGenerator and postprocess
# used by these snippets are assumed to be defined alongside this code, as in
# the accompanying client example.
from tensorrtserver.api import grpc_service_pb2, grpc_service_pb2_grpc


def get_prediction(image_filename,
                   server_host='localhost',
                   server_port=8001,
                   model_name="bolt",
                   model_version=None):
    """
  Retrieve a prediction from a TensorFlow model server

  :param image:       a bolt image
  :param server_host: the address of the TensorRT inference server
  :param server_port: the port used by the server
  :param model_name: the name of the model
  :param timeout:     the amount of time to wait for a prediction to complete
  :return 0:          the integer predicted in the bolt image
  :return 1:          the confidence scores for all classes
  """
    channel = grpc.insecure_channel(server_host + ':' + str(server_port))
    grpc_stub = grpc_service_pb2_grpc.GRPCServiceStub(channel)

    # Prepare request for Status gRPC
    request = grpc_service_pb2.StatusRequest(model_name=model_name)
    # Call and receive response from Status gRPC
    response = grpc_stub.Status(request)
    # Make sure the model matches our requirements, and get some
    # properties of the model that we need for preprocessing
    batch_size = 1
    verbose = False
    input_name, output_name, c, h, w, format, dtype = parse_model(
        response, model_name, batch_size, verbose)

    filledRequestGenerator = partial(requestGenerator, input_name, output_name,
                                     c, h, w, format, dtype, model_name,
                                     model_version, image_filename)

    # Send requests of batch_size images. If the number of
    # images isn't an exact multiple of batch_size then just
    # start over with the first images until the batch is filled.
    result_filenames = []
    requests = []
    responses = []

    # Send the requests synchronously
    for request in filledRequestGenerator(result_filenames):
        responses.append(grpc_stub.Infer(request))

    # For async requests, results would be retrieved in send order; with the
    # synchronous calls above, `requests` stays empty and this loop is a no-op.
    for request in requests:
        responses.append(request.result())

    idx = 0
    for response in responses:
        print("Request {}, batch size {}".format(idx, batch_size))
        label, score = postprocess(response.meta_data.output,
                                   result_filenames[idx], batch_size)
        idx += 1

    return label, score


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Only the last two arguments of the original command-line client are shown
    # here; the model name, batch size and verbosity flags referenced below as
    # FLAGS.model_name, FLAGS.batch_size and FLAGS.verbose are parsed the same
    # way.
    parser.add_argument(
        '-u',
        '--url',
        type=str,
        required=False,
        default='localhost:8001',
        help='Inference server URL. Default is localhost:8001.')
    parser.add_argument('image_filename',
                        type=str,
                        nargs='?',
                        default=None,
                        help='Input image.')
    FLAGS = parser.parse_args()

    # Create gRPC stub for communicating with the server
    channel = grpc.insecure_channel(FLAGS.url)
    grpc_stub = grpc_service_pb2_grpc.GRPCServiceStub(channel)

    # Prepare request for Status gRPC
    request = grpc_service_pb2.StatusRequest(model_name=FLAGS.model_name)
    # Call and receive response from Status gRPC
    response = grpc_stub.Status(request)
    # Make sure the model matches our requirements, and get some
    # properties of the model that we need for preprocessing
    input_name, output_name, c, h, w, format, dtype = parse_model(
        response, FLAGS.model_name, FLAGS.batch_size, FLAGS.verbose)

    filledRequestGenerator = partial(requestGenerator, input_name, output_name,
                                     c, h, w, format, dtype, FLAGS)

    # Send requests of FLAGS.batch_size images. If the number of
    # images isn't an exact multiple of FLAGS.batch_size then just
    # start over with the first images until the batch is filled.
    result_filenames = []
    requests = []
    # The remainder of the command-line client sends these requests and
    # postprocesses the responses in the same way as get_prediction above.

def get_prediction(image_filename,
                   server_host='localhost',
                   server_port=8001,
                   model_name="end2end-demo",
                   model_version=None):
    """
  Retrieve a prediction from a TensorFlow model server

  :param image:       a end2end-demo image
  :param server_host: the address of the TensorRT inference server
  :param server_port: the port used by the server
  :param model_name: the name of the model
  :param timeout:     the amount of time to wait for a prediction to complete
  :return 0:          the integer predicted in the end2end-demo image
  :return 1:          the confidence scores for all classes
  """
    channel = grpc.insecure_channel(server_host + ':' + str(server_port))
    grpc_stub = grpc_service_pb2_grpc.GRPCServiceStub(channel)

    # https://docs.nvidia.com/deeplearning/sdk/tensorrt-inference-server-guide/docs/http_grpc_api.html#section-api-status
    # Prepare request for Status gRPC
    request = grpc_service_pb2.StatusRequest(model_name=model_name)
    # Call and receive response from Status gRPC
    response = grpc_stub.Status(request)
    print('response:', response)
    # Make sure the model matches our requirements, and get some
    # properties of the model that we need for preprocessing
    batch_size = 1
    verbose = False
    input_name, output_name, c, h, w, format, dtype = parse_model(
        response, model_name, batch_size, verbose)
    logging.info("Got status for model %s:", model_name)
    print("Got status for model" + model_name)
    status_full = "input_name=%s, output_name=%s, c=%s, h=%s, w=%s, format=%s, dtype=%s " % (
        input_name, output_name, c, h, w, format, dtype)
    logging.info(status_full)
    print(status_full)

    filledRequestGenerator = partial(requestGenerator, input_name, output_name,
                                     c, h, w, format, dtype, model_name,
                                     model_version, image_filename)

    # Send requests of batch_size images. If the number of
    # images isn't an exact multiple of batch_size then just
    # start over with the first images until the batch is filled.
    result_filenames = []
    requests = []
    responses = []

    # Send the requests synchronously
    for request in filledRequestGenerator(result_filenames):
        responses.append(grpc_stub.Infer(request))

    # For async requests, results would be retrieved in send order; with the
    # synchronous calls above, `requests` stays empty and this loop is a no-op.
    for request in requests:
        responses.append(request.result())
    # print("responses={}".format(responses))

    idx = 0
    logging.info('responses size: %d', len(responses))
    for response in responses:
        print("Request {}, batch size {}".format(idx, batch_size))
        print("result_filenames={}".format(result_filenames))
        print("response={}".format(response))
        count = len(response.meta_data.output)
        if count != 1:
            err = "expected 1 result, got {}:{}".format(
                count, response.request_status.msg)
            raise Exception(err)
        label, score = postprocess(response.meta_data.output,
                                   result_filenames[idx], batch_size)
        idx += 1

    return label, score
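

# Hedged usage sketch (not part of the original client): how get_prediction
# might be invoked for a single image. The image path 'example.jpg' and the
# server address are placeholder assumptions; they presume a TensorRT
# inference server serving the end2end-demo model on the default gRPC port.
def run_example_prediction():
    label, score = get_prediction('example.jpg',
                                  server_host='localhost',
                                  server_port=8001,
                                  model_name='end2end-demo')
    print('predicted label: {}, confidence scores: {}'.format(label, score))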


def get_status(self, grpc_stub):
    request = grpc_service_pb2.StatusRequest(model_name=self.model_name)
    status = grpc_stub.Status(request)
    return status
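

# Hedged usage sketch (not part of the original snippet): get_status above is
# written as a method, so it is assumed to live on a client object that holds
# the model name. The _DemoClient class, the model name and the server address
# below are illustrative assumptions.
class _DemoClient(object):
    def __init__(self, model_name):
        self.model_name = model_name

    # reuse the module-level get_status defined above as a method
    get_status = get_status


def print_model_status(model_name='end2end-demo', url='localhost:8001'):
    channel = grpc.insecure_channel(url)
    grpc_stub = grpc_service_pb2_grpc.GRPCServiceStub(channel)
    print(_DemoClient(model_name).get_status(grpc_stub))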