def main(args):

    input_file_path = args.input_image
    serialized_plan_fp32 = args.engine_file
    HEIGHT = args.height
    WIDTH = args.width

    # Load the input image, resize it to the network resolution, convert to
    # C-contiguous float32, reorder HWC -> CHW, and subtract the channel mean.
    image = np.asarray(Image.open(input_file_path))
    img = rescale_image(image, (HEIGHT, WIDTH), order=1)
    im = np.array(img, dtype=np.float32, order='C')
    im = im.transpose((2, 0, 1))
    im = sub_mean_chw(im)

    # Run the serialized TensorRT engine on the preprocessed image and save the
    # colorized prediction.
    engine = eng.load_engine(trt_runtime, serialized_plan_fp32)
    h_input, d_input, h_output, d_output, stream = inf.allocate_buffers(
        engine, 1, trt.float32)
    out = inf.do_inference(engine, im, h_input, d_input, h_output, d_output,
                           stream, 1, HEIGHT, WIDTH)
    out = color_map(out)

    colorImage_trt = Image.fromarray(out.astype(np.uint8))
    colorImage_trt.save('trt_output.png')

    # Run the original Keras model on the same input for comparison.
    semantic_model = keras.models.load_model(args.hdf5_file)
    out_keras = semantic_model.predict(im.reshape(-1, 3, HEIGHT, WIDTH))

    out_keras = color_map(out_keras)
    colorImage_k = Image.fromarray(out_keras.astype(np.uint8))
    colorImage_k.save('keras_output.png')
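
These snippets all go through an eng.load_engine helper. A minimal sketch of such a helper, assuming the engine was already serialized to a plan file, could look like this:

import tensorrt as trt

def load_engine(trt_runtime, plan_path):
    # Read the serialized engine and deserialize it with the supplied runtime
    with open(plan_path, 'rb') as f:
        engine_data = f.read()
    return trt_runtime.deserialize_cuda_engine(engine_data)

Here trt_runtime is a trt.Runtime created from a trt.Logger, exactly as in the later examples.
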
def main(args):

    serialized_plan_fp32 = args.engine_file
    print("[INFO] Loading Engine...")
    engine = eng.load_engine(trt_runtime, serialized_plan_fp32)
    print("[INFO] Allocate Buffer...")
    

    print("[INFO] Apply Inference...")
    disp_tensors_pred = []
    disp_tensors_gt = []
    for i in range(config.NUM_VAL // config.batch_size):
        (force_tensor, disp_tensor_gt) = next(gen)
        # Buffers are (re)allocated for every batch here; they could be hoisted
        # out of the loop since the engine and batch size do not change.
        h_input, d_input, h_output, d_output, stream = inf.allocate_buffers(
            engine, config.batch_size, trt.float16)
        start = time.time()
        TensorRT_pred = inf.do_inference(engine, force_tensor, h_input, d_input,
                                         h_output, d_output, stream,
                                         config.batch_size)
        end = time.time()
        print("inference time including buffer copy", end - start)
        print("TensorRT_pred", TensorRT_pred.shape)
        disp_tensors_pred.append(TensorRT_pred)
        disp_tensors_gt.append(disp_tensor_gt)

    disp_tensors_pred = np.asarray(disp_tensors_pred).reshape(
        -1, config.data_shape[0], config.data_shape[1],
        config.data_shape[2], config.data_shape[3])
    disp_tensors_gt = np.asarray(disp_tensors_gt).reshape(
        -1, config.data_shape[0], config.data_shape[1],
        config.data_shape[2], config.data_shape[3])
    print(disp_tensors_pred.shape)
    Visualize.gen_video(disp_tensors_pred, disp_tensors_gt, config)  # visualize the results
def load_trt():
    # load trt engine
    load_tensorrt = timer("Load TRT Engine")
    trt_path = 'alexnet.trt'
    engine = load_engine(trt_runtime, trt_path)
    load_tensorrt.end()

    return engine
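
load_trt() relies on a small timer utility that is not shown; a plausible sketch (the class name and behavior are assumptions, not part of TensorRT or the original repo) is:

import time

class timer:
    def __init__(self, name):
        # Start timing as soon as the timer is created
        self.name = name
        self.start = time.time()

    def end(self):
        # Print the elapsed wall-clock time for this labelled section
        print("{}: {:.3f} s".format(self.name, time.time() - self.start))
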
Example #4
    def __init__(self, label_file, model_file):
        self.labels = self.load_labels(label_file)
        self.engine = eng.load_engine(trt_runtime, model_file)
        self.h_input, self.d_input, self.h_output, self.d_output, self.stream = inf.allocate_buffers(
            self.engine, 1, trt.float32)
        self.context = self.engine.create_execution_context()
        self.width = 224
        self.height = 224
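
The load_labels method called in this constructor is not shown; a plausible sketch (assuming one class label per line in the label file) would be:

    def load_labels(self, label_file):
        # One class name per line; blank lines are skipped
        with open(label_file) as f:
            return [line.strip() for line in f if line.strip()]
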
    def __init__(self,
                 trt_engine_path,
                 trt_engine_datatype=trt.DataType.FLOAT,
                 batch_size=1):
        """Initializes TensorRT objects needed for model inference.

        Args:
            trt_engine_path (str): path where TensorRT engine should be stored
            trt_engine_datatype (trt.DataType):
                requested precision of TensorRT engine used for inference
            batch_size (int): batch size for which engine
                should be optimized for
        """

        # We first load all custom plugins shipped with TensorRT,
        # some of them will be needed during inference
        trt.init_libnvinfer_plugins(TRT_LOGGER, '')

        # Initialize runtime needed for loading TensorRT engine from file
        self.trt_runtime = trt.Runtime(TRT_LOGGER)
        # TRT engine placeholder
        self.trt_engine = None

        # Display requested engine settings to stdout
        print("TensorRT inference engine settings:")
        print("  * Inference precision - {}".format(trt_engine_datatype))
        print("  * Max batch size - {}\n".format(batch_size))

        # Load the serialized engine from file (self.trt_engine is still None
        # at this point, so this branch is always taken)
        if not self.trt_engine:
            print("Loading cached TensorRT engine from {}".format(
                trt_engine_path))
            self.trt_engine = engine_utils.load_engine(self.trt_runtime,
                                                       trt_engine_path)

        # This allocates memory for network inputs/outputs on both CPU and GPU
        self.inputs, self.outputs, self.bindings, self.stream = \
            engine_utils.allocate_buffers(self.trt_engine)

        # Execution context is needed for inference
        self.context = self.trt_engine.create_execution_context()

        # Allocate a reusable host array, e.g. for multi-image batch inference
        input_volume = trt.volume((3, 300, 300))
        self.numpy_array = np.zeros(
            (self.trt_engine.max_batch_size, input_volume))
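
The engine_utils.allocate_buffers call above follows the pattern from NVIDIA's common.py sample code; a sketch of that pattern, assuming an implicit-batch engine, looks roughly like this:

import pycuda.autoinit  # noqa: F401  (creates a CUDA context)
import pycuda.driver as cuda
import tensorrt as trt

class HostDeviceMem:
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

def allocate_buffers(engine):
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        # Page-locked host buffer and matching device buffer for each binding
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
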
Example #6
import numpy as np
import skimage.transform

# MEAN is the per-channel pixel mean used during training (defined elsewhere)

def sub_mean_chw(data):
    data = data.transpose((1, 2, 0))  # CHW -> HWC
    data -= np.array(MEAN)            # Broadcast subtract of the channel means
    data = data.transpose((2, 0, 1))  # HWC -> CHW
    return data

def rescale_image(image, output_shape, order=1):
    image = skimage.transform.resize(image, output_shape, order=order,
                                     preserve_range=True, mode='reflect')
    return image

import cv2
import tensorrt as trt

import engine as eng
import inference as inf

# Runtime used to deserialize the engine
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)

input_file_path = "data/yolact_example_0.png"
serialized_plan_fp32 = "my_engine.trt"
HEIGHT = 550
WIDTH = 550

# Load and resize the input image to the network resolution
img = cv2.imread(input_file_path)
print(img.shape)
dim = (WIDTH, HEIGHT)
img = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
print(img.shape)

engine = eng.load_engine(trt_runtime, serialized_plan_fp32)
h_input, d_input, h_output, d_output, stream = inf.allocate_buffers(engine, 1, trt.float32)
out = inf.do_inference(engine, img, h_input, d_input, h_output, d_output, stream, 1, HEIGHT, WIDTH)
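
The inf.allocate_buffers used throughout these examples returns a single host/device buffer per direction; a minimal sketch, assuming one input binding (index 0) and one output binding (index 1) on an implicit-batch engine, could be:

import pycuda.autoinit  # noqa: F401
import pycuda.driver as cuda
import tensorrt as trt

def allocate_buffers(engine, batch_size, data_type):
    # Page-locked host buffers sized from the engine's binding shapes
    h_input = cuda.pagelocked_empty(
        batch_size * trt.volume(engine.get_binding_shape(0)),
        dtype=trt.nptype(data_type))
    h_output = cuda.pagelocked_empty(
        batch_size * trt.volume(engine.get_binding_shape(1)),
        dtype=trt.nptype(data_type))
    # Matching device allocations and a stream for asynchronous copies
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream
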
Example #7
import numpy as np
import tensorrt as trt
from PIL import Image
from torchvision import transforms as T

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)


def load_data(path):
    # Standard ImageNet-style preprocessing: resize, center-crop, to tensor
    trans = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()])

    img = Image.open(path)
    img_tensor = trans(img).unsqueeze(0)
    return np.array(img_tensor)


# load trt engine
load_trt = timer("Load TRT Engine")
trt_path = 'alexnet.trt'
engine = load_engine(trt_runtime, trt_path)
load_trt.end()

# allocate buffers
inputs, outputs, bindings, stream = common.allocate_buffers(engine)
# load data
inputs[0].host = load_data('../test_photo.jpg')

# inference
infer_trt = timer("TRT Inference")
with engine.create_execution_context() as context:
    trt_outputs = common.do_inference(context,
                                      bindings=bindings,
                                      inputs=inputs,
                                      outputs=outputs,
                                      stream=stream)
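
common.do_inference as used above typically follows the NVIDIA sample pattern: copy inputs to the device, launch the execution context asynchronously, copy outputs back, and synchronize. A sketch under those assumptions (inputs/outputs are host/device pairs as returned by the allocate_buffers sketch above):

import pycuda.driver as cuda

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data from host to device
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    # Run inference asynchronously on the stream
    context.execute_async(batch_size=batch_size, bindings=bindings,
                          stream_handle=stream.handle)
    # Transfer predictions back to the host
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    # Wait until all queued work on the stream has finished
    stream.synchronize()
    return [out.host for out in outputs]
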
faces_embeddings = normalize_vectors(faces_embeddings)

detector = MTCNN()

face_array = extract_face_from_image(input_file_path, detector)

face_pixels = face_array
# scale pixel values
face_pixels = face_pixels.astype('float32')
# standardize pixel values across channels (global)
mean, std = face_pixels.mean(), face_pixels.std()
face_pixels = (face_pixels - mean) / std
# transform face into one sample
samples = np.expand_dims(face_pixels, axis=0)
# make prediction to get embedding

engine = eng.load_engine(trt_runtime, engine_path)

h_input, d_input, h_output, d_output, stream = inf.allocate_buffers(
    engine, 1, trt.float32)
yhat = inf.do_inference(engine, samples, h_input, d_input, h_output, d_output,
                        stream, 1, HEIGHT, WIDTH)

print(yhat.shape)

face_to_predict_embedding = normalize_vectors(yhat)
result = predict_using_min_l2_distance(faces_embeddings, labels,
                                       face_to_predict_embedding)

print('Predicted name: %s' % (str(result).title()))
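
normalize_vectors and predict_using_min_l2_distance are not shown in this snippet; plausible numpy-only sketches (names and exact behavior are assumptions) are:

import numpy as np

def normalize_vectors(vectors):
    # L2-normalize each embedding vector (one row per embedding)
    vectors = np.asarray(vectors, dtype=np.float32)
    norms = np.linalg.norm(vectors, axis=-1, keepdims=True)
    return vectors / np.maximum(norms, 1e-12)

def predict_using_min_l2_distance(known_embeddings, labels, query_embedding):
    # Return the label whose stored embedding is closest (Euclidean) to the query
    distances = np.linalg.norm(known_embeddings - query_embedding, axis=1)
    return labels[int(np.argmin(distances))]
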
Example #9
def main(args):

    print('Platform: {}'.format(platform.platform()))

    trt_runtime = trt.Runtime(trt.Logger(trt.Logger.WARNING))
    with open(args.trtmodel, "rb") as f:
        engine = trt_runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    input_shape = engine.get_binding_shape(0)
    output_shape = engine.get_binding_shape(1)

    print('input shape: {}'.format(input_shape))
    print('output shape: {}'.format(output_shape))

    images = [
        'testtrt/000000001761.jpg', 'testtrt/000000119088.jpg',
        'testtrt/000000139099.jpg', 'testtrt/000000143998.jpg',
        'testtrt/000000222235.jpg', 'testtrt/000000276707.jpg',
        'testtrt/000000386134.jpg', 'testtrt/000000428218.jpg',
        'testtrt/000000530854.jpg', 'testtrt/000000538067.jpg'
    ]

    engine = eng.load_engine(trt_runtime, args.trtmodel)

    #h_input, d_input, h_output, d_output, stream = inf.allocate_buffers(engine, 1, trt.float32)
    inputBuffer = np.zeros(input_shape, dtype=np.float32)
    output = np.empty(output_shape[1], dtype=np.float32)

    # Allocate device memory
    d_input = cuda.mem_alloc(1 * inputBuffer.nbytes)
    d_output = cuda.mem_alloc(1 * output.nbytes)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    def predict_batch(input_data, d_input, stream, bindings, output,
                      d_output):  # result gets copied into output
        # Transfer input_data to device
        cuda.memcpy_htod_async(d_input, input_data, stream)
        # Execute model
        context.execute_async(1, bindings, stream.handle, None)
        # Transfer predictions back
        cuda.memcpy_dtoh_async(output, d_output, stream)
        # Synchronize threads
        stream.synchronize()
        return output

    print("Load model and dependencies...")

    predict_batch(inputBuffer, d_input, stream, bindings, output, d_output)

    print("Begin inferences")
    dtSum = 0.0
    for image in images:
        img = cv2.imread(image, 0)
        img = resize_with_crop_or_pad(img, [input_shape[1], input_shape[2]])
        # Using current time
        initial = datetime.now()
        predict_batch(img.astype(np.float32), d_input, stream, bindings,
                      output, d_output)
        prediction = np.argmax(output)
        dt = (datetime.now() - initial).total_seconds()
        dtSum += dt
        print("Prediction: {} dt {}".format(prediction, dt))
    print("Average time {}".format(dtSum / len(images)))
Example #10
def main():
    # set up arguments
    run_case = args.case
    num_images = int(args.num_img)
    model_name = args.model_name

    # set up logging to file
    log_filename = "logs/output_tensorrt_{}_{}.log".format(
        model_name, run_case)
    os.makedirs(os.path.dirname(log_filename), exist_ok=True)
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
        datefmt='%m-%d %H:%M',
        filename=log_filename,
        filemode='w')
    logging.getLogger('tensorrt')
    logging.info("#### start model prediction ####")

    # logger to capture errors, warnings, and other information during the build and inference phases
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    trt_runtime = trt.Runtime(TRT_LOGGER)

    # check model directory
    os.makedirs(os.path.dirname("model/"), exist_ok=True)

    # check data availability
    logging.info("check if data is available")
    logging.info("check data")
    if not os.path.exists("data"):
        return logging.error(
            'check if data is available or run download_images.py for generating sample dataset'
        )

    batch_size = 1
    # running inference from tensorrt
    logging.info("check engine")
    engine_name = "model/" + model_name + "_" + run_case + ".plan"
    if os.path.exists(engine_name):
        engine = eng.load_engine(trt_runtime, engine_name)
    else:
        onnx_path = "model/" + model_name + "_" + run_case + ".onnx"
        if not os.path.exists(onnx_path):
            logging.info("convert_torch_to_onnx")
            convert_torch_to_onnx(model_name)
        logging.info("build_engine_from_onnx")
        engine = build_engine_from_onnx(onnx_path, engine_name, batch_size,
                                        TRT_LOGGER)

    logging.info("start interference")
    data_type = trt.float32
    logging.info("allocate_buffers")
    h_input_1, d_input_1, h_output, d_output, stream = allocate_buffers(
        engine, batch_size, data_type)

    # predict images
    logging.info("do_inference")
    predict_images(num_images, engine, h_input_1, d_input_1, h_output,
                   d_output, stream)

    return
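
build_engine_from_onnx is not shown here; a sketch using the TensorRT ONNX parser (TensorRT 7/8-era builder API; the workspace size is an assumption, and batch_size is unused because the ONNX model is assumed to fix the batch dimension):

import tensorrt as trt

def build_engine_from_onnx(onnx_path, engine_name, batch_size, trt_logger):
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    builder = trt.Builder(trt_logger)
    network = builder.create_network(explicit_batch)
    parser = trt.OnnxParser(network, trt_logger)
    config = builder.create_builder_config()
    config.max_workspace_size = 1 << 30  # 1 GiB scratch space

    with open(onnx_path, 'rb') as f:
        if not parser.parse(f.read()):
            for i in range(parser.num_errors):
                print(parser.get_error(i))
            raise RuntimeError("Failed to parse the ONNX model")

    engine = builder.build_engine(network, config)
    # Serialize the engine so it can later be reloaded with load_engine
    with open(engine_name, 'wb') as f:
        f.write(engine.serialize())
    return engine
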