return ["roi_feat_fpn"] if __name__ == '__main__': logger = setup_logger() logger.info("example: RoIAlign") m = RoIAlign() data = get_inputs( "x_level", "roi_fpn", root="/autox-sz/users/dongqixu/share/trt_plugins/RoIAlign") export_onnx(m, data, "model.onnx") targets = m(data) TensorRTModel.build_engine("model.onnx", "model.trt", 4, device="CUDA") e = TensorRTModel("model.trt") outputs = e.inference(data) # compare torch output and tensorrt output assert len(targets) == len(outputs), "Number of outputs does not match!" targets = [(k, v.cuda()) for k, v in targets.items()] for i, (name, tensor) in enumerate(targets): logger.info(name) diff = outputs[name] - tensor unique = torch.unique(diff) logger.info("unique\n{}".format(unique)) logger.info("max\n{}".format(torch.abs(unique).max())) assert torch.abs(unique).max() < 1e-3
def main():
    parser = argparse.ArgumentParser(description="ImageNet inference example")
    parser.add_argument("data", metavar="DIR", help="path to dataset")
    parser.add_argument("-j", "--workers", default=4, type=int, metavar="N",
                        help="number of data loading workers (default: 4)")
    parser.add_argument(
        "-b", "--batch-size", default=1024, type=int, metavar="N",
        help="mini-batch size (default: 1024), this is the total "
        "batch size of all GPUs on the current node when "
        "using Data Parallel or Distributed Data Parallel")
    parser.add_argument("--output", default="./output",
                        help="output directory for the converted model")
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--fp16", action="store_true")
    parser.add_argument("--int8", action="store_true")
    parser.add_argument("--calibration-batch", type=int, default=1024,
                        help="max calibration batch number")
    parser.add_argument(
        "--format",
        choices=["torch", "onnx", "tensorrt"],
        help="output format",
        default="torch",
    )
    args = parser.parse_args()

    if args.debug:
        verbosity = logging.DEBUG
    else:
        verbosity = logging.INFO
    logger = setup_logger(verbosity=verbosity)
    logger.info("Command line arguments: " + str(args))

    if args.output:
        os.makedirs(args.output, exist_ok=True)
    suffix = "_fp16" if args.fp16 else ""
    suffix += "_int8" if args.int8 else ""
    onnx_f = os.path.join(args.output, "model.onnx")
    engine_f = os.path.join(args.output, "model{}.trt".format(suffix))
    cache_f = os.path.join(args.output, "cache.txt")

    # get data loader
    data_loader = get_data_loader(args.data, args.batch_size, args.workers)

    if args.format == "torch" or args.format == "onnx":
        torch_model = torchvision.models.resnet50(pretrained=True)
        torch_model.cuda()
        model = TorchModel(torch_model)
        if args.format == "onnx":
            # export the traced model and stop here
            data = next(iter(data_loader))
            inputs = model.convert_inputs(data)
            with trace_context(model), torch.no_grad():
                torch.onnx.export(model, (inputs, ), onnx_f, verbose=True,
                                  input_names=model.get_input_names(),
                                  output_names=model.get_output_names())
            return
    else:
        if not os.path.exists(engine_f):
            if args.int8:
                # int8 calibration feeds preprocessed batches from the data loader
                ns = types.SimpleNamespace()
                ns.batch_size = args.batch_size
                ns.effective_batch_size = 0
                preprocess = functools.partial(TensorRTEngine.convert_inputs, ns)
                int8_calibrator = TensorRTModel.get_int8_calibrator(
                    args.calibration_batch, data_loader, preprocess, cache_f)
            else:
                int8_calibrator = None
            TensorRTModel.build_engine(onnx_f, engine_f, args.batch_size,
                                       device="CUDA",
                                       fp16_mode=args.fp16,
                                       int8_mode=args.int8,
                                       int8_calibrator=int8_calibrator)
        model = TensorRTEngine(engine_f, args.batch_size)
        model.cuda()

    # validation
    validate(data_loader, model)
    if args.format == "tensorrt":
        model.report_engine_time("engine_time.txt", 0.01)
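# The int8 branch above relies on TensorRTModel.get_int8_calibrator to wrap the
# data loader in a TensorRT calibrator. The class below is a hypothetical,
# minimal sketch of the interface such an object is assumed to implement
# (TensorRT's IInt8EntropyCalibrator2); the class name, buffer handling, and
# the behaviour of the real helper in this repo may differ.
import os

import numpy as np
import pycuda.autoinit  # noqa: F401 (creates a CUDA context for mem_alloc)
import pycuda.driver as cuda
import tensorrt as trt


class _SketchEntropyCalibrator(trt.IInt8EntropyCalibrator2):

    def __init__(self, max_batches, data_loader, preprocess, cache_file):
        super().__init__()
        self.loader = iter(data_loader)
        self.preprocess = preprocess
        self.cache_file = cache_file
        self.max_batches = max_batches
        self.count = 0
        self.device_mem = None

    def get_batch_size(self):
        # batch size of each calibration batch; with explicit-batch networks
        # this value is largely informational
        return 1

    def get_batch(self, names):
        # return a list of device pointers, or None to end calibration
        if self.count >= self.max_batches:
            return None
        try:
            # preprocess is assumed to return a single contiguous NCHW batch
            batch = self.preprocess(next(self.loader))
        except StopIteration:
            return None
        if hasattr(batch, "cpu"):  # torch.Tensor -> numpy
            batch = batch.cpu().numpy()
        array = np.ascontiguousarray(batch, dtype=np.float32)
        # device buffer is reused; assumes all calibration batches share a size
        if self.device_mem is None:
            self.device_mem = cuda.mem_alloc(array.nbytes)
        cuda.memcpy_htod(self.device_mem, array)
        self.count += 1
        return [int(self.device_mem)]

    def read_calibration_cache(self):
        # reuse previously written scales so calibration can be skipped
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                return f.read()
        return None

    def write_calibration_cache(self, cache):
        with open(self.cache_file, "wb") as f:
            f.write(cache)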
args.output, "model.onnx") engine_f = os.path.join(args.output, "model{}.trt".format(suffix)) cache_f = args.calibration_cache if args.calibration_cache else os.path.join( args.output, "cache.txt") assert os.path.isfile(onnx_f), "path {} is not a file".format(onnx_f) if not os.path.isfile(engine_f) or (not args.cache and override(engine_f)): if args.int8: # get preprocess function from model model = tracer.get_onnx_traceable() max_calibration_batch = args.calibration_batch if os.path.exists(cache_f) and (not args.cache and override(cache_f)): os.remove(cache_f) int8_calibrator = TensorRTModel.get_int8_calibrator( max_calibration_batch, data_loader, model.convert_inputs, cache_f) if args.quantization: quantization_layers = [] with open(args.quantization) as f: for line in f: quantization_layers.append(line.strip()) logger.info( "quantization_layers: {}".format(quantization_layers)) else: quantization_layers = None if args.exclude: assert not args.quantization, "exclude and quantization cannot be set simultaneously." exclude_layers = [] with open(args.exclude) as f: for line in f: