Example 1
        return ["roi_feat_fpn"]


if __name__ == '__main__':
    logger = setup_logger()
    logger.info("example: RoIAlign")

    m = RoIAlign()

    data = get_inputs(
        "x_level",
        "roi_fpn",
        root="/autox-sz/users/dongqixu/share/trt_plugins/RoIAlign")
    export_onnx(m, data, "model.onnx")
    targets = m(data)

    TensorRTModel.build_engine("model.onnx", "model.trt", 4, device="CUDA")
    e = TensorRTModel("model.trt")
    outputs = e.inference(data)

    # compare torch output and tensorrt output
    assert len(targets) == len(outputs), "Number of outputs does not match!"
    targets = [(k, v.cuda()) for k, v in targets.items()]
    for i, (name, tensor) in enumerate(targets):
        logger.info(name)
        diff = outputs[name] - tensor
        unique = torch.unique(diff)
        logger.info("unique\n{}".format(unique))
        logger.info("max\n{}".format(torch.abs(unique).max()))
        assert torch.abs(unique).max() < 1e-3
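
For comparison, the element-wise check at the end of Example 1 can also be expressed with torch.allclose. This is a minimal sketch, not part of the original script; it assumes targets and outputs are both dicts mapping output names to tensors, as in the example above.

import torch


def compare_outputs(targets, outputs, atol=1e-3):
    # Verify that every TensorRT output matches its torch reference within atol.
    assert targets.keys() == outputs.keys(), "Output names do not match!"
    for name, reference in targets.items():
        produced = outputs[name].to(reference.device)
        max_err = (produced - reference).abs().max().item()
        assert torch.allclose(produced, reference, atol=atol), \
            "{}: max abs error {} exceeds tolerance {}".format(name, max_err, atol)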
Example 2
def main():
    parser = argparse.ArgumentParser(description="ImageNet inference example")
    parser.add_argument("data", metavar="DIR", help="path to dataset")
    parser.add_argument("-j",
                        "--workers",
                        default=4,
                        type=int,
                        metavar="N",
                        help="number of data loading workers (default: 2)")
    parser.add_argument(
        "-b",
        "--batch-size",
        default=1024,
        type=int,
        metavar="N",
        help="mini-batch size (default: 32), this is the total "
        "batch size of all GPUs on the current node when "
        "using Data Parallel or Distributed Data Parallel")
    parser.add_argument("--output",
                        default="./output",
                        help="output directory for the converted model")
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--fp16", action="store_true")
    parser.add_argument("--int8", action="store_true")
    parser.add_argument("--calibration-batch",
                        type=int,
                        default=1024,
                        help="max calibration batch number")
    parser.add_argument(
        "--format",
        choices=["torch", "onnx", "tensorrt"],
        help="output format",
        default="torch",
    )
    args = parser.parse_args()
    if args.debug:
        verbosity = logging.DEBUG
    else:
        verbosity = logging.INFO
    logger = setup_logger(verbosity=verbosity)
    logger.info("Command line arguments: " + str(args))

    if args.output:
        os.makedirs(args.output, exist_ok=True)
    suffix = "_fp16" if args.fp16 else ""
    suffix += "_int8" if args.int8 else ""
    onnx_f = os.path.join(args.output, "model.onnx")
    engine_f = os.path.join(args.output, "model{}.trt".format(suffix))
    cache_f = os.path.join(args.output, "cache.txt")

    # get data loader
    data_loader = get_data_loader(args.data, args.batch_size, args.workers)

    if args.format == "torch" or args.format == "onnx":
        torch_model = torchvision.models.resnet50(pretrained=True)
        torch_model.cuda()
        model = TorchModel(torch_model)
        if args.format == "onnx":
            data = next(iter(data_loader))
            inputs = model.convert_inputs(data)
            with trace_context(model), torch.no_grad():
                torch.onnx.export(model, (inputs, ),
                                  onnx_f,
                                  verbose=True,
                                  input_names=model.get_input_names(),
                                  output_names=model.get_output_names())
                return
    else:
        if not os.path.exists(engine_f):
            if args.int8:
                ns = types.SimpleNamespace()
                ns.batch_size = args.batch_size
                ns.effective_batch_size = 0
                preprocess = functools.partial(TensorRTEngine.convert_inputs,
                                               ns)
                int8_calibrator = TensorRTModel.get_int8_calibrator(
                    args.calibration_batch, data_loader, preprocess, cache_f)
            else:
                int8_calibrator = None
            TensorRTModel.build_engine(onnx_f,
                                       engine_f,
                                       args.batch_size,
                                       device="CUDA",
                                       fp16_mode=args.fp16,
                                       int8_mode=args.int8,
                                       int8_calibrator=int8_calibrator)
        model = TensorRTEngine(engine_f, args.batch_size)
        model.cuda()

    # validation
    validate(data_loader, model)

    if args.format == "tensorrt":
        model.report_engine_time("engine_time.txt", 0.01)
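
Example 2 calls validate(data_loader, model), but that function is not shown. The sketch below is a hypothetical stand-in, assuming the loader yields (images, labels) batches and that both wrappers are callable on a CUDA image batch and return class logits; the project's actual validate may differ.

import torch


def validate(data_loader, model, topk=(1, 5)):
    # Hypothetical top-k accuracy loop; the real implementation is not shown above.
    correct = {k: 0 for k in topk}
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.cuda(), labels.cuda()
            logits = model(images)
            _, pred = logits.topk(max(topk), dim=1)
            for k in topk:
                correct[k] += (pred[:, :k] == labels.unsqueeze(1)).any(dim=1).sum().item()
            total += labels.size(0)
    for k in topk:
        print("top-{} accuracy: {:.2f}%".format(k, 100.0 * correct[k] / total))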
Example 3
     args.output, "model.onnx")
 engine_f = os.path.join(args.output, "model{}.trt".format(suffix))
 cache_f = args.calibration_cache if args.calibration_cache else os.path.join(
     args.output, "cache.txt")
 assert os.path.isfile(onnx_f), "path {} is not a file".format(onnx_f)
 if not os.path.isfile(engine_f) or (not args.cache
                                     and override(engine_f)):
     if args.int8:
         # get preprocess function from model
         model = tracer.get_onnx_traceable()
         max_calibration_batch = args.calibration_batch
         if os.path.exists(cache_f) and (not args.cache
                                         and override(cache_f)):
             os.remove(cache_f)
         int8_calibrator = TensorRTModel.get_int8_calibrator(
             max_calibration_batch, data_loader, model.convert_inputs,
             cache_f)
         if args.quantization:
             quantization_layers = []
             with open(args.quantization) as f:
                 for line in f:
                     quantization_layers.append(line.strip())
             logger.info(
                 "quantization_layers: {}".format(quantization_layers))
         else:
             quantization_layers = None
         if args.exclude:
             assert not args.quantization, "exclude and quantization cannot be set simultaneously."
             exclude_layers = []
             with open(args.exclude) as f:
                 for line in f:
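
Both the --quantization and --exclude branches in Example 3 read a plain text file with one layer name per line. That pattern could be factored into a small helper; a minimal sketch, where the helper name is not from the original code and blank-line handling is an assumption:

def read_layer_list(path):
    # One layer name per line; blank lines are ignored.
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]

With such a helper, the quantization branch would reduce to something like
quantization_layers = read_layer_list(args.quantization) if args.quantization else None.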