Example #1
    def create_session(self):
        sess_opt = ort.SessionOptions()
        sess_opt.enable_profiling = self.profiling
        sess = ort.InferenceSession(self.model, sess_options=sess_opt, providers=[self.provider])
        return sess
Example #2
    def testSessionOptionsAddConfigEntry(self):
        so = onnxrt.SessionOptions()
        key = "CONFIG_KEY"
        val = "CONFIG_VAL"
        so.add_session_config_entry(key, val)
        self.assertEqual(so.get_session_config_entry(key), val)

Example #3

    def _create_ort_training_session(self):
        # Validating frozen_weights names
        unused_frozen_weights = [n for n in self.options.utils.frozen_weights\
            if n not in [i.name for i in self._onnx_model.graph.initializer]]
        if unused_frozen_weights:
            raise RuntimeError(
                "{} params from 'frozen_weights' not found in the ONNX model.".
                format(unused_frozen_weights))

        # Get loss name from model description
        loss_name = [item.name for item in self.model_desc.outputs if item.is_loss]
        assert len(loss_name) == 1, \
            f"Only one loss output is supported ({len(loss_name)} were specified)"
        loss_name = loss_name[0]

        # Parse optimizer parameters
        optimizer_attributes_map = {}
        optimizer_int_attributes_map = {}
        trainable_params = set()
        for initializer in self._onnx_model.graph.initializer:
            if initializer.name in self.options.utils.frozen_weights:
                continue  # only trainable parameters are passed to the backend
            trainable_params.add(initializer.name)
            optimizer_attributes_map[initializer.name] = {}
            optimizer_int_attributes_map[initializer.name] = {}
            not_in_param_groups = True
            for param_group in self.optim_config.params:
                if initializer.name not in param_group['params']:
                    continue  # keep looking for a matching param_group
                not_in_param_groups = False
                for k, v in param_group.items():
                    # 'params' is not a hyper parameter, skip it. 'lr' per weight is not supported
                    if k == 'params' or k == 'lr':
                        continue
                    if isinstance(v, float):
                        optimizer_attributes_map[initializer.name][k] = v
                    elif isinstance(v, int):
                        optimizer_int_attributes_map[initializer.name][k] = v
                    else:
                        raise ValueError(
                            "Optimizer attributes must be either float or int."
                        )

            # set default values for params not found in groups
            if not_in_param_groups:
                for k, v in self.optim_config.defaults.items():
                    if k == 'lr':
                        continue
                    if isinstance(v, float):
                        optimizer_attributes_map[initializer.name][k] = v
                    elif isinstance(v, int):
                        optimizer_int_attributes_map[initializer.name][k] = v
                    else:
                        raise ValueError(
                            "Optimizer attributes must be either float or int."
                        )

        # TrainingParameters
        ort_parameters = ort.TrainingParameters()
        ort_parameters.loss_output_name = loss_name
        ort_parameters.use_mixed_precision = self.options.mixed_precision.enabled
        ort_parameters.world_rank = self.options.distributed.world_rank
        ort_parameters.world_size = self.options.distributed.world_size
        ort_parameters.gradient_accumulation_steps = self.options.batch.gradient_accumulation_steps
        ort_parameters.allreduce_post_accumulation = self.options.distributed.allreduce_post_accumulation
        ort_parameters.deepspeed_zero_stage = self.options.distributed.deepspeed_zero_optimization.stage
        ort_parameters.enable_grad_norm_clip = self.options.utils.grad_norm_clip
        ort_parameters.set_gradients_as_graph_outputs = False
        ort_parameters.use_invertible_layernorm_grad = self.options.utils.invertible_layer_norm_gradient
        ort_parameters.training_optimizer_name = self.optim_config.name
        ort_parameters.lr_params_feed_name = self.model_desc.learning_rate.name
        ort_parameters.weights_to_train = trainable_params
        ort_parameters.optimizer_attributes_map = optimizer_attributes_map
        ort_parameters.optimizer_int_attributes_map = optimizer_int_attributes_map

        ort_parameters.attn_dropout_recompute = self.options.graph_transformer.attn_dropout_recompute
        ort_parameters.gelu_recompute = self.options.graph_transformer.gelu_recompute
        ort_parameters.transformer_layer_recompute = self.options.graph_transformer.transformer_layer_recompute
        ort_parameters.number_recompute_layers = self.options.graph_transformer.number_recompute_layers
        ort_parameters.model_with_training_graph_path = self.options.debug.model_with_training_graph_path

        # SessionOptions
        session_options = ort.SessionOptions()
        session_options.use_deterministic_compute = self.options.debug.deterministic_compute
        if (self.options.graph_transformer.attn_dropout_recompute
                or self.options.graph_transformer.gelu_recompute
                or self.options.graph_transformer.transformer_layer_recompute):
            session_options.execution_order = ort.ExecutionOrder.PRIORITY_BASED

        # An old ORT session may still exist and occupy GPU memory when the new session is created, which can cause an OOM error.
        # For example, load_state_dict is called before returning from this function, and it calls _init_session again.
        del self._training_session
        # TrainingSession
        self._training_session = ort.TrainingSession(
            self._onnx_model.SerializeToString(), ort_parameters,
            session_options)

        # I/O bindings
        self._train_io_binding = self._training_session.io_binding()
        self._eval_io_binding = self._training_session.io_binding()

Example #4

# Note: this snippet assumes imports such as sys, cv2, numpy (np) and onnxruntime,
# plus a resize_pad helper and a WINDOW title defined elsewhere in the script.
cv2.namedWindow(WINDOW)
if len(sys.argv) > 1:
    capture = cv2.VideoCapture(sys.argv[1])
    mirror_img = False
else:
    capture = cv2.VideoCapture(2)
    mirror_img = True

if capture.isOpened():
    hasFrame, frame = capture.read()
    frame_ct = 0
else:
    hasFrame = False

onnx_file_name = 'Preprocess1x256x256xBGRxByte.onnx'
sess_options = onnxruntime.SessionOptions()
sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
sess_options.enable_profiling = True
ort_session = onnxruntime.InferenceSession(onnx_file_name, sess_options)

input_name = ort_session.get_inputs()[0].name

while hasFrame:
    img1, img2, scale, pad = resize_pad(frame)

    img_in = np.expand_dims(img1, axis=0).astype(np.uint8)
    ort_inputs = {input_name: img_in}

    ort_outs = ort_session.run(None, ort_inputs)

    imgDisp = ort_outs[0][0]
Example #5
def getSingleSessionProfilingStartTime():
    so = onnxrt.SessionOptions()
    so.enable_profiling = True
    sess = onnxrt.InferenceSession(get_name("mul_1.onnx"),
                                   sess_options=so)
    return sess.get_profiling_start_time_ns()
Example #6
def inference(onnx_model, model_dir, examples):
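    # Assumes helpers such as get_tokens, chunkIt and NUM_BATCHES, plus imports for
    # onnxruntime (ort), numpy (np), torch, torch.nn.functional (F), AutoTokenizer
    # and time, are provided elsewhere in the calling module.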
    quantized_str = ''
    if 'quantized' in onnx_model:
        quantized_str = 'quantized'
    onnx_inference = []
    #     pytorch_inference = []
    # onnx session
    options = ort.SessionOptions()
    options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
    options.intra_op_num_threads = 16  # does not seem to make a difference, always parallelized
    # options.inter_op_num_threads = multiprocessing.cpu_count()

    # logger.info(onnx_model)
    ort_session = ort.InferenceSession(onnx_model, options)

    # pytorch pretrained model and tokenizer
    if 'bertweet' in onnx_model:
        tokenizer = AutoTokenizer.from_pretrained(model_dir,
                                                  normalization=True)
    else:
        tokenizer = AutoTokenizer.from_pretrained(model_dir)

    tokenizer_str = "TokenizerFast"

    # logger.info("**************** {} ONNX inference with batch tokenization and with {} tokenizer****************".format(
    #     quantized_str, tokenizer_str))
    start_batch_tokenization = time.time()
    tokens_dict = tokenizer.batch_encode_plus(examples, max_length=128)
    token0 = get_tokens(tokens_dict, 0)

    examples_chunks_list = chunkIt(examples, NUM_BATCHES)
    tokens_dict_list = [
        tokenizer.batch_encode_plus(chunk, padding='longest')
        for chunk in examples_chunks_list
    ]
    # tokens_dict_list = [tokenizer.batch_encode_plus(chunk, max_length=128) for chunk in examples_chunks_list]

    minibatches_list = []
    for i, token_batch in enumerate(tokens_dict_list):
        minibatch = {}
        number_examples_in_this_batch = len(token_batch['input_ids'])
        minibatch['input_ids'] = np.stack(
            [get_tokens(token_batch, i)['input_ids'][0]
             for i in range(number_examples_in_this_batch)],
            axis=0)
        minibatch['token_type_ids'] = np.stack(
            [get_tokens(token_batch, i)['token_type_ids'][0]
             for i in range(number_examples_in_this_batch)],
            axis=0)
        minibatch['attention_mask'] = np.stack(
            [get_tokens(token_batch, i)['attention_mask'][0]
             for i in range(number_examples_in_this_batch)],
            axis=0)
        # logger.info('y')
        minibatches_list.append(minibatch)

    # tokens_dict = tokenizer.batch_encode_plus(examples, padding='longest')
    total_batch_tokenization_time = time.time() - start_batch_tokenization
    total_inference_time = 0
    total_build_label_time = 0
    start_onnx_inference_batch = time.time()

    # for i in range(len(examples)):
    for i, minibatch in enumerate(minibatches_list):
        """
        Onnx inference with batch tokenization
        """

        # if i % 100 == 0:
        # logger.info(i, '/', NUM_BATCHES)

        tokens = get_tokens(tokens_dict, i)
        # inference
        start_inference = time.time()
        # ort_outs = ort_session.run(None, tokens)
        ort_outs = ort_session.run(None, minibatch)
        total_inference_time = total_inference_time + (time.time() -
                                                       start_inference)
        # build label
        start_build_label = time.time()
        torch_onnx_output = torch.tensor(ort_outs[0], dtype=torch.float32)
        onnx_logits = F.softmax(torch_onnx_output, dim=1)
        logits_label = torch.argmax(onnx_logits, dim=1)
        label = logits_label.detach().cpu().numpy()
        #         onnx_inference.append(label[0])
        # onnx_inference.append(onnx_logits.detach().cpu().numpy().tolist())

        # TODO might be able to make this faster by using arrays with a pre-defined size instead of mutating lists like this
        onnx_inference = onnx_inference + onnx_logits.detach().cpu().numpy().tolist()
        # onnx_inference.append(onnx_logits.detach().cpu().numpy()[0].tolist())
        # total_build_label_time = total_build_label_time + (time.time() - start_build_label)
    #         logger.info(i, label[0], onnx_logits.detach().cpu().numpy()[0].tolist(), type(onnx_logits.detach().cpu().numpy()[0]) )

    end_onnx_inference_batch = time.time()
    # logger.info("Total batch tokenization time (in seconds): ", total_batch_tokenization_time)
    # logger.info("Total inference time (in seconds): ", total_inference_time)
    # logger.info("Total build label time (in seconds): ", total_build_label_time)
    # logger.info("Duration ONNX inference (in seconds) with {} and batch tokenization: ".format(tokenizer_str),
    # logger.info("Duration ONNX inference (in seconds): ",
    #       end_onnx_inference_batch - start_onnx_inference_batch,
    #       (end_onnx_inference_batch - start_onnx_inference_batch) / len(examples))
    # logger.info(onnx_inference)
    return onnx_inference
Example #7
def main():
    args = parse_arguments()
    setup_logger(args.verbose)
    dump_environment()

    enable_past_input = args.enable_past_input

    cache_dir = args.cache_dir
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    (model_class, tokenizer_class,
     model_name_or_path) = MODEL_CLASSES[args.model_type]

    tokenizer = tokenizer_class.from_pretrained(model_name_or_path,
                                                cache_dir=cache_dir)
    model = model_class.from_pretrained(model_name_or_path,
                                        cache_dir=cache_dir)
    model.eval().cpu()

    inputs = tokenizer.encode_plus("Here is an example input for GPT2 model",
                                   add_special_tokens=True,
                                   return_tensors='pt')
    input_ids = inputs['input_ids']
    outputs = model(input_ids=input_ids, past=None)

    num_layer = model.config.n_layer
    present_names = [f'present_{i}' for i in range(num_layer)]
    output_names = ["last_state"] + present_names

    input_names = ['input_ids']
    dynamic_axes = {
        'input_ids': {
            0: 'batch_size',
            1: 'seq_len'
        },
        'last_state': {
            0: 'batch_size',
            1: 'seq_len'
        }
    }
    for name in present_names:
        dynamic_axes[name] = {1: 'batch_size', 3: 'seq_len'}

    if enable_past_input:
        past_names = [f'past_{i}' for i in range(num_layer)]
        input_names = ['input_ids'] + past_names
        dummy_past = [
            torch.zeros(list(outputs[1][0].shape)) for _ in range(num_layer)
        ]
        for name in past_names:
            dynamic_axes[name] = {1: 'batch_size', 3: 'seq_len'}
        export_inputs = (inputs['input_ids'], tuple(dummy_past))
    else:
        export_inputs = (inputs['input_ids'])

    export_model_path = os.path.join(
        output_dir, 'gpt2_past{}.onnx'.format(int(enable_past_input)))

    torch.onnx.export(model,
                      args=export_inputs,
                      f=export_model_path,
                      input_names=input_names,
                      output_names=output_names,
                      dynamic_axes=dynamic_axes,
                      opset_version=11,
                      do_constant_folding=True,
                      verbose=False)

    # Run the PyTorch performance test before updating environment variables.
    past = dummy_past if enable_past_input else None
    outputs = pytorch_inference(model,
                                input_ids,
                                past,
                                total_runs=args.total_runs)

    # setup environment variables before importing onnxruntime.
    setup_environment(args.use_openmp)
    import onnxruntime

    if enable_past_input:
        onnx_model_path = export_model_path
    else:
        onnx_model_path = os.path.join(
            output_dir, 'gpt2_past{}_out1.onnx'.format(int(enable_past_input)))
        remove_past_outputs(export_model_path, onnx_model_path)

    if args.enable_optimization:
        from optimizer import optimize_model
        m = optimize_model(onnx_model_path,
                           model_type='gpt2',
                           num_heads=12,
                           hidden_size=768,
                           opt_level=0,
                           optimization_options=None)
        onnx_model_path = os.path.join(
            output_dir,
            'gpt2_past{}_optimized.onnx'.format(int(enable_past_input)))
        m.save_model_to_file(onnx_model_path)

    if 'CUDAExecutionProvider' in onnxruntime.get_available_providers():
        logger.warning(
            "onnxruntime-gpu is not built with OpenMP. You might try onnxruntime package to test CPU inference."
        )

    sess_options = onnxruntime.SessionOptions()

    if args.use_openmp:
        sess_options.intra_op_num_threads = 1
    else:
        sess_options.intra_op_num_threads = psutil.cpu_count(logical=True)
    logger.info(
        f"session option: intra_op_num_threads={sess_options.intra_op_num_threads}"
    )

    logger.info(f"Start inferencing onnx model: {onnx_model_path}")
    session = onnxruntime.InferenceSession(onnx_model_path,
                                           sess_options,
                                           providers=['CPUExecutionProvider'])

    ort_outputs = onnxruntime_inference(session, input_ids, past,
                                        args.total_runs)
    if args.verify_outputs:
        logger.info(
            'PyTorch and OnnxRuntime output 0 (last_state) are close: %s',
            numpy.allclose(ort_outputs[0],
                           outputs[0].cpu(),
                           rtol=1e-05,
                           atol=1e-04))

        for layer in range(model.config.n_layer):
            logger.info(
                'PyTorch and OnnxRuntime layer %s state (present_%s) are close: %s',
                layer, layer,
                numpy.allclose(ort_outputs[1 + layer],
                               outputs[1][layer].cpu(),
                               rtol=1e-05,
                               atol=1e-04))

Example #8

def main():
    model = GoogleModel(64, 5, 2, 32, 8, True, None, False)

    batch_size = 1000
    input = torch.randn(batch_size, 5, 9, 9)
    outputs = model(input)
    model.eval()

    # model(input)[0][0, 0].item()
    # print("torch CPU:", timeit.timeit(lambda: model(input)[0][0, 0].item(), number=20))

    # model.cuda()
    # input = input.cuda()
    # model(input)[0][0, 0].item()
    # print("torch CUDA:", timeit.timeit(lambda: model(input)[0][0, 0].item(), number=20))

    print("Exporting to onnx")
    torch.onnx.export(
        model,
        input,
        "../../data/onnx/small.onnx",
        example_outputs=outputs,
        opset_version=12,
        input_names=["input"],
        output_names=["value", "policy"],
        dynamic_axes={
            "input": {
                0: "batch_size"
            },
            "value": {
                0: "batch_size"
            },
            "policy": {
                0: "batch_size"
            }
        },
        training=TrainingMode.EVAL,
    )

    # print("Quantizing")
    # quantize_dynamic("../../data/onnx/small.onnx", "../../data/onnx/small_quant.onnx", weight_type=QuantType.QUInt8)

    print("Loading onnx")
    model = onnx.load("../../data/onnx/small.onnx")
    onnx.checker.check_model(model)
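    # Note: small_quant.onnx is only present if the quantize_dynamic step above has been run.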
    model = onnx.load("../../data/onnx/small_quant.onnx")
    onnx.checker.check_model(model)

    print("Building profile")
    sess_options = onnxruntime.SessionOptions()
    sess_options.enable_profiling = True
    # sess_options.log_severity_level = 0
    session = onnxruntime.InferenceSession("../../data/onnx/small.onnx",
                                           sess_options=sess_options)

    print("Running model")
    onnx_input = input.cpu().numpy()
    _ = session.run(None, {"input": onnx_input})
    _ = session.run(None, {"input": onnx_input})
    rounds = 100
    delta = timeit.timeit(lambda: session.run(None, {"input": onnx_input}),
                          number=rounds)
    throughput = batch_size * rounds / delta
    print(f"onnx cuda (?): {throughput} boards/s")

Example #9

def load_a_batch(batch_filenames):
    unconcatenated_batch_data = []
    for image_filename in batch_filenames:
        image_filepath = imagenet_path + '/' + image_filename
        nchw_data = load_and_resize_image(image_filepath, height, width)
        unconcatenated_batch_data.append(nchw_data)
    batch_data = np.concatenate(unconcatenated_batch_data, axis=0)

    return batch_data


#print("Device: " + rt.get_device())

sess_options = rt.SessionOptions()
if CPU_THREADS > 0:
    sess_options.enable_sequential_execution = False
    sess_options.session_thread_pool_size = CPU_THREADS
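    # Note: enable_sequential_execution and session_thread_pool_size are legacy SessionOptions
    # attributes; newer onnxruntime releases expose execution_mode and intra_op_num_threads instead.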
sess = rt.InferenceSession(model_path, sess_options)

input_layer_names = [
    x.name for x in sess.get_inputs()
]  # FIXME: check that input_layer_name belongs to this list
input_layer_name = input_layer_name or input_layer_names[0]

output_layer_names = [
    x.name for x in sess.get_outputs()
]  # FIXME: check that output_layer_name belongs to this list
output_layer_name = output_layer_name or output_layer_names[0]
Example #10
def test_modulated_deform_conv2d():
    try:
        from mmcv.ops import ModulatedDeformConv2d, get_onnxruntime_op_path
    except (ImportError, ModuleNotFoundError):
        pytest.skip('modulated_deform_conv op is not successfully compiled')

    ort_custom_op_path = get_onnxruntime_op_path()
    # modulated deform conv config
    in_channels = 3
    out_channels = 64
    stride = 1
    padding = 0
    dilation = 1
    groups = 1
    deform_groups = 1
    kernel_size = 3

    input = torch.rand(1, in_channels, 28, 28).cuda()  # (n, c, h, w)
    conv_offset = nn.Conv2d(in_channels=3,
                            out_channels=deform_groups * 3 * kernel_size *
                            kernel_size,
                            kernel_size=kernel_size,
                            stride=stride,
                            padding=padding,
                            dilation=dilation,
                            bias=True).cuda()
    conv_offset.cuda()
    out = conv_offset(input)
    o1, o2, mask = torch.chunk(out, 3, dim=1)
    offset = torch.cat((o1, o2), dim=1)
    mask = torch.sigmoid(mask)

    model_with_bias = ModulatedDeformConv2d(in_channels,
                                            out_channels,
                                            kernel_size,
                                            stride,
                                            padding,
                                            dilation,
                                            groups,
                                            deform_groups,
                                            bias=True)
    model_without_bias = ModulatedDeformConv2d(in_channels,
                                               out_channels,
                                               kernel_size,
                                               stride,
                                               padding,
                                               dilation,
                                               groups,
                                               deform_groups,
                                               bias=False)
    models = [model_with_bias.cuda(), model_without_bias.cuda()]

    for model in models:
        # export and load onnx model
        with torch.no_grad():
            torch.onnx.export(model, (input, offset, mask),
                              onnx_file,
                              export_params=True,
                              keep_initializers_as_inputs=True,
                              input_names=['input', 'offset', 'mask'],
                              opset_version=11)

        session_options = rt.SessionOptions()
        if os.path.exists(ort_custom_op_path):
            session_options.register_custom_ops_library(ort_custom_op_path)

        # compute onnx_output
        sess = rt.InferenceSession(onnx_file, session_options)
        onnx_output = sess.run(
            None, {
                'input': input.cpu().detach().numpy(),
                'offset': offset.cpu().detach().numpy(),
                'mask': mask.cpu().detach().numpy()
            })[0]

        # compute pytorch_output
        with torch.no_grad():
            pytorch_output = model(input, offset, mask).cpu()
        # allclose
        assert np.allclose(pytorch_output, onnx_output, atol=1e-3)
Example #11
def test_deform_conv2d(threshold=1e-3):
    try:
        from mmcv.ops import DeformConv2d, get_onnxruntime_op_path
    except (ImportError, ModuleNotFoundError):
        pytest.skip('deform_conv op is not successfully compiled')

    ort_custom_op_path = get_onnxruntime_op_path()
    if not os.path.exists(ort_custom_op_path):
        pytest.skip('custom ops for onnxruntime are not compiled.')

    # deform conv config
    # modulated deform conv config
    in_channels = 1
    out_channels = 64
    stride = 1
    padding = 0
    dilation = 1
    groups = 1
    deform_groups = 1
    kernel_size = 2
    input = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
    offset_weight = [[[0.1, 0.4, 0.6, 0.1]], [[0.3, 0.2, 0.1, 0.3]],
                     [[0.5, 0.5, 0.2, 0.8]], [[0.8, 0.3, 0.9, 0.1]],
                     [[0.3, 0.1, 0.2, 0.5]], [[0.3, 0.7, 0.5, 0.3]],
                     [[0.6, 0.2, 0.5, 0.3]], [[0.4, 0.1, 0.8, 0.4]]]
    offset_bias = [0.7, 0.1, 0.8, 0.5, 0.6, 0.5, 0.4, 0.7]
    deform_weight = [[[0.4, 0.2, 0.1, 0.9]]]

    x = torch.tensor(input)
    conv_offset = nn.Conv2d(in_channels=in_channels,
                            out_channels=deform_groups * 2 * kernel_size *
                            kernel_size,
                            kernel_size=kernel_size,
                            stride=stride,
                            padding=padding,
                            dilation=dilation,
                            bias=True)

    conv_offset.weight.data = torch.nn.Parameter(
        torch.Tensor(offset_weight).reshape(8, 1, 2, 2))
    conv_offset.bias.data = torch.nn.Parameter(
        torch.Tensor(offset_bias).reshape(8))

    offset = conv_offset(x)

    model = DeformConv2d(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, groups, deform_groups)

    model.weight.data = torch.nn.Parameter(
        torch.Tensor(deform_weight).reshape(1, 1, 2, 2))

    with torch.no_grad():
        torch.onnx.export(model, (x, offset),
                          onnx_file,
                          export_params=True,
                          keep_initializers_as_inputs=True,
                          input_names=['input', 'offset'],
                          opset_version=11)

    session_options = rt.SessionOptions()
    if os.path.exists(ort_custom_op_path):
        session_options.register_custom_ops_library(ort_custom_op_path)

    # compute onnx_output
    sess = rt.InferenceSession(onnx_file, session_options)
    onnx_output = sess.run(
        None, {
            'input': x.cpu().detach().numpy(),
            'offset': offset.cpu().detach().numpy(),
        })[0]

    # compute pytorch_output
    with torch.no_grad():
        pytorch_output = model(x, offset).cpu()
    # allclose
    assert np.allclose(pytorch_output, onnx_output, atol=1e-3)
Example #12
def test_cummax_cummin(key, opset=11):
    if torch.__version__ == 'parrots':
        pytest.skip('onnx is not supported in parrots directly')

    # In general, `cummax` or `cummin` is exportable to ONNX as long as
    # the pytorch version >= 1.5.0, since `torch.cummax` is only supported
    # with torch >= 1.5.0.
    # But when `cummax` or `cummin` serves as an intermediate component
    # whose outputs are used as inputs to other modules, pytorch >= 1.7.0
    # is required. Otherwise an error appears like:
    # `RuntimeError: tuple appears in op that does not forward tuples,
    # unsupported kind: prim::PythonOp`.
    if version.parse(torch.__version__) < version.parse('1.7.0'):
        pytest.skip('test_cummax_cummin should be run with pytorch >= 1.7.0')

    # register custom op `mmcv::cummax` and `mmcv::cummin`
    from mmcv.onnx.symbolic import register_extra_symbolics
    register_extra_symbolics(opset)

    from mmcv.ops import get_onnxruntime_op_path
    ort_custom_op_path = get_onnxruntime_op_path()
    if not os.path.exists(ort_custom_op_path):
        pytest.skip('custom ops for onnxruntime are not compiled.')

    input_list = [
        # arbitrary shape, e.g. 1-D, 2-D, 3-D, ...
        torch.rand((2, 3, 4, 1, 5)),
        torch.rand((1)),
        torch.rand((2, 0, 1)),  # tensor.numel() is 0
        torch.FloatTensor(),  # empty tensor
    ]

    cummax_cummin_funcs = {'cummax': torch.cummax, 'cummin': torch.cummin}

    for input in input_list:
        ndims = input.dim()
        # valid dim range is [-ndims, ndims-1]
        # test for all `dim` value which is valid
        for dim in range(-ndims, ndims):
            cummax_func = partial(cummax_cummin_funcs[key], dim=dim)
            wrapped_model = WrapFunction(cummax_func).eval()

            with torch.no_grad():
                torch.onnx.export(wrapped_model,
                                  input,
                                  onnx_file,
                                  export_params=True,
                                  keep_initializers_as_inputs=True,
                                  input_names=['input'],
                                  output_names=['output', 'indices'],
                                  opset_version=opset)

            onnx_model = onnx.load(onnx_file)
            input_all = [node.name for node in onnx_model.graph.input]
            input_initializer = [
                node.name for node in onnx_model.graph.initializer
            ]
            net_feed_input = list(set(input_all) - set(input_initializer))
            assert (len(net_feed_input) == 1)

            session_options = rt.SessionOptions()
            session_options.register_custom_ops_library(ort_custom_op_path)
            sess = rt.InferenceSession(onnx_file, session_options)
            ort_output, ort_inds = sess.run(None,
                                            {'input': input.detach().numpy()})
            pytorch_output, pytorch_inds = wrapped_model(input.clone())
            pytorch_output = pytorch_output.detach().numpy()
            pytorch_inds = pytorch_inds.detach().numpy()
            assert np.allclose(pytorch_output, ort_output, atol=1e-5)
            assert np.all(pytorch_inds == ort_inds)
Example #13
def test_roialign_rotated():
    if torch.__version__ == 'parrots':
        pytest.skip('onnx is not supported in parrots directly')
    try:
        from mmcv.ops import get_onnxruntime_op_path, roi_align_rotated
    except (ImportError, ModuleNotFoundError):
        pytest.skip('roi_align_aligned op is not successfully compiled')

    ort_custom_op_path = get_onnxruntime_op_path()
    if not os.path.exists(ort_custom_op_path):
        pytest.skip('custom ops for onnxruntime are not compiled.')
    # roi align config
    pool_h = 2
    pool_w = 2
    spatial_scale = 1.0
    sampling_ratio = 2

    inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0.5, 0.5, 1., 1., 0]]),
              ([[[[1., 2.], [3., 4.]]]], [[0., 0.5, 0.5, 1., 1., np.pi / 2]]),
              ([[[[1., 2.], [3., 4.]],
                 [[4., 3.], [2., 1.]]]], [[0., 0.5, 0.5, 1., 1., 0]]),
              ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
                  [11., 12., 15., 16.]]]], [[0., 1.5, 1.5, 3., 3., 0]]),
              ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
                  [11., 12., 15., 16.]]]], [[0., 1.5, 1.5, 3., 3.,
                                             np.pi / 2]])]

    def warpped_function(torch_input, torch_rois):
        return roi_align_rotated(torch_input, torch_rois, (pool_w, pool_h),
                                 spatial_scale, sampling_ratio, True, False)

    for case in inputs:
        np_input = np.array(case[0], dtype=np.float32)
        np_rois = np.array(case[1], dtype=np.float32)
        input = torch.from_numpy(np_input)
        rois = torch.from_numpy(np_rois)

        # compute pytorch_output
        with torch.no_grad():
            pytorch_output = roi_align_rotated(input, rois, (pool_w, pool_h),
                                               spatial_scale, sampling_ratio,
                                               True, False)

        # export and load onnx model
        wrapped_model = WrapFunction(warpped_function)
        with torch.no_grad():
            torch.onnx.export(wrapped_model, (input, rois),
                              onnx_file,
                              export_params=True,
                              keep_initializers_as_inputs=True,
                              input_names=['features', 'rois'],
                              opset_version=11)

        onnx_model = onnx.load(onnx_file)
        session_options = rt.SessionOptions()
        if os.path.exists(ort_custom_op_path):
            session_options.register_custom_ops_library(ort_custom_op_path)

        # compute onnx_output
        input_all = [node.name for node in onnx_model.graph.input]
        input_initializer = [
            node.name for node in onnx_model.graph.initializer
        ]
        net_feed_input = list(set(input_all) - set(input_initializer))
        assert (len(net_feed_input) == 2)
        sess = rt.InferenceSession(onnx_file, session_options)
        onnx_output = sess.run(None, {
            'features': input.detach().numpy(),
            'rois': rois.detach().numpy()
        })
        onnx_output = onnx_output[0]

        # allclose

        assert np.allclose(pytorch_output, onnx_output, atol=1e-3)
Example #14
def test_softnms():
    if torch.__version__ == 'parrots':
        pytest.skip('onnx is not supported in parrots directly')
    from mmcv.ops import get_onnxruntime_op_path, soft_nms

    # only support pytorch >= 1.7.0
    if version.parse(torch.__version__) < version.parse('1.7.0'):
        warnings.warn('test_softnms should be run with pytorch >= 1.7.0')
        return

    # only support onnxruntime >= 1.5.1
    assert version.parse(rt.__version__) >= version.parse(
        '1.5.1'), 'test_softnms should be run with onnxruntime >= 1.5.1'

    ort_custom_op_path = get_onnxruntime_op_path()
    if not os.path.exists(ort_custom_op_path):
        pytest.skip('softnms for onnxruntime is not compiled.')

    np_boxes = np.array([[6.0, 3.0, 8.0, 7.0], [3.0, 6.0, 9.0, 11.0],
                         [3.0, 7.0, 10.0, 12.0], [1.0, 4.0, 13.0, 7.0]],
                        dtype=np.float32)
    np_scores = np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32)

    boxes = torch.from_numpy(np_boxes)
    scores = torch.from_numpy(np_scores)

    configs = [[0.3, 0.5, 0.01, 'linear'], [0.3, 0.5, 0.01, 'gaussian'],
               [0.3, 0.5, 0.01, 'naive']]

    session_options = rt.SessionOptions()
    session_options.register_custom_ops_library(ort_custom_op_path)

    for _iou_threshold, _sigma, _min_score, _method in configs:
        pytorch_dets, pytorch_inds = soft_nms(boxes,
                                              scores,
                                              iou_threshold=_iou_threshold,
                                              sigma=_sigma,
                                              min_score=_min_score,
                                              method=_method)
        nms = partial(soft_nms,
                      iou_threshold=_iou_threshold,
                      sigma=_sigma,
                      min_score=_min_score,
                      method=_method)

        wrapped_model = WrapFunction(nms)
        wrapped_model.cpu().eval()
        with torch.no_grad():
            torch.onnx.export(wrapped_model, (boxes, scores),
                              onnx_file,
                              export_params=True,
                              keep_initializers_as_inputs=True,
                              input_names=['boxes', 'scores'],
                              opset_version=11)
        onnx_model = onnx.load(onnx_file)

        # get onnx output
        input_all = [node.name for node in onnx_model.graph.input]
        input_initializer = [
            node.name for node in onnx_model.graph.initializer
        ]
        net_feed_input = list(set(input_all) - set(input_initializer))
        assert (len(net_feed_input) == 2)
        sess = rt.InferenceSession(onnx_file, session_options)
        onnx_dets, onnx_inds = sess.run(None, {
            'scores': scores.detach().numpy(),
            'boxes': boxes.detach().numpy()
        })

        assert np.allclose(pytorch_dets, onnx_dets, atol=1e-3)
        assert np.allclose(pytorch_inds, onnx_inds, atol=1e-3)
Example #15
def evaluate(opt):
    # set config
    config = load_config(opt)
    if opt.num_threads > 0: torch.set_num_threads(opt.num_threads)
    config['opt'] = opt
    logger.info("%s", config)

    # set path
    set_path(config)

    # prepare test dataset
    test_loader = prepare_datasets(config)

    # load pytorch model checkpoint
    checkpoint = load_checkpoint(config)

    # prepare model and load parameters
    model = load_model(config, checkpoint)
    model.eval()

    # convert to onnx format
    if opt.convert_onnx:
        (x, y) = next(iter(test_loader))
        x = to_device(x, opt.device)
        y = to_device(y, opt.device)
        convert_onnx(config, model, x)
        check_onnx(config)
        logger.info("[ONNX model saved at {}".format(opt.onnx_path))
        return

    # load onnx model for using onnxruntime
    if opt.enable_ort:
        import onnxruntime as ort
        sess_options = ort.SessionOptions()
        sess_options.inter_op_num_threads = opt.num_threads
        sess_options.intra_op_num_threads = opt.num_threads
        ort_session = ort.InferenceSession(opt.onnx_path,
                                           sess_options=sess_options)

    # convert to tvm format
    if opt.convert_tvm:
        (x, y) = next(iter(test_loader))
        x = to_device(x, opt.device)
        y = to_device(y, opt.device)
        convert_tvm(config, model, x)
        logger.info("[TVM model saved at {}".format(opt.tvm_path))
        return

    # enable to use dynamic quantized model (pytorch>=1.3.0)
    if opt.enable_dqm and opt.device == 'cpu':
        model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear},
                                                    dtype=torch.qint8)
        print(model)

    # evaluation
    preds = None
    ys = None
    correct = 0
    n_batches = len(test_loader)
    total_examples = 0
    whole_st_time = time.time()
    first_time = time.time()
    first_examples = 0
    total_duration_time = 0.0
    with torch.no_grad():
        for i, (x, y) in enumerate(tqdm(test_loader, total=n_batches)):
            start_time = time.time()
            x = to_device(x, opt.device)
            y = to_device(y, opt.device)

            if opt.enable_ort:
                x = to_numpy(x)
                if config['emb_class'] == 'glove':
                    ort_inputs = {ort_session.get_inputs()[0].name: x}
                if config['emb_class'] in [
                        'bert', 'distilbert', 'albert', 'roberta', 'bart',
                        'electra'
                ]:
                    if config['emb_class'] in ['distilbert', 'bart']:
                        ort_inputs = {
                            ort_session.get_inputs()[0].name: x[0],
                            ort_session.get_inputs()[1].name: x[1]
                        }
                    else:
                        ort_inputs = {
                            ort_session.get_inputs()[0].name: x[0],
                            ort_session.get_inputs()[1].name: x[1],
                            ort_session.get_inputs()[2].name: x[2]
                        }
                logits = ort_session.run(None, ort_inputs)[0]
                logits = to_device(torch.tensor(logits), opt.device)
            else:
                logits = model(x)

            if preds is None:
                preds = to_numpy(logits)
                ys = to_numpy(y)
            else:
                preds = np.append(preds, to_numpy(logits), axis=0)
                ys = np.append(ys, to_numpy(y), axis=0)
            predicted = logits.argmax(1)
            correct += (predicted == y).sum().item()
            cur_examples = y.size(0)
            total_examples += cur_examples
            if i == 0:  # the first batch may take longer, so ignore it when computing duration
                first_time = float((time.time() - first_time) * 1000)
                first_examples = cur_examples
            if opt.num_examples != 0 and total_examples >= opt.num_examples:
                logger.info("[Stop Evaluation] : up to the {} examples".format(
                    total_examples))
                break
            duration_time = float((time.time() - start_time) * 1000)
            if i != 0: total_duration_time += duration_time
            '''
            logger.info("[Elapsed Time] : {}ms".format(duration_time))
            '''
    # generate report
    labels = model.labels
    label_names = [v for k, v in sorted(labels.items(), key=lambda x: x[0])]
    preds_ids = np.argmax(preds, axis=1)
    try:
        print(
            classification_report(ys,
                                  preds_ids,
                                  target_names=label_names,
                                  digits=4))
        print(labels)
        print(confusion_matrix(ys, preds_ids))
    except Exception as e:
        logger.warning(str(e))

    acc = correct / total_examples
    whole_time = float((time.time() - whole_st_time) * 1000)
    avg_time = (whole_time - first_time) / (total_examples - first_examples)
    # write predictions to file
    write_prediction(opt, preds, labels)
    logger.info("[Accuracy] : {:.4f}, {:5d}/{:5d}".format(
        acc, correct, total_examples))
    logger.info("[Elapsed Time] : {}ms, {}ms on average".format(
        whole_time, avg_time))
    logger.info(
        "[Elapsed Time(total_duration_time, average)] : {}ms, {}ms".format(
            total_duration_time, total_duration_time / (total_examples - 1)))
Example #16
def _load_model(args) -> Any:
    # validation
    if args.device not in [None, "cpu"] and args.engine != TORCH_ENGINE:
        raise ValueError(
            f"device {args.device} is not supported for {args.engine}")
    if args.fp16 and args.engine != TORCH_ENGINE:
        raise ValueError(f"half precision is not supported for {args.engine}")
    if args.quantized_inputs and args.engine == TORCH_ENGINE:
        raise ValueError(f"quantized inputs not supported for {args.engine}")
    if args.num_cores is not None and args.engine == TORCH_ENGINE:
        raise ValueError(
            f"overriding default num_cores not supported for {args.engine}")
    if (args.num_cores is not None and args.engine == ORT_ENGINE
            and onnxruntime.__version__ < "1.7"):
        raise ValueError(
            "overriding default num_cores not supported for onnxruntime < 1.7.0. "
            "If using an older build with OpenMP, try setting the OMP_NUM_THREADS "
            "environment variable")

    # scale static ONNX graph to desired image shape
    if args.engine in [DEEPSPARSE_ENGINE, ORT_ENGINE]:
        args.model_filepath, _ = modify_yolo_onnx_input_shape(
            args.model_filepath, args.image_shape)
        has_postprocessing = yolo_onnx_has_postprocessing(args.model_filepath)

    # load model
    if args.engine == DEEPSPARSE_ENGINE:
        _LOGGER.info(f"Compiling DeepSparse model for {args.model_filepath}")
        model = compile_model(args.model_filepath, 1, args.num_cores)
        if args.quantized_inputs and not model.cpu_vnni:
            _LOGGER.warning("WARNING: VNNI instructions not detected, "
                            "quantization speedup not well supported")
    elif args.engine == ORT_ENGINE:
        _LOGGER.info(f"Loading onnxruntime model for {args.model_filepath}")

        sess_options = onnxruntime.SessionOptions()
        if args.num_cores is not None:
            sess_options.intra_op_num_threads = args.num_cores
        sess_options.log_severity_level = 3
        sess_options.graph_optimization_level = (
            onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL)

        onnx_model = onnx.load(args.model_filepath)
        override_model_batch_size(onnx_model, 1)
        model = onnxruntime.InferenceSession(onnx_model.SerializeToString(),
                                             sess_options=sess_options)
    elif args.engine == TORCH_ENGINE:
        _LOGGER.info(f"Loading torch model for {args.model_filepath}")
        model = torch.load(args.model_filepath)
        if isinstance(model, dict):
            model = model["model"]
        model.to(args.device)
        model.eval()
        if args.fp16:
            _LOGGER.info("Using half precision")
            model.half()
        else:
            _LOGGER.info("Using full precision")
            model.float()
        has_postprocessing = True

    return model, has_postprocessing
Example #17
def inference(opt):
    # set config
    config = load_config(opt)
    if opt.num_threads > 0: torch.set_num_threads(opt.num_threads)
    config['opt'] = opt

    # set path: opt.embedding_path, opt.vocab_path, opt.label_path
    set_path(config)

    # load pytorch model checkpoint
    checkpoint = load_checkpoint(config)

    # prepare model and load parameters
    model = load_model(config, checkpoint)
    model.eval()

    # load onnx model for using onnxruntime
    if opt.enable_ort:
        import onnxruntime as ort
        sess_options = ort.SessionOptions()
        sess_options.inter_op_num_threads = opt.num_threads
        sess_options.intra_op_num_threads = opt.num_threads
        ort_session = ort.InferenceSession(opt.onnx_path,
                                           sess_options=sess_options)

    # enable to use dynamic quantized model (pytorch>=1.3.0)
    if opt.enable_dqm and opt.device == 'cpu':
        model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear},
                                                    dtype=torch.qint8)
        print(model)

    # prepare tokenizer
    tokenizer = prepare_tokenizer(config, model)

    # prepare labels
    labels = model.labels

    # inference
    f_out = open(opt.test_path + '.inference', 'w', encoding='utf-8')
    total_examples = 0
    total_duration_time = 0.0
    with torch.no_grad(), open(opt.test_path, 'r', encoding='utf-8') as f:
        for i, line in enumerate(f):
            start_time = time.time()
            sent, label = line.strip().split('\t')
            x_raw = sent.split()
            y_raw = label
            text = ' '.join(x_raw)
            x = encode_text(config, tokenizer, text)
            x = to_device(x, opt.device)

            if opt.enable_ort:
                x = to_numpy(x)
                if config['emb_class'] == 'glove':
                    ort_inputs = {ort_session.get_inputs()[0].name: x}
                if config['emb_class'] in [
                        'bert', 'distilbert', 'albert', 'roberta', 'bart',
                        'electra'
                ]:
                    if config['emb_class'] in ['distilbert', 'bart']:
                        ort_inputs = {
                            ort_session.get_inputs()[0].name: x[0],
                            ort_session.get_inputs()[1].name: x[1]
                        }
                    else:
                        ort_inputs = {
                            ort_session.get_inputs()[0].name: x[0],
                            ort_session.get_inputs()[1].name: x[1],
                            ort_session.get_inputs()[2].name: x[2]
                        }
                logits = ort_session.run(None, ort_inputs)[0]
                logits = to_device(torch.tensor(logits), opt.device)
            else:
                logits = model(x)

            predicted = logits.argmax(1)
            predicted = to_numpy(predicted)[0]
            predicted_raw = labels[predicted]
            f_out.write(text + '\t' + y_raw + '\t' + predicted_raw + '\n')
            total_examples += 1
            if opt.num_examples != 0 and total_examples >= opt.num_examples:
                logger.info("[Stop Inference] : up to the {} examples".format(
                    total_examples))
                break
            duration_time = float((time.time() - start_time) * 1000)
            if i != 0: total_duration_time += duration_time
            logger.info("[Elapsed Time] : {}ms".format(duration_time))
    f_out.close()
    logger.info(
        "[Elapsed Time(total_duration_time, average)] : {}ms, {}ms".format(
            total_duration_time, total_duration_time / (total_examples - 1)))

Example #18

def inference(onnx_model, model_dir, examples, fast_tokenizer, num_threads):
    quantized_str = ''
    if 'quantized' in onnx_model:
        quantized_str = 'quantized'
    onnx_inference = []
    #     pytorch_inference = []
    # onnx session
    options = ort.SessionOptions()
    options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
    options.intra_op_num_threads = 1
    print(onnx_model)
    ort_session = ort.InferenceSession(onnx_model, options)

    # pytorch pretrained model and tokenizer
    tokenizer = BertTokenizerFast.from_pretrained(model_dir)
    tokenizer_str = "BertTokenizerFast"

    print(
        "**************** {} ONNX inference with batch tokenization and with {} tokenizer****************"
        .format(quantized_str, tokenizer_str))
    start_onnx_inference_batch = time.time()
    start_batch_tokenization = time.time()
    tokens_dict = tokenizer.batch_encode_plus(examples, max_length=128)
    total_batch_tokenization_time = time.time() - start_batch_tokenization
    total_inference_time = 0
    total_build_label_time = 0
    for i in range(len(examples)):
        """
        Onnx inference with batch tokenization
        """

        if i % 100 == 0:
            print('[inference... ]', i, 'out of ', len(examples))

        tokens = get_tokens(tokens_dict, i)
        #inference
        start_inference = time.time()
        ort_outs = ort_session.run(None, tokens)
        total_inference_time = total_inference_time + (time.time() -
                                                       start_inference)
        #build label
        start_build_label = time.time()
        torch_onnx_output = torch.tensor(ort_outs[0], dtype=torch.float32)
        onnx_logits = F.softmax(torch_onnx_output, dim=1)
        logits_label = torch.argmax(onnx_logits, dim=1)
        label = logits_label.detach().cpu().numpy()
        #         onnx_inference.append(label[0])
        onnx_inference.append(onnx_logits.detach().cpu().numpy()[0].tolist())
        total_build_label_time = total_build_label_time + (time.time() -
                                                           start_build_label)


#         print(i, label[0], onnx_logits.detach().cpu().numpy()[0].tolist(), type(onnx_logits.detach().cpu().numpy()[0]) )

    end_onnx_inference_batch = time.time()
    print("Total batch tokenization time (in seconds): ",
          total_batch_tokenization_time)
    print("Total inference time (in seconds): ", total_inference_time)
    print("Total build label time (in seconds): ", total_build_label_time)
    print(
        "Duration ONNX inference (in seconds) with {} and batch tokenization: "
        .format(tokenizer_str),
        end_onnx_inference_batch - start_onnx_inference_batch,
        (end_onnx_inference_batch - start_onnx_inference_batch) /
        len(examples))

    return onnx_inference
Example #19
def generate_test_data(onnx_file,
                       output_path,
                       batch_size,
                       sequence_length,
                       use_cpu=True,
                       input_tensor_only=False,
                       dictionary_size=DICT_SIZE,
                       test_cases=3):
    input_data_type = np.int32
    for test_case in range(test_cases):
        input_1 = np.random.randint(dictionary_size,
                                    size=(batch_size, sequence_length),
                                    dtype=input_data_type)
        tensor_1 = numpy_helper.from_array(input_1, 'input_ids')

        actual_seq_len = random.randint(sequence_length - 3, sequence_length)
        input_2 = np.zeros((batch_size, sequence_length),
                           dtype=input_data_type)
        temp = np.ones((batch_size, actual_seq_len), dtype=input_data_type)
        input_2[:temp.shape[0], :temp.shape[1]] = temp
        tensor_2 = numpy_helper.from_array(input_2, 'attention_mask')

        input_3 = np.zeros((batch_size, sequence_length),
                           dtype=input_data_type)
        tensor_3 = numpy_helper.from_array(input_3, 'token_type_ids')

        path = os.path.join(output_path, 'test_data_set_' + str(test_case))
        try:
            os.mkdir(path)
        except OSError:
            print("Creation of the directory %s failed" % path)
        else:
            print("Successfully created the directory %s " % path)

        if input_tensor_only:
            return

        sess_options = onnxruntime.SessionOptions()
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
        sess = onnxruntime.InferenceSession(onnx_file,
                                            sess_options,
                                            providers=['CPUExecutionProvider'])

        input1_name = sess.get_inputs()[0].name
        output_names = [output.name for output in sess.get_outputs()]
        inputs = {
            'input_ids': input_1,
            'attention_mask': input_2,
            'token_type_ids': input_3
        }
        print("inputs", inputs)
        result = sess.run(output_names, inputs)

        with open(os.path.join(path, 'input_{}.pb'.format(0)), 'wb') as f:
            f.write(tensor_1.SerializeToString())
        with open(os.path.join(path, 'input_{}.pb'.format(1)), 'wb') as f:
            f.write(tensor_2.SerializeToString())
        with open(os.path.join(path, 'input_{}.pb'.format(2)), 'wb') as f:
            f.write(tensor_3.SerializeToString())

        for i, output_name in enumerate(output_names):
            tensor_result = numpy_helper.from_array(
                np.asarray(result[i]).reshape((batch_size, sequence_length)),
                output_names[i])
            with open(os.path.join(path, 'output_{}.pb'.format(i)), 'wb') as f:
                f.write(tensor_result.SerializeToString())

        start_time = timeit.default_timer()

        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_EXTENDED

        path_prefix = onnx_file[:-5]  #remove .onnx suffix
        if use_cpu:
            sess_options.optimized_model_filepath = path_prefix + "_optimized_cpu.onnx"
        else:
            sess_options.optimized_model_filepath = path_prefix + "_optimized_gpu.onnx"

        session = onnxruntime.InferenceSession(onnx_file, sess_options)
        if use_cpu:
            session.set_providers(['CPUExecutionProvider'])  # use cpu
        else:
            if 'CUDAExecutionProvider' not in session.get_providers():
                print("Warning: GPU not found")
                continue
        outputs = session.run(None, inputs)
        evalTime = timeit.default_timer() - start_time
        if outputs[0].tolist() != result[0].tolist():
            print(
                "Error: not same result after optimization. use_cpu={}, no_opt_output={}, opt_output={}"
                .format(use_cpu, result[0].tolist(), outputs[0].tolist()))
        print("** Evaluation done in total {} secs".format(evalTime))
Example #20
    num_gpus = torch.cuda.device_count()

    print(target, model_name)

    model = model_fn(pretrained=True)
    # pytorch --> onnx model conversion
    if (target != "pytorch"):
        # write model out to onnx
        ref_input = torch.tensor(vidl.get_random(0, batch_len * num_gpus))
        torch.onnx.export(model, (ref_input),
                          "bench_out.onnx",
                          keep_initializers_as_inputs=True,
                          verbose=True,
                          opset_version=10)

        so = ort.SessionOptions()
        so.optimized_model_filepath = "bench_out.onnx.opt"
        session = ort.InferenceSession("bench_out.onnx", so)

        del session
        del so

    res = None
    if (num_gpus > 0):
        scaled_batch_len = batch_len * num_gpus
    else:
        scaled_batch_len = batch_len
    latency = []

    # PYTORCH BENCH
    if target == "pytorch":

Example #21

def main():
    parser = argparse.ArgumentParser(description='Simple ONNX Runtime Test Tool.')
    parser.add_argument('model_path', help='model path')
    parser.add_argument('num_iters', nargs='?', type=int, default=1000, help='model run iterations. default=1000')
    parser.add_argument('--debug', action='store_true', help='pause execution to allow attaching a debugger.')
    parser.add_argument('--profile', action='store_true', help='enable chrome timeline trace profiling.')
    args = parser.parse_args()
    iters = args.num_iters

    if args.debug:
        print("Pausing execution ready for debugger to attach to pid: {}".format(os.getpid()))
        print("Press key to continue.")
        sys.stdin.read(1)

    sess_options = None
    if args.profile:
        sess_options = onnxrt.SessionOptions()
        sess_options.enable_profiling = True
        sess_options.profile_file_prefix = os.path.basename(args.model_path)

    sess = onnxrt.InferenceSession(args.model_path, sess_options)
    meta = sess.get_modelmeta()

    feeds = {}
    for input_meta in sess.get_inputs():
        # replace any symbolic dimensions (value is None) with 1
        shape = [dim if dim else 1 for dim in input_meta.shape]
        if input_meta.type in float_dict:
            feeds[input_meta.name] = np.random.rand(*shape).astype(float_dict[input_meta.type])
        elif input_meta.type in integer_dict:
            feeds[input_meta.name] = np.random.uniform(high=1000,
                                                       size=tuple(shape)).astype(integer_dict[input_meta.type])
        elif input_meta.type == 'tensor(bool)':
            feeds[input_meta.name] = np.random.randint(2, size=tuple(shape)).astype('bool')
        else:
            print("unsupported input type {} for input {}".format(input_meta.type, input_meta.name))
            sys.exit(-1)

    # Starting with IR version 4, some initializers provide default values
    # and can be overridden at run time. For models with IR version < 4
    # the list below is empty.
    for initializer in sess.get_overridable_initializers():
        shape = [dim if dim else 1 for dim in initializer.shape]
        if initializer.type in float_dict:
            feeds[initializer.name] = np.random.rand(*shape).astype(float_dict[initializer.type])
        elif initializer.type in integer_dict:
            feeds[initializer.name] = np.random.uniform(high=1000,
                                                        size=tuple(shape)).astype(integer_dict[initializer.type])
        elif initializer.type == 'tensor(bool)':
            feeds[initializer.name] = np.random.randint(2, size=tuple(shape)).astype('bool')
        else:
            print("unsupported initializer type {} for initializer {}".format(initializer.type, initializer.name))
            sys.exit(-1)

    start = timer()
    for i in range(iters):
        sess.run([], feeds)  # fetch all outputs
    end = timer()

    print("model: {}".format(meta.graph_name))
    print("version: {}".format(meta.version))
    print("iterations: {}".format(iters))
    print("avg latency: {} ms".format(((end - start) * 1000) / iters))

    if args.profile:
        trace_file = sess.end_profiling()
        print("trace file written to: {}".format(trace_file))

    return 0
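
# Added sketch (not part of the original excerpt): the tool is presumably run
# as a script, e.g. `python onnxruntime_test.py model.onnx 500 --profile`
# (the file name here is assumed), so a standard entry-point guard would be:
if __name__ == "__main__":
    sys.exit(main())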
Exemple #22
0
    def predict(self, data):
        import onnxruntime as ort

        assert self.model is not None

        remainder_sess = None
        sess_options = ort.SessionOptions()
        sess_options.intra_op_num_threads = self.params["nthread"]
        sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
        sess = ort.InferenceSession(self.model.SerializeToString(),
                                    sess_options=sess_options)
        if self.remainder_model is not None:
            remainder_sess = ort.InferenceSession(
                self.remainder_model.SerializeToString(),
                sess_options=sess_options)

        batch_size = 1 if self.params["operator"] == "xgb" else self.params[
            "batch_size"]
        input_name = sess.get_inputs()[0].name
        is_regression = data.learning_task == LearningTask.REGRESSION
        if is_regression:
            output_name_index = 0
        else:
            output_name_index = 1
        output_name = sess.get_outputs()[output_name_index].name

        with Timer() as t:
            predict_data = ScoreBackend.get_data(data.X_test)
            total_size = len(predict_data)
            iterations = total_size // batch_size
            iterations += 1 if total_size % batch_size > 0 else 0
            iterations = max(1, iterations)
            self.predictions = np.empty([total_size, self.params["n_classes"]],
                                        dtype="f4")
            for i in range(0, iterations):
                start = i * batch_size
                end = min(start + batch_size, total_size)

                if self.params["operator"] == "xgb":
                    self.predictions[start:end, :] = sess.run(
                        [output_name],
                        {input_name: predict_data[start:end, :]})
                elif self.params["operator"] == "lgbm" or "rf":
                    if i == iterations - 1 and self.remainder_model is not None:
                        pred = remainder_sess.run(
                            [output_name],
                            {input_name: predict_data[start:end, :]})
                    else:
                        pred = sess.run(
                            [output_name],
                            {input_name: predict_data[start:end, :]})

                    if is_regression:
                        self.predictions[start:end, :] = pred[0]
                    else:
                        self.predictions[start:end, :] = list(
                            map(lambda x: list(x.values()), pred[0]))

        if is_regression:
            self.predictions = self.predictions.flatten()
        del sess
        if remainder_sess is not None:
            del remainder_sess

        return t.interval
Exemple #23
0
        for shp in ishapes:
            ts = np.prod(shp)
            #print("reshaping %s with offset %d" % (str(shp), offset), file=sys.stderr)
            inputs.append(read(ts).reshape(shp))
        ret = m.run(None, dict(zip(keys, inputs)))
        #print(ret, file=sys.stderr)
        for r in ret:
            write(r)


if __name__ == "__main__":
    print(ort.get_available_providers(), file=sys.stderr)
    if 'OpenVINOExecutionProvider' in ort.get_available_providers() and 'ONNXCPU' not in os.environ:
        print("OnnxJit is using openvino", file=sys.stderr)
        options = ort.SessionOptions()
        options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
        provider = 'OpenVINOExecutionProvider'
    elif 'CUDAExecutionProvider' in ort.get_available_providers() and 'ONNXCPU' not in os.environ:
        print("OnnxJit is using CUDA", file=sys.stderr)
        options = ort.SessionOptions()
        options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
        provider = 'CUDAExecutionProvider'
    else:
        print("OnnxJit is using CPU", file=sys.stderr)
        options = ort.SessionOptions()
        options.intra_op_num_threads = 4
        options.inter_op_num_threads = 8
        options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
        options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
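        # Added sketch (the excerpt ends here, before the session is built).
        # The CPU branch presumably also selects a provider:
        provider = 'CPUExecutionProvider'

    # The selected options and provider would then feed an InferenceSession on
    # a model path supplied by the caller (sys.argv[1] is an assumption here):
    m = ort.InferenceSession(sys.argv[1], options, providers=[provider])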
Exemple #24
0
def onnx2tensorrt(onnx_file,
                  trt_file,
                  input_config,
                  verify=False,
                  show=False,
                  dataset='coco',
                  workspace_size=1,
                  verbose=False):
    import tensorrt as trt
    onnx_model = onnx.load(onnx_file)
    input_shape = input_config['input_shape']
    # create trt engine and wrapper
    opt_shape_dict = {'input': [input_shape, input_shape, input_shape]}
    max_workspace_size = get_GiB(workspace_size)
    trt_engine = onnx2trt(
        onnx_model,
        opt_shape_dict,
        log_level=trt.Logger.VERBOSE if verbose else trt.Logger.ERROR,
        fp16_mode=False,
        max_workspace_size=max_workspace_size)
    save_dir, _ = osp.split(trt_file)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    save_trt_engine(trt_engine, trt_file)
    print(f'Successfully created TensorRT engine: {trt_file}')

    if verify:
        one_img, one_meta = preprocess_example_input(input_config)
        input_img_cpu = one_img.detach().cpu().numpy()
        input_img_cuda = one_img.cuda()
        img = one_meta['show_img']

        # Get results from ONNXRuntime
        ort_custom_op_path = get_onnxruntime_op_path()
        session_options = ort.SessionOptions()
        if osp.exists(ort_custom_op_path):
            session_options.register_custom_ops_library(ort_custom_op_path)
        sess = ort.InferenceSession(onnx_file, session_options)
        output_names = [_.name for _ in sess.get_outputs()]
        ort_outputs = sess.run(None, {
            'input': input_img_cpu,
        })
        with_mask = len(output_names) == 3
        ort_outputs = [_.squeeze(0) for _ in ort_outputs]
        ort_dets, ort_labels = ort_outputs[:2]
        ort_masks = ort_outputs[2] if with_mask else None
        ort_shapes = [_.shape for _ in ort_outputs]
        print(f'ONNX Runtime output names: {output_names}, '
              f'output shapes: {ort_shapes}')

        # Get results from TensorRT
        trt_model = TRTWraper(trt_file, ['input'], output_names)
        with torch.no_grad():
            trt_outputs = trt_model({'input': input_img_cuda})
        trt_outputs = [
            trt_outputs[_].detach().cpu().numpy().squeeze(0)
            for _ in output_names
        ]
        trt_dets, trt_labels = trt_outputs[:2]
        trt_shapes = [_.shape for _ in trt_outputs]
        print(f'TensorRT output names: {output_names}, '
              f'output shapes: {trt_shapes}')
        trt_masks = trt_outputs[2] if with_mask else None

        # Show detection outputs
        if show:
            CLASSES = get_classes(dataset)
            score_thr = 0.35
            imshow_det_bboxes(img.copy(),
                              trt_dets,
                              trt_labels,
                              segms=trt_masks,
                              class_names=CLASSES,
                              score_thr=score_thr,
                              win_name='TensorRT')
            imshow_det_bboxes(img.copy(),
                              ort_dets,
                              ort_labels,
                              segms=ort_masks,
                              class_names=CLASSES,
                              score_thr=score_thr,
                              win_name='ONNXRuntime')
        # Compare results
        np.testing.assert_allclose(ort_dets, trt_dets, rtol=1e-03, atol=1e-05)
        np.testing.assert_allclose(ort_labels, trt_labels)
        if with_mask:
            np.testing.assert_allclose(ort_masks,
                                       trt_masks,
                                       rtol=1e-03,
                                       atol=1e-05)
        print('The numerical values are the same ' +
              'between ONNXRuntime and TensorRT')
Exemple #25
0
 def testOrtExecutionMode(self):
     opt = onnxrt.SessionOptions()
     self.assertEqual(opt.execution_mode,
                      onnxrt.ExecutionMode.ORT_SEQUENTIAL)
     opt.execution_mode = onnxrt.ExecutionMode.ORT_PARALLEL
     self.assertEqual(opt.execution_mode, onnxrt.ExecutionMode.ORT_PARALLEL)
Exemple #26
0
## conda create -n cv2020 python=3.8
## conda activate cv2020
## conda install onnx protobuf numpy pip six fastapi uvicorn python-multipart -c conda-forge
## pip install opencv-python # need to install from pip due to QT dependencies on arm64

## ONNXRuntime https://elinux.org/Jetson_Zoo#ONNX_Runtime
## wget https://nvidia.box.com/shared/static/8xgbee5ghhb92i9rrcr04yymg0n3x3t0.whl -O onnxruntime_gpu-1.7.0-cp38-cp38-linux_aarch64.whl
## pip install onnxruntime_gpu-1.7.0-cp38-cp38-linux_aarch64.whl

import cv2
import onnxruntime as rt
import numpy as np

####
sessOptions = rt.SessionOptions()
sessOptions.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL
raccoonModel = rt.InferenceSession('raccoon_sim.onnx', sessOptions)
####
inputStream = cv2.VideoCapture(0)

while True:
    isImageValid, inputImage = inputStream.read()

    if isImageValid:
        ### Pre-processing ###
        inputTensor = cv2.resize(inputImage, (320, 320))
        inputTensor = (inputTensor -
                       [103.53, 116.28, 123.675]) / [57.375, 57.12, 58.395]
        inputTensor = inputTensor.transpose(2, 0, 1)[np.newaxis].astype(
            np.float32)  #NCHW
        ### Inference ###
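        # Added sketch (the excerpt stops at this point): a typical run call
        # feeds the NCHW tensor under the model's reported input name; the
        # output layout depends on the exported detector and is not shown here.
        inputName = raccoonModel.get_inputs()[0].name
        detections = raccoonModel.run(None, {inputName: inputTensor})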
Exemple #27
0
def create_ort_training_session_with_optimizer(model, device, training_optimizer_name, lr_params_feed_name,
                                               map_optimizer_attributes, world_rank=-1, world_size=1,
                                               gradient_accumulation_steps=1, bind_parameters=False,
                                               use_mixed_precision=False, allreduce_post_accumulation=False,
                                               deepspeed_zero_stage=0,
                                               enable_grad_norm_clip=True,
                                               frozen_weights=[], opset_version=DEFAULT_OPSET_VERSION,
                                               use_deterministic_compute=False,
                                               use_invertible_layernorm_grad=False,
                                               enable_adasum=False):
    output_name = model.graph.output[0].name
    ort_parameters = ort.TrainingParameters()
    ort_parameters.loss_output_name = output_name
    ort_parameters.use_mixed_precision = use_mixed_precision
    ort_parameters.world_rank = world_rank
    ort_parameters.world_size = world_size
    ort_parameters.gradient_accumulation_steps = gradient_accumulation_steps
    ort_parameters.allreduce_post_accumulation = allreduce_post_accumulation
    ort_parameters.deepspeed_zero_stage = deepspeed_zero_stage
    ort_parameters.enable_grad_norm_clip = enable_grad_norm_clip
    ort_parameters.set_gradients_as_graph_outputs = False
    ort_parameters.use_invertible_layernorm_grad = use_invertible_layernorm_grad
    ort_parameters.enable_adasum = enable_adasum
    output_types = {}
    for output in model.graph.output:
        output_types[output.name] = output.type.tensor_type

    # pybind does not allow adding directly to ort_parameters.weights_to_train,
    # so work around it with a temporary weights_to_train set.
    torch_params = {}
    optimizer_attributes_map = {}
    optimizer_int_attributes_map = {}

    unused_frozen_weights = [n for n in frozen_weights if n not in [i.name for i in model.graph.initializer]]
    if unused_frozen_weights:
        raise RuntimeError("{} in frozen_weights not found in model weights.".format(unused_frozen_weights))

    weights_to_train = set()
    for initializer in model.graph.initializer:
        if initializer.name in frozen_weights:
            continue
        weights_to_train.add(initializer.name)
        if map_optimizer_attributes is not None:
            attributes = map_optimizer_attributes(initializer.name)
            optimizer_attributes_map[initializer.name] = {}
            optimizer_int_attributes_map[initializer.name] = {}
            for k, v in attributes.items():
                if isinstance(v, float):
                    optimizer_attributes_map[initializer.name][k] = v
                elif isinstance(v, int):
                    optimizer_int_attributes_map[initializer.name][k] = v
                else:
                    raise ValueError("Optimizer attributes must be either float or int.")
        else:
            optimizer_attributes_map[initializer.name] = {}
            optimizer_int_attributes_map[initializer.name] = {}

    if bind_parameters:
        for initializer in model.graph.initializer:
            torch_tensor = torch.nn.Parameter(torch.as_tensor(numpy_helper.to_array(initializer), device=device))
            delete_input_with_name(model.graph.input, initializer.name)
            model.graph.input.extend(
                [helper.make_tensor_value_info(initializer.name, initializer.data_type, initializer.dims)])
            torch_params[initializer.name] = torch_tensor

        del model.graph.initializer[:]

    ort_parameters.weights_to_train = weights_to_train
    ort_parameters.training_optimizer_name = training_optimizer_name
    ort_parameters.lr_params_feed_name = lr_params_feed_name
    ort_parameters.optimizer_attributes_map = optimizer_attributes_map
    ort_parameters.optimizer_int_attributes_map = optimizer_int_attributes_map

    sessionOptions = ort.SessionOptions()
    sessionOptions.use_deterministic_compute = use_deterministic_compute
    session = ort.TrainingSession(model.SerializeToString(), ort_parameters, sessionOptions)
    train_io_binding = session.io_binding()
    eval_io_binding = session.io_binding()

    if bind_parameters:
        for param in torch_params.keys():
            torch_tensor = torch_params[param]

            train_io_binding.bind_input(param, torch_tensor.device.type, get_device_index(torch_tensor.device),
                                        dtype_torch_to_numpy(torch_params[param].dtype), list(torch_tensor.size()),
                                        torch_tensor.data_ptr())
            eval_io_binding.bind_input(param, torch_tensor.device.type, get_device_index(torch_tensor.device),
                                       dtype_torch_to_numpy(torch_params[param].dtype), list(torch_tensor.size()),
                                       torch_tensor.data_ptr())

    return session, train_io_binding, eval_io_binding, output_name, torch_params, output_types
Exemple #28
0
def convert_to_onnx_and_check(
    job_func,
    print_outlier=False,
    explicit_init=True,
    external_data=False,
    ort_optimize=True,
    opset=None,
):
    check_point = flow.train.CheckPoint()
    if explicit_init:
        # it is a trick to keep check_point.save() from hanging when there is no variable
        @flow.global_function(flow.FunctionConfig())
        def add_var():
            return flow.get_variable(
                name="trick",
                shape=(1, ),
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(),
            )

        check_point.init()
    flow_weight_dir = tempfile.TemporaryDirectory()
    check_point.save(flow_weight_dir.name)
    # TODO(daquexian): a more elegant way?
    while not os.path.exists(
            os.path.join(flow_weight_dir.name, "snapshot_done")):
        pass
    onnx_model_dir = tempfile.TemporaryDirectory()
    onnx_model_path = os.path.join(onnx_model_dir.name, "model.onnx")
    flow.onnx.export(
        job_func,
        flow_weight_dir.name,
        onnx_model_path,
        opset=opset,
        external_data=external_data,
    )
    flow_weight_dir.cleanup()
    ort_sess_opt = ort.SessionOptions()
    ort_sess_opt.graph_optimization_level = (
        ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
        if ort_optimize else ort.GraphOptimizationLevel.ORT_DISABLE_ALL)
    sess = ort.InferenceSession(onnx_model_path, sess_options=ort_sess_opt)
    onnx_model_dir.cleanup()
    assert len(sess.get_outputs()) == 1
    assert len(sess.get_inputs()) <= 1
    ipt_dict = OrderedDict()
    for ipt in sess.get_inputs():
        ipt_data = np.random.uniform(low=-10, high=10,
                                     size=ipt.shape).astype(np.float32)
        ipt_dict[ipt.name] = ipt_data

    onnx_res = sess.run([], ipt_dict)[0]
    oneflow_res = job_func(*ipt_dict.values()).get().numpy()
    rtol, atol = 1e-2, 1e-5
    if print_outlier:
        a = onnx_res.flatten()
        b = oneflow_res.flatten()
        for i in range(len(a)):
            if np.abs(a[i] - b[i]) > atol + rtol * np.abs(b[i]):
                print("a[{}]={}, b[{}]={}".format(i, a[i], i, b[i]))
    assert np.allclose(onnx_res, oneflow_res, rtol=rtol, atol=atol)
Exemple #29
0
    def __test_export_route(self, module, out_name, mode, input_example=None):
        # select correct extension based on the output format
        ext = {
            DF.ONNX: ".onnx",
            DF.TRTONNX: ".trt.onnx",
            DF.PYTORCH: ".pt",
            DF.TORCHSCRIPT: ".ts"
        }.get(mode, ".onnx")
        out = Path(f"{out_name}{ext}")
        out_name = str(out)

        if out.exists():
            os.remove(out)

        module.eval()
        outputs_fwd = (module.forward(*tuple(input_example.values()))
                       if isinstance(input_example, OrderedDict) else
                       (module.forward(
                           *input_example) if isinstance(input_example, tuple)
                        else module.forward(input_example)
                        if input_example is not None else None))

        deploy_input_example = (tuple(input_example.values()) if isinstance(
            input_example, OrderedDict) else input_example)
        self.nf.deployment_export(
            module=module,
            output=out_name,
            input_example=deploy_input_example,
            d_format=mode,
            output_example=outputs_fwd,
        )

        tol = 5.0e-3
        assert out.exists()

        if mode == DF.TRTONNX:

            data_loader = DefaultDataLoader()
            loader_cache = DataLoaderCache(data_loader)
            profile_shapes = OrderedDict()
            names = list(module.input_ports) + list(module.output_ports)
            names = list(
                filter(
                    lambda x: x not in
                    (module._disabled_deployment_input_ports | module.
                     _disabled_deployment_output_ports),
                    names,
                ))
            if isinstance(input_example, tuple):
                si = [
                    tuple(input_example[i].shape)
                    for i in range(len(input_example))
                ]
            elif isinstance(input_example, OrderedDict):
                si = [
                    tuple(input_example.values())[i].shape
                    for i in range(len(input_example))
                ]
            else:
                si = [tuple(input_example.shape)]
            if isinstance(outputs_fwd, tuple):
                fi = [
                    tuple(outputs_fwd[i].shape)
                    for i in range(len(outputs_fwd))
                ]
            else:
                fi = [tuple(outputs_fwd.shape)]
            si = si + fi
            i = 0
            for name in names:
                profile_shapes[name] = [si[i]] * 3
                i = i + 1

            onnx_loader = OnnxFileLoader(out_name)
            network_loader = OnnxNetworkLoader(onnx_loader,
                                               explicit_precision=False)
            model_loader = BuildEngineLoader(
                network_loader,
                max_workspace_size=1 << 30,
                fp16_mode=False,
                int8_mode=False,
                profile_shapes=profile_shapes,
                write_engine=None,
                calibrator=None,
                layerwise=False,
            )

            with TensorRTRunnerV2(model_loader=model_loader) as active_runner:
                input_metadata = active_runner.get_input_metadata()
                if input_metadata is None:
                    logging.critical(
                        "For {:}, get_input_metadata() returned None!".format(
                            active_runner.name))
                logging.debug("Runner Inputs: {:}".format(input_metadata))
                feed_dict = loader_cache.load(iteration=0,
                                              input_metadata=input_metadata,
                                              input_example=input_example)
                inputs = dict()
                input_names = list(input_metadata.keys())
                for i in range(len(input_names)):
                    input_name = input_names[i]
                    if input_name in module._disabled_deployment_input_ports:
                        continue
                    inputs[input_name] = (
                        input_example[input_name].cpu().numpy() if isinstance(
                            input_example, OrderedDict) else
                        (input_example[i].cpu().numpy() if isinstance(
                            input_example, tuple) else
                         input_example.cpu().numpy()))

                out_dict = active_runner.infer(feed_dict=feed_dict,
                                               output=outputs_fwd)
                for ov in out_dict.values():
                    outputs_scr = torch.from_numpy(ov).cuda()
                    break

                outputs = []
                outputs.append(copy.deepcopy(out_dict))
                logging.debug("Received outputs: {:}".format([
                    "{:}: {:}".format(name, out.shape)
                    for name, out in out_dict.items()
                ]))
                logging.info("Output Buffers: {:}".format(outputs))

            inpex = []
            for ie in feed_dict.values():  # loader_cache.cache[0].values():
                if ie.dtype.type is np.int32:
                    inpex.append(torch.from_numpy(ie).long().cuda())
                else:
                    inpex.append(torch.from_numpy(ie).cuda())
                if len(inpex) == len(input_example):
                    break
            inpex = tuple(inpex)
            outputs_fwd = module.forward(*inpex)

        elif mode == DF.ONNX:
            # Must recompute because *module* might be different now
            outputs_fwd = (
                module.forward(*tuple(input_example.values())) if isinstance(
                    input_example, OrderedDict) else
                (module.forward(*input_example) if isinstance(
                    input_example, tuple) else module.forward(input_example)))
            sess_options = ort.SessionOptions()
            sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC
            ort_session = ort.InferenceSession(out_name, sess_options,
                                               ['CUDAExecutionProvider'])
            print('Execution Providers: ', ort_session.get_providers())
            inputs = dict()
            input_names = list(module.input_ports)
            ort_inputs = ort_session.get_inputs()
            for i in range(len(input_names)):
                input_name = input_names[i]
                if input_name in module._disabled_deployment_input_ports:
                    continue
                inputs[input_name] = (input_example[input_name].cpu().numpy()
                                      if isinstance(input_example,
                                                    OrderedDict) else
                                      (input_example[i].cpu().numpy()
                                       if isinstance(input_example, tuple) else
                                       input_example.cpu().numpy()))
            outputs_scr = ort_session.run(None, inputs)
            outputs_scr = torch.from_numpy(outputs_scr[0]).cuda()
        elif mode == DF.TORCHSCRIPT:
            scr = torch.jit.load(out_name)
            if isinstance(module, nemo.backends.pytorch.tutorials.TaylorNet):
                input_example = torch.randn(4, 1).cuda()
                outputs_fwd = module.forward(input_example)
            outputs_scr = (
                module.forward(*tuple(input_example.values())) if isinstance(
                    input_example, OrderedDict) else
                (module.forward(*input_example) if isinstance(
                    input_example, tuple) else module.forward(input_example)))
        elif mode == DF.PYTORCH:
            module.load_state_dict(torch.load(out_name))
            module.eval()
            outputs_scr = (
                module.forward(*tuple(input_example.values())) if isinstance(
                    input_example, OrderedDict) else
                (module.forward(*input_example) if isinstance(
                    input_example, tuple) else module.forward(input_example)))

        outputs_scr = (outputs_scr[0] if isinstance(outputs_scr, tuple)
                       or isinstance(outputs_scr, list) else outputs_scr)
        outputs_fwd = (outputs_fwd[0] if isinstance(outputs_fwd, tuple)
                       or isinstance(outputs_fwd, list) else outputs_fwd)

        assert (outputs_scr - outputs_fwd).norm(p=2) < tol

        if out.exists():
            os.remove(out)
Exemple #30
0
def onnx2tensorrt(onnx_file: str,
                  trt_file: str,
                  config: dict,
                  input_config: dict,
                  model_type: str,
                  img_path: str,
                  fp16: bool = False,
                  verify: bool = False,
                  show: bool = False,
                  workspace_size: int = 1,
                  verbose: bool = False):
    """Convert ONNX model to TensorRT model

    Args:
        onnx_file (str): the path of the input ONNX file.
        trt_file (str): the path to output the TensorRT file.
        config (dict): MMCV configuration.
        input_config (dict): contains min_shape, max_shape and \
            input image path.
        fp16 (bool): whether to enable fp16 mode.
        verify (bool): whether to verify the outputs of TensorRT \
            and ONNX are same.
        show (bool): whether to show the outputs of TensorRT and ONNX.
        verbose (bool): whether to print the log when generating \
            TensorRT model.
    """
    import tensorrt as trt
    min_shape = input_config['min_shape']
    max_shape = input_config['max_shape']
    # create trt engine and wrapper
    opt_shape_dict = {'input': [min_shape, min_shape, max_shape]}
    max_workspace_size = get_GiB(workspace_size)
    trt_engine = onnx2trt(
        onnx_file,
        opt_shape_dict,
        log_level=trt.Logger.VERBOSE if verbose else trt.Logger.ERROR,
        fp16_mode=fp16,
        max_workspace_size=max_workspace_size)
    save_dir, _ = osp.split(trt_file)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    save_trt_engine(trt_engine, trt_file)
    print(f'Successfully created TensorRT engine: {trt_file}')

    if verify:
        inputs = _prepare_input_img(
            model_type=model_type, img_path=img_path, config=config)

        imgs = inputs['imgs']
        img_list = [imgs.unsqueeze(0)]

        if max_shape[0] > 1:
            # concatenate flipped images for batch test
            flip_img_list = [_.flip(-1) for _ in img_list]
            img_list = [
                torch.cat((ori_img, flip_img), 0)
                for ori_img, flip_img in zip(img_list, flip_img_list)
            ]

        # Get results from ONNXRuntime
        ort_custom_op_path = get_onnxruntime_op_path()
        session_options = ort.SessionOptions()
        if osp.exists(ort_custom_op_path):
            session_options.register_custom_ops_library(ort_custom_op_path)
        sess = ort.InferenceSession(onnx_file, session_options)
        sess.set_providers(['CPUExecutionProvider'], [{}])  # use cpu mode
        onnx_output = sess.run(['output'],
                               {'input': img_list[0].detach().numpy()})[0][0]

        # Get results from TensorRT
        trt_model = TRTWrapper(trt_file, ['input'], ['output'])
        with torch.no_grad():
            trt_outputs = trt_model({'input': img_list[0].contiguous().cuda()})
        trt_output = trt_outputs['output'][0].cpu().detach().numpy()

        if show:
            onnx_visualize = onnx_output.transpose(1, 2, 0)
            onnx_visualize = np.clip(onnx_visualize, 0, 1)[:, :, ::-1]
            trt_visualize = trt_output.transpose(1, 2, 0)
            trt_visualize = np.clip(trt_visualize, 0, 1)[:, :, ::-1]

            cv2.imshow('ONNXRuntime', onnx_visualize)
            cv2.imshow('TensorRT', trt_visualize)
            cv2.waitKey()

        np.testing.assert_allclose(
            onnx_output, trt_output, rtol=1e-03, atol=1e-05)
        print('TensorRT and ONNXRuntime output all close.')