Code example #1
0
def acc_ops_squeeze(network, target, args, kwargs, name):
    """Convert an acc_ops squeeze node into a TensorRT shuffle layer.

    Removes dimension ``kwargs["dim"]`` from ``kwargs["input"]`` (only when
    that dimension has size 1, matching torch.squeeze semantics) by reshaping
    with an IShuffleLayer.

    Args:
        network: TensorRT INetworkDefinition being built.
        target: the fx node target (unused here).
        args: positional fx-node args (unused; inputs come via kwargs).
        kwargs: expects "input" (ITensor) and "dim" (int).
        name: name to assign to the created TensorRT layer.

    Returns:
        The output ITensor of the shuffle layer.

    Raises:
        RuntimeError: if the input is not a TensorRT ITensor.
    """
    input_val = kwargs["input"]

    if not isinstance(input_val, trt.tensorrt.ITensor):
        raise RuntimeError(
            f"squeeze received input {input_val} that is not part "
            "of the TensorRT region!"
        )

    # dict.get() replaces the manual `"dim" in kwargs` check.
    dim = kwargs.get("dim")
    # Squeeze with dim=None would only work in explicit batch dim mode without any dynamic
    # dim, which is a very rare case. For now we just claim not supporting dim=None.
    assert dim is not None, "We don't support dim=None right now."

    if network.has_implicit_batch_dimension:
        assert dim != 0, "We don't support squeeze batch dim when it's implicit."
        # Shift past the implicit batch dimension, which TensorRT does not expose.
        dim -= 1

    assert input_val.shape[dim] != -1, "We don't support squeeze dynamic dim."
    assert (
        len(get_dynamic_dims(input_val.shape)) <= 1
    ), "Currently more than one dynamic dim for input to squeeze is not supported."

    # Drop `dim` from the shape only if it actually has size 1; every other
    # dimension is kept as-is.
    output_shape = [
        s for i, s in enumerate(input_val.shape) if not (i == dim and s == 1)
    ]
    layer = network.add_shuffle(input_val)
    layer.reshape_dims = tuple(output_shape)
    layer.name = name
    return layer.get_output(0)
Code example #2
0
def acc_ops_linear(network, target, args, kwargs, name):
    """Convert an acc_ops linear (nn.Linear-style) node to TensorRT layers.

    When the weight is a constant ``torch.Tensor``, lowers to
    reshape -> fully_connected -> reshape. Otherwise (e.g. the weight is
    already a TRT tensor because it went through quant + dequant), lowers to
    matmul plus an optional elementwise add for the bias.

    Args:
        network: TensorRT INetworkDefinition being built.
        target: the fx node target (unused here).
        args: positional fx-node args (unused; inputs come via kwargs).
        kwargs: expects "input" (ITensor), "weight", and "bias".
        name: base name for the created TensorRT layers.

    Returns:
        The output ITensor of the final layer.

    Raises:
        RuntimeError: if the input is not a TensorRT ITensor.
    """
    input_val = kwargs["input"]

    if not isinstance(input_val, trt.tensorrt.ITensor):
        raise RuntimeError(
            f"Linear received input {input_val} that is not part "
            "of the TensorRT region!")

    dynamic_dims = get_dynamic_dims(input_val.shape)
    # Fixed typo in the diagnostic ("dynmaic" -> "dynamic").
    assert len(dynamic_dims) < 2 and input_val.shape[-1] != -1, (
        "Currently we only support one dynamic "
        "dim for linear and it can't be the last dim.")

    # Hoist both parameters to locals; the original mixed `weight` with
    # repeated `kwargs["weight"]` lookups.
    weight = kwargs["weight"]
    bias = kwargs["bias"]

    # For quantization, weight here would be a trt tensor because it goes through
    # quant + dequant. In this case, we need to use matmul + add because fully_connected
    # can't take non-constant weight.
    # TODO: Need to benchmark the performance of lowering linear as fully_connected versus
    # lowering as matmul + add. TensorRT documentation suggests to always lower it as
    # matmul + add but we found in some cases this results in performance regression compared
    # with lowering to fully_connected layer.
    if isinstance(weight, torch.Tensor):
        # fully_connected expects trailing unit dims, so reshape to (..., 1, 1).
        layer = network.add_shuffle(input_val)
        layer.reshape_dims = tuple(input_val.shape) + (1, 1)
        layer.name = f"{name}_pre_shuffle"

        # add fully connected
        layer = network.add_fully_connected(
            input=layer.get_output(0),
            num_outputs=weight.shape[0],
            kernel=to_numpy(weight),
            bias=to_numpy(bias),
        )
        layer.name = f"{name}_linear"

        # reshape back: original leading dims, last dim becomes out_features.
        layer = network.add_shuffle(layer.get_output(0))
        layer.reshape_dims = tuple(input_val.shape[:-1]) + (weight.shape[0], )
        layer.name = f"{name}_post_shuffle"

        return layer.get_output(0)
    else:
        # add matrix multiply and add
        output = add_matrix_multiply_layer(network,
                                           input_val,
                                           weight,
                                           f"{name}_linear_mm",
                                           transpose_other=True)
        if bias is not None:
            return add_binary_elementwise_layer(network, output,
                                                bias,
                                                trt.ElementWiseOperation.SUM,
                                                f"{name}_linear_add")
        else:
            return output
Code example #3
0
def acc_ops_unsqueeze(network, target, args, kwargs, name):
    """Convert an acc_ops unsqueeze node into a TensorRT shuffle layer.

    Inserts a size-1 dimension at ``kwargs["dim"]`` in ``kwargs["input"]``
    by reshaping with an IShuffleLayer.

    Args:
        network: TensorRT INetworkDefinition being built.
        target: the fx node target (unused here).
        args: positional fx-node args (unused; inputs come via kwargs).
        kwargs: expects "input" (ITensor) and "dim" (int).
        name: name to assign to the created TensorRT layer.

    Returns:
        The output ITensor of the shuffle layer.

    Raises:
        RuntimeError: if the input is not a TensorRT ITensor.
    """
    input_val = kwargs["input"]

    if not isinstance(input_val, trt.tensorrt.ITensor):
        raise RuntimeError(f"unsqueeze received input {input_val} that is not part "
                           "of the TensorRT region!")

    dim = kwargs["dim"]
    if network.has_implicit_batch_dimension:
        # Message added for parity with acc_ops_squeeze; the bare assert gave
        # no diagnostic on failure.
        assert dim != 0, "We don't support unsqueeze batch dim when it's implicit."
        # Shift past the implicit batch dimension, which TensorRT does not expose.
        dim -= 1

    assert len(get_dynamic_dims(input_val.shape)) <= 1, "Currently we don't support unsqueeze with more than one dynamic dims."
    layer = network.add_shuffle(input_val)
    layer.reshape_dims = tuple(input_val.shape)[:dim] + (1,) + tuple(input_val.shape)[dim:]
    layer.name = name
    return layer.get_output(0)