Example #1
def aten_batch_norm(inputs, attributes, scope):
    inp, weight, bias, running_mean, running_var = inputs[:5]
    training, momentum, eps = inputs[5:8]
    # assert training is False
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        running_mean = running_mean.detach().cpu().numpy()
        running_var = running_var.detach().cpu().numpy()
        weight = weight.detach().cpu().numpy()
        bias = bias.detach().cpu().numpy()
        # fold inference-mode batch norm into a single affine transform: y = scale * x + shift
        shift = (-running_mean / np.sqrt(running_var + eps)) * weight + bias
        scale = weight / np.sqrt(running_var + eps)
        power = np.ones_like(shift)
        layer = net.add_scale(inp, trt.ScaleMode.CHANNEL, shift, scale, power)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        ctx.refit_weight_dict[layer.name] = {
            "type": "BatchNorm",
            "running_mean": inputs[3].__torch2trt_weight_name,
            "running_var": inputs[4].__torch2trt_weight_name,
            "weight": inputs[1].__torch2trt_weight_name,
            "bias": inputs[2].__torch2trt_weight_name,
            "eps": eps,
        }
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        running_mean = running_mean.detach().cpu().numpy()
        running_var = running_var.detach().cpu().numpy()
        weight = weight.detach().cpu().numpy()
        bias = bias.detach().cpu().numpy()
        running_mean_t = _expr.var(scope + "/running_mean",
                                   shape=running_mean.shape,
                                   dtype="float32")
        running_var_t = _expr.var(scope + "/running_var",
                                  shape=running_var.shape,
                                  dtype="float32")
        weight_t = _expr.var(scope + "/weight",
                             shape=weight.shape,
                             dtype="float32")
        bias_t = _expr.var(scope + "/bias", shape=bias.shape, dtype="float32")
        ctx.tvm_weight_dict[running_mean_t] = running_mean
        ctx.tvm_weight_dict[running_var_t] = running_var
        ctx.tvm_weight_dict[weight_t] = weight
        ctx.tvm_weight_dict[bias_t] = bias
        new_attrs = {}
        new_attrs["axis"] = 1
        new_attrs["epsilon"] = eps
        new_attrs["center"] = True
        new_attrs["scale"] = True
        new_attrs['gamma'] = weight_t
        new_attrs['beta'] = bias_t
        new_attrs['moving_mean'] = running_mean_t
        new_attrs['moving_var'] = running_var_t
        result, moving_mean, moving_var = _op.nn.batch_norm(inp, **new_attrs)
        return [result]
    res = F.batch_norm(inp, running_mean, running_var, weight, bias,
                       bool(training), momentum, eps)
    return [res]
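The shift/scale values in the TensorRT branch come from rewriting inference-mode batch norm, y = (x - running_mean) / sqrt(running_var + eps) * weight + bias, as the affine form y = scale * x + shift that a scale layer can evaluate. A minimal, self-contained numpy/PyTorch check of that identity (not part of the converter; names and shapes are chosen for illustration):

import numpy as np
import torch
import torch.nn.functional as F

C = 4
x = torch.randn(2, C, 8, 8)
weight, bias = torch.randn(C), torch.randn(C)
running_mean, running_var = torch.randn(C), torch.rand(C) + 0.5
eps = 1e-5

# reference: inference-mode batch norm
ref = F.batch_norm(x, running_mean, running_var, weight, bias,
                   training=False, eps=eps)

# folded affine form used by the TensorRT scale layer
rm, rv = running_mean.numpy(), running_var.numpy()
w, b = weight.numpy(), bias.numpy()
scale = w / np.sqrt(rv + eps)
shift = -rm / np.sqrt(rv + eps) * w + b
folded = x.numpy() * scale.reshape(1, C, 1, 1) + shift.reshape(1, C, 1, 1)

assert np.allclose(ref.numpy(), folded, atol=1e-5)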
Example #2
    def __init__(self, weight, bias, scale, zero_point, param_key):
        param_prefix = param_key[: -len("._packed_params")]
        self.weight_var = _expr.var(param_prefix + "_weight", shape=weight.shape)
        self.weight = weight

        if bias is not None:
            self.bias_var = _expr.var(param_prefix + "_bias", shape=bias.shape)
            self.bias = bias.detach().numpy()
        else:
            self.bias_var = None
            self.bias = None

        self.scale = _expr.const(scale)
        self.zero_point = _expr.const(zero_point, dtype="int32")
Example #3
def aten_matmul(inputs, attributes, scope):
    mat1, mat2 = inputs[:2]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        assert isinstance(mat2, torch.Tensor)
        inp = mat1
        weight = mat2.t().detach().cpu().numpy()
        C = weight.shape[0]
        # use fc to implement this
        if len(inp.shape) < 3:
            inp = _trt_reshape(net, inp, [-1, 1, 1], scope + "/reshape")
        layer = net.add_fully_connected(inp, C, weight, trt.Weights())
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        ctx.refit_weight_dict[layer.name] = {
            "type": "Linear",
            "weight": inputs[1].__torch2trt_weight_name,
        }
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        inp = mat1
        weight = mat2.t().detach().cpu().numpy()
        C = weight.shape[0]
        weight_t = _expr.var(scope + "/weight",
                             shape=weight.shape,
                             dtype="float32")
        ctx.tvm_weight_dict[weight_t] = weight
        res = _op.nn.dense(inputs[0], weight_t, units=C)
        return [res]
    res = torch.matmul(mat1, mat2)
    return [res]
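In the TVM branch above, _op.nn.dense computes x @ W.T with W of shape (units, in_features), so passing weight = mat2.t() and units = weight.shape[0] reproduces mat1 @ mat2. A tiny PyTorch sketch of the same identity (illustrative only, not part of the converter):

import torch

mat1 = torch.randn(5, 3)    # activation
mat2 = torch.randn(3, 7)    # constant weight taken from the graph

W = mat2.t()                # shape (7, 3); units = W.shape[0] = 7
dense_style = mat1 @ W.t()  # what a dense / fully connected layer computes

assert torch.allclose(dense_style, torch.matmul(mat1, mat2))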
Example #4
def aten_addmm(inputs, attributes, scope):
    mat_to_add, mat1, mat2 = inputs[:3]
    beta, alpha = inputs[3:5]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        assert beta == 1 and alpha == 1
        assert len(mat_to_add.shape) == 1
        inp = mat1
        weight = mat2.t().detach().cpu().numpy()
        bias = mat_to_add.detach().cpu().numpy()
        C = weight.shape[0]
        # use fc to implement this
        if len(inp.shape) < 3:
            inp = _trt_reshape(net, inp, [-1, 1, 1], scope + "/reshape")
        layer = net.add_fully_connected(inp, C, weight, bias)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        ctx.refit_weight_dict[layer.name] = {
            "type": "Linear",
            "weight": inputs[2].__torch2trt_weight_name,
            "bias": inputs[0].__torch2trt_weight_name,
        }
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        inp = mat1
        weight = mat2.t().detach().cpu().numpy()
        bias = mat_to_add.detach().cpu().numpy()
        C = weight.shape[0]
        weight_t = _expr.var(scope + "/weight",
                             shape=weight.shape,
                             dtype="float32")
        ctx.tvm_weight_dict[weight_t] = weight
        ctx.refit_weight_dict[
            weight_t.name_hint] = inputs[2].__torch2trt_weight_name
        bias_t = _expr.var(scope + "/bias", shape=bias.shape, dtype="float32")
        ctx.tvm_weight_dict[bias_t] = bias
        ctx.refit_weight_dict[
            bias_t.name_hint] = inputs[0].__torch2trt_weight_name
        res = _op.nn.dense(inp, weight_t, units=C)
        res = _op.nn.bias_add(res, bias_t, axis=1)
        return [res]

    # torch.addmm(input, mat1, mat2, *, beta, alpha) computes beta * input + alpha * (mat1 @ mat2)
    res = torch.addmm(mat_to_add, mat1, mat2, beta=beta, alpha=alpha)
    return [res]
Example #5
def aten_convolution(inputs, attributes, scope):
    inp, weight, bias = inputs[:3]
    stride, pad, dilation = inputs[3:6]
    transposed, output_padding, groups = inputs[6:9]
    ctx = current_context()
    net = ctx.network
    if transposed:
        I, O_groups, *ksize = weight.shape
        O = O_groups * groups
    else:
        O, I_groups, *ksize = weight.shape
        I = I_groups * groups
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        assert all(e == 0 for e in output_padding), \
            "TensorRT doesn't support output padding"
        ndim = len(ksize)
        assert ndim == 2, "TensorRT conversion only supports 2D convolution"
        # trt weight format: GKCRS: [num_groups, O_groups, I, H, W]
        weight = weight.detach().cpu().numpy()
        if bias is not None:
            trt_bias = bias.detach().cpu().numpy()
        else:
            trt_bias = trt.Weights()
        if transposed:
            layer = net.add_deconvolution(inputs[0], O, tuple(ksize), weight,
                                          trt_bias)
        else:
            layer = net.add_convolution(inputs[0], O, tuple(ksize), weight,
                                        trt_bias)
            layer.dilation = tuple(dilation)
        layer.stride = tuple(stride)
        layer.padding = tuple(pad)
        layer.num_groups = groups
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        ctx.refit_weight_dict[layer.name] = {
            "type": "Convolution",
            "weight": inputs[1].__torch2trt_weight_name,
        }
        if bias is not None:
            ctx.refit_weight_dict[
                layer.name]["bias"] = bias.__torch2trt_weight_name
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        weight = weight.detach().cpu().numpy()
        weight_t = _expr.var(scope + "/weight",
                             shape=weight.shape,
                             dtype="float32")
        ctx.tvm_weight_dict[weight_t] = weight
        if bias is not None:
            bias = bias.detach().cpu().numpy()
            bias_t = _expr.var(scope + "/bias",
                               shape=bias.shape,
                               dtype="float32")
            ctx.tvm_weight_dict[bias_t] = bias
        new_attrs = {}
        new_attrs["channels"] = O
        new_attrs["kernel_size"] = ksize
        new_attrs["strides"] = stride
        new_attrs["padding"] = pad
        new_attrs["dilation"] = dilation
        new_attrs["groups"] = groups
        new_attrs["data_layout"] = "NCHW"
        new_attrs["kernel_layout"] = "OIHW"
        use_bias = bias is not None
        if transposed:
            new_attrs["output_padding"] = output_padding
            res = _op.nn.conv2d_transpose(inputs[0], weight_t, **new_attrs)
        else:
            res = _op.nn.conv2d(inputs[0], weight_t, **new_attrs)
        if use_bias:
            res = _op.nn.bias_add(res, bias_t, axis=1)
        return [res]
    ndim = len(inputs[3])
    assert ndim == 2
    if transposed:
        res = F.conv_transpose2d(inp, weight, bias, stride, pad,
                                 output_padding, groups, dilation)
    else:
        res = F.conv2d(inp, weight, bias, stride, pad, dilation, groups)
    return [res]
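The O/I bookkeeping at the top of aten_convolution mirrors PyTorch's weight layouts: Conv2d stores weights as (out_channels, in_channels / groups, kH, kW), while ConvTranspose2d stores them as (in_channels, out_channels / groups, kH, kW). A short standalone check (illustrative only):

import torch.nn as nn

conv = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, groups=2)
deconv = nn.ConvTranspose2d(in_channels=8, out_channels=16, kernel_size=3, groups=2)

# regular conv: (O, I / groups, kH, kW)
assert tuple(conv.weight.shape) == (16, 4, 3, 3)
# transposed conv: (I, O / groups, kH, kW)
assert tuple(deconv.weight.shape) == (8, 8, 3, 3)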
Example #6
def add_quant_params_to_outputs(outputs,
                                packed_param_map,
                                quant_params,
                                input_scales_for_bias,
                                keep_quantized_weight=False):
    """
    Add quant params to outputs so that they can be referenced by other
    ops later. Weights are quantized here.
    """
    for node_name, packed_param_name in packed_param_map.items():
        qparam = quant_params[packed_param_name]
        weight_scale = _get_numpy(qparam.scale)
        param_prefix = packed_param_name[:-len("._packed_params")]

        if keep_quantized_weight:
            qparam.weight_var = _expr.var(param_prefix + "_weight",
                                          shape=qparam.weight.shape,
                                          dtype="int8")
            qparam.weight = quantize_numpy(qparam.weight, weight_scale,
                                           _get_numpy(qparam.zero_point),
                                           np.int8)
            qweight = qparam.weight_var
        else:
            qparam.weight_var = _expr.var(param_prefix + "_weight",
                                          shape=qparam.weight.shape,
                                          dtype="float32")
            qweight = relay.qnn.op.quantize(qparam.weight_var,
                                            qparam.scale,
                                            qparam.zero_point,
                                            out_dtype="int8",
                                            axis=0)

        if qparam.bias is not None:
            float_bias_var = _expr.var(param_prefix + "_bias",
                                       shape=qparam.bias.shape,
                                       dtype="float32")
            if node_name not in input_scales_for_bias:
                # This case is for dynamic quantization, where the input activation scale is
                # unknown until runtime.
                qparam.bias_var = float_bias_var
                qbias = qparam.bias_var
            elif keep_quantized_weight:
                qparam.bias_var = _expr.var(param_prefix + "_bias",
                                            shape=qparam.bias.shape,
                                            dtype="int32")
                qparam.bias = quantize_numpy(
                    qparam.bias,
                    input_scales_for_bias[node_name] * weight_scale, 0,
                    np.int32)
                qbias = qparam.bias_var
            else:
                qparam.bias_var = float_bias_var
                qbias = relay.qnn.op.quantize(
                    qparam.bias_var,
                    _expr.const(input_scales_for_bias[node_name] *
                                weight_scale),
                    _expr.const(0, "int32"),
                    out_dtype="int32",
                    axis=0,
                )
        else:
            qbias = None

        quant_params[packed_param_name] = qparam

        params = [qweight, qparam.scale, qparam.zero_point, qbias]

        if isinstance(quant_params[packed_param_name], ConvPackedParam):
            params += [
                qparam.stride,
                qparam.padding,
                qparam.dilation,
                qparam.groups,
                qparam.output_padding,
            ]

        outputs[node_name] = params
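quantize_numpy is defined elsewhere in the frontend; conceptually it performs the usual affine quantization q = clip(round(x / scale) + zero_point, dtype_min, dtype_max). A hypothetical stand-in with that behaviour (a sketch only, not the actual implementation, which also handles per-channel scales):

import numpy as np

def quantize_numpy_sketch(arr, scale, zero_point, dtype=np.int8):
    # hypothetical stand-in for quantize_numpy: per-tensor affine quantization
    info = np.iinfo(dtype)
    q = np.round(arr / scale) + zero_point
    return np.clip(q, info.min, info.max).astype(dtype)

w = np.random.randn(4, 3).astype("float32")
qw = quantize_numpy_sketch(w, scale=0.05, zero_point=0, dtype=np.int8)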
Example #7
def _torch_depoly(module,
                  example_inputs,
                  param_exclude=None,
                  param_include=None,
                  output_names=None,
                  input_tensors=None,
                  input_names=None,
                  verbose=False):
    """main entry point of torch2tvm.

    Args:
        module: pytorch nn.Module or function.
        example_inputs: list or tuple of example tensors. MUST match arguments of 
            forward function of module.
        param_exclude: regex string. filter unused weights and buffers if match.
        param_include: regex string. filter unused weights and buffers if not match.
        output_names: list of string. indicate output node name you want to use.
            note that pytorch jit node name don't contains any readable information.
            so I recommend not use this.
        input_tensors: list of trt.ITensor. if provided, will use this tensors to evaluate
            graph. otherwise will create new input tensors based on example_inputs
        input_names: list of string. MUST provided when run in trt mode. not required
            in pytorch debug mode.
        verbose: bool. 
    Returns:
        trace: traced jit module or function. MUST returned to avoid some C++ error.
        graph_pth: GraphPy object. use this to access pytorch graph and get
            resolved output tensors.
        tvm_module: 
    """
    trace = torch.jit.trace(module, example_inputs, True)
    if not isinstance(example_inputs, (list, tuple)):
        example_inputs = [example_inputs]
    graph_py = parse(trace.graph,
                     len(example_inputs),
                     omit_useless_nodes=False)
    msg = "input mismatch. this may due to some input isn't used in graph"
    assert len(example_inputs) == len(graph_py.get_input_nodes_dict()), msg
    if output_names is None:
        output_names = graph_py.get_output_names()
    if isinstance(module, torch.nn.Module):
        params, weight_names = _get_jit_params(module, param_exclude,
                                               param_include)
        num_param_inputs = len(graph_py.get_param_nodes())
        msg = "expected {} params, but get {} params. ".format(
            num_param_inputs, len(params))
        msg += "This may due to your network have multi output. use param_exclude to remove them"
        assert len(params) == num_param_inputs, msg
        for pnode, param, name in zip(graph_py.get_param_nodes(), params,
                                      weight_names):
            pnode.resolved_outputs[0] = param
            pnode.__torch2trt_weight_name = name
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt:
        assert input_names is not None, "trt mode must provide input name"
        if not isinstance(input_names, (list, tuple)):
            input_names = [input_names]
        assert len(input_names) == len(example_inputs)
        inputs = []
        if input_tensors is not None:
            if not isinstance(input_tensors, (list, tuple)):
                input_tensors = [input_tensors]
            assert len(input_tensors) == len(example_inputs)
            inputs = input_tensors
        else:
            for torch_inp, name in zip(example_inputs, input_names):
                tensor = net.add_input(name=name,
                                       dtype=trt.float32,
                                       shape=tuple(torch_inp.shape[1:]))
                inputs.append(tensor)
        for i, inode in enumerate(graph_py.get_input_nodes_dict().values()):
            inode.resolved_outputs[0] = inputs[i]
    elif ctx.is_tvm:
        assert input_names is not None, "tvm mode must provide input name"
        if not isinstance(input_names, (list, tuple)):
            input_names = [input_names]
        assert len(input_names) == len(example_inputs)
        inputs = []
        if input_tensors is not None:
            if not isinstance(input_tensors, (list, tuple)):
                input_tensors = [input_tensors]
            assert len(input_tensors) == len(example_inputs)
            inputs = input_tensors
        else:
            for torch_inp, name in zip(example_inputs, input_names):
                tensor = _expr.var(name,
                                   shape=torch_inp.shape,
                                   dtype="float32")
                inputs.append(tensor)
        for i, inode in enumerate(graph_py.get_input_nodes_dict().values()):
            inode.resolved_outputs[0] = inputs[i]
    else:
        # use torch inputs, debug mode
        for i, inode in enumerate(graph_py.get_input_nodes_dict().values()):
            inode.resolved_outputs[0] = example_inputs[i]
    resolve_graph(graph_py, output_names, verbose=verbose)
    graph_py.context = ctx
    # trace must be returned to avoid std::bad_alloc
    return trace, graph_py
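Given the signature above, a minimal call in plain PyTorch debug mode (no TensorRT or TVM context active, so input_names is not needed) might look like the sketch below; the model and shapes are illustrative only:

import torch
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU()).eval()
example = torch.randn(1, 3, 32, 32)

# debug mode: the traced graph is evaluated with the example tensors themselves
trace, graph_py = _torch_depoly(model, example)
# keep `trace` alive (see the comment above the return statement) and read the
# converted outputs from graph_py's resolved output tensors.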