def aten_batch_norm(inputs, attributes, scope):
    """Convert a batch-norm node for the active backend.

    Args:
        inputs: resolved node operands. The first five are the input, weight,
            bias, running mean and running variance; the next three are the
            training flag, momentum and eps.
        attributes: node attributes (not read by this converter).
        scope: name given to the created TensorRT layer/output, and prefix of
            the TVM variables.

    Returns:
        A one-element list holding the TensorRT output tensor, the TVM
        expression, or the PyTorch result, depending on the active context.
    """
    inp, weight, bias, running_mean, running_var = inputs[:5]
    training, momentum, eps = inputs[5:8]
    # assert training is False
    ctx = current_context()
    net = ctx.network

    if ctx.is_tensorrt and has_trt_tensor(inputs):
        mean_np, var_np, gamma_np, beta_np = (
            t.detach().cpu().numpy()
            for t in (running_mean, running_var, weight, bias))
        # Fold the normalization into one per-channel affine transform:
        # y = x * scale + shift.
        std_np = np.sqrt(var_np + eps)
        shift = (-mean_np / std_np) * gamma_np + beta_np
        scale = gamma_np / std_np
        power = np.ones_like(shift)
        layer = net.add_scale(inp, trt.ScaleMode.CHANNEL, shift, scale, power)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        # Remember which torch weights produced this layer.
        ctx.refit_weight_dict[layer.name] = {
            "type": "BatchNorm",
            "running_mean": running_mean.__torch2trt_weight_name,
            "running_var": running_var.__torch2trt_weight_name,
            "weight": weight.__torch2trt_weight_name,
            "bias": bias.__torch2trt_weight_name,
            "eps": eps,
        }
        return [output]

    if ctx.is_tvm and has_tvm_tensor(inputs):
        named_arrays = tuple(
            (tag, t.detach().cpu().numpy())
            for tag, t in (("running_mean", running_mean),
                           ("running_var", running_var),
                           ("weight", weight),
                           ("bias", bias)))
        # Create every relay variable first, then register its numpy value.
        relay_vars = [
            _expr.var(scope + "/" + tag, shape=arr.shape, dtype="float32")
            for tag, arr in named_arrays
        ]
        for relay_var, (_, arr) in zip(relay_vars, named_arrays):
            ctx.tvm_weight_dict[relay_var] = arr
        mean_var, variance_var, gamma_var, beta_var = relay_vars
        result, _, _ = _op.nn.batch_norm(
            inp,
            axis=1,
            epsilon=eps,
            center=True,
            scale=True,
            gamma=gamma_var,
            beta=beta_var,
            moving_mean=mean_var,
            moving_var=variance_var,
        )
        return [result]

    # Plain PyTorch evaluation; only this path uses training and momentum.
    res = F.batch_norm(inp, running_mean, running_var, weight, bias,
                       bool(training), momentum, eps)
    return [res]
def __init__(self, weight, bias, scale, zero_point, param_key):
    """Store one packed parameter together with its relay variables.

    Args:
        weight: weight object exposing ``.shape``; stored unchanged.
        bias: optional bias; when given it is stored as
            ``bias.detach().numpy()``.
        scale: value wrapped into a relay constant.
        zero_point: value wrapped into an ``int32`` relay constant.
        param_key: parameter key; its last ``len("._packed_params")``
            characters are dropped to form the variable name prefix.
    """
    suffix_len = len("._packed_params")
    name_prefix = param_key[:-suffix_len]

    self.weight_var = _expr.var(name_prefix + "_weight", shape=weight.shape)
    self.weight = weight

    if bias is None:
        self.bias_var = None
        self.bias = None
    else:
        self.bias_var = _expr.var(name_prefix + "_bias", shape=bias.shape)
        self.bias = bias.detach().numpy()

    self.scale = _expr.const(scale)
    self.zero_point = _expr.const(zero_point, dtype="int32")
def aten_matmul(inputs, attributes, scope):
    """Convert a matmul node for the active backend.

    Args:
        inputs: resolved node operands; the first two are the matrices.
        attributes: node attributes (not read by this converter).
        scope: name given to the created TensorRT layer/output, and prefix of
            the TVM weight variable and of the reshape layer name.

    Returns:
        A one-element list holding the TensorRT output tensor, the TVM
        expression, or the ``torch.matmul`` result.
    """
    mat1, mat2 = inputs[:2]
    ctx = current_context()
    net = ctx.network

    if ctx.is_tensorrt and has_trt_tensor(inputs):
        assert isinstance(mat2, torch.Tensor)
        fc_input = mat1
        kernel = mat2.t().detach().cpu().numpy()
        num_outputs = kernel.shape[0]
        # use fc to implement this
        if len(fc_input.shape) < 3:
            fc_input = _trt_reshape(net, fc_input, [-1, 1, 1],
                                    scope + "/reshape")
        # No bias term: pass empty weights for it.
        layer = net.add_fully_connected(fc_input, num_outputs, kernel,
                                        trt.Weights())
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        ctx.refit_weight_dict[layer.name] = {
            "type": "Linear",
            "weight": mat2.__torch2trt_weight_name,
        }
        return [output]

    if ctx.is_tvm and has_tvm_tensor(inputs):
        kernel = mat2.t().detach().cpu().numpy()
        num_outputs = kernel.shape[0]
        kernel_var = _expr.var(scope + "/weight", shape=kernel.shape,
                               dtype="float32")
        ctx.tvm_weight_dict[kernel_var] = kernel
        return [_op.nn.dense(mat1, kernel_var, units=num_outputs)]

    # Plain PyTorch evaluation.
    return [torch.matmul(mat1, mat2)]
def aten_addmm(inputs, attributes, scope):
    """Convert an addmm node (``beta * mat_to_add + alpha * mat1 @ mat2``).

    Args:
        inputs: resolved node operands. The first three are the matrix to add,
            ``mat1`` and ``mat2``; the next two are ``beta`` and ``alpha``.
        attributes: node attributes (not read by this converter).
        scope: name given to the created TensorRT layer/output, and prefix of
            the TVM weight/bias variables and of the reshape layer name.

    Returns:
        A one-element list holding the TensorRT output tensor, the TVM
        expression, or the ``torch.addmm`` result.

    Raises:
        AssertionError: in the TensorRT branch when ``beta``/``alpha`` are not
            both 1 or when the matrix to add is not one-dimensional.
    """
    mat_to_add, mat1, mat2 = inputs[:3]
    beta, alpha = inputs[3:5]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        assert beta == 1 and alpha == 1
        assert len(mat_to_add.shape) == 1
        inp = mat1
        weight = mat2.t().detach().cpu().numpy()
        bias = mat_to_add.detach().cpu().numpy()
        C = weight.shape[0]
        # use fc to implement this
        if len(inp.shape) < 3:
            inp = _trt_reshape(net, inp, [-1, 1, 1], scope + "/reshape")
        layer = net.add_fully_connected(inp, C, weight, bias)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        ctx.refit_weight_dict[layer.name] = {
            "type": "Linear",
            "weight": inputs[2].__torch2trt_weight_name,
            "bias": inputs[0].__torch2trt_weight_name,
        }
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        # NOTE: beta and alpha are not applied (nor checked) in this branch.
        inp = mat1
        weight = mat2.t().detach().cpu().numpy()
        bias = mat_to_add.detach().cpu().numpy()
        C = weight.shape[0]
        weight_t = _expr.var(scope + "/weight",
                             shape=weight.shape,
                             dtype="float32")
        ctx.tvm_weight_dict[weight_t] = weight
        ctx.refit_weight_dict[
            weight_t.name_hint] = inputs[2].__torch2trt_weight_name
        bias_t = _expr.var(scope + "/bias", shape=bias.shape, dtype="float32")
        ctx.tvm_weight_dict[bias_t] = bias
        ctx.refit_weight_dict[
            bias_t.name_hint] = inputs[0].__torch2trt_weight_name
        res = _op.nn.dense(inp, weight_t, units=C)
        res = _op.nn.bias_add(res, bias_t, axis=1)
        return [res]
    # Use the documented signature; the positional
    # (beta, input, alpha, mat1, mat2) overload is deprecated.
    res = torch.addmm(mat_to_add, mat1, mat2, beta=beta, alpha=alpha)
    return [res]
def aten_convolution(inputs, attributes, scope):
    """Convert a (transposed) convolution node for the active backend.

    Args:
        inputs: resolved node operands, read in this order: input, weight,
            bias, stride, padding, dilation, transposed flag, output padding,
            groups.
        attributes: node attributes (not read by this converter).
        scope: name given to the created TensorRT layer/output, and prefix of
            the TVM weight/bias variables.

    Returns:
        A one-element list holding the TensorRT output tensor, the TVM
        expression, or the PyTorch result.

    Raises:
        AssertionError: in the TensorRT branch when any output padding is
            non-zero or the kernel is not 2-d; in the PyTorch branch when the
            stride does not have two entries.
    """
    inp, weight, bias = inputs[:3]
    stride, pad, dilation = inputs[3:6]
    transposed, output_padding, groups = inputs[6:9]
    ctx = current_context()
    net = ctx.network

    # The first two weight dimensions swap roles for transposed convolution.
    if transposed:
        in_channels, out_per_group, *ksize = weight.shape
        out_channels = out_per_group * groups
    else:
        out_channels, in_per_group, *ksize = weight.shape
        in_channels = in_per_group * groups

    if ctx.is_tensorrt and has_trt_tensor(inputs):
        assert all(e == 0 for e in output_padding), \
            "tensor rt don't support out padding"
        ndim = len(ksize)
        assert ndim == 2, "tensorrt only support 2d conv"
        # trt weight format: GKCRS: [num_groups, O_groups, I, H, W]
        kernel_np = weight.detach().cpu().numpy()
        trt_bias = (trt.Weights() if bias is None
                    else bias.detach().cpu().numpy())
        add_layer = (net.add_deconvolution if transposed
                     else net.add_convolution)
        layer = add_layer(inp, out_channels, tuple(ksize), kernel_np,
                          trt_bias)
        layer.dilation = tuple(dilation)
        layer.stride = tuple(stride)
        layer.padding = tuple(pad)
        layer.num_groups = groups
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        ctx.refit_weight_dict[layer.name] = {
            "type": "Convolution",
            "weight": weight.__torch2trt_weight_name,
        }
        if bias is not None:
            ctx.refit_weight_dict[
                layer.name]["bias"] = bias.__torch2trt_weight_name
        return [output]

    if ctx.is_tvm and has_tvm_tensor(inputs):
        kernel_np = weight.detach().cpu().numpy()
        kernel_var = _expr.var(scope + "/weight", shape=kernel_np.shape,
                               dtype="float32")
        ctx.tvm_weight_dict[kernel_var] = kernel_np
        has_bias = bias is not None
        if has_bias:
            bias_np = bias.detach().cpu().numpy()
            bias_var = _expr.var(scope + "/bias", shape=bias_np.shape,
                                 dtype="float32")
            ctx.tvm_weight_dict[bias_var] = bias_np
        conv_attrs = {
            "channels": out_channels,
            "kernel_size": ksize,
            "strides": stride,
            "padding": pad,
            "dilation": dilation,
            "groups": groups,
            "data_layout": "NCHW",
            "kernel_layout": "OIHW",
        }
        if transposed:
            conv_attrs["output_padding"] = output_padding
            res = _op.nn.conv2d_transpose(inp, kernel_var, **conv_attrs)
        else:
            res = _op.nn.conv2d(inp, kernel_var, **conv_attrs)
        if has_bias:
            res = _op.nn.bias_add(res, bias_var, axis=1)
        return [res]

    # Plain PyTorch evaluation (2-d only).
    ndim = len(stride)
    assert ndim == 2
    if transposed:
        return [F.conv_transpose2d(inp, weight, bias, stride, pad,
                                   output_padding, groups, dilation)]
    return [F.conv2d(inp, weight, bias, stride, pad, dilation, groups)]
def add_quant_params_to_outputs(outputs, packed_param_map, quant_params,
                                input_scales_for_bias,
                                keep_quantized_weight=False):
    """Register quantized weight/bias expressions for each packed-param node.

    For every ``node_name -> packed_param_name`` pair, relay variables are
    created for the weight (and the bias, if present), the quant param entry
    is updated in place, and ``outputs[node_name]`` is set to
    ``[qweight, scale, zero_point, qbias]``. For ``ConvPackedParam`` entries
    the stride, padding, dilation, groups and output padding are appended.

    Args:
        outputs: mapping that receives the per-node parameter lists.
        packed_param_map: mapping from node name to packed param name.
        quant_params: mapping from packed param name to its quant param
            object; entries are mutated and written back.
        input_scales_for_bias: mapping from node name to the input scale used
            to quantize the bias. Nodes missing from it keep a float bias.
        keep_quantized_weight: when true, weights (and biases with a known
            input scale) are quantized to numpy here and bound to int8/int32
            variables; otherwise float32 variables are wrapped in relay
            quantize ops.
    """
    suffix_len = len("._packed_params")

    for node_name, packed_name in packed_param_map.items():
        qparam = quant_params[packed_name]
        weight_scale = _get_numpy(qparam.scale)
        prefix = packed_name[:-suffix_len]
        weight_name = prefix + "_weight"
        bias_name = prefix + "_bias"

        if not keep_quantized_weight:
            qparam.weight_var = _expr.var(weight_name,
                                          shape=qparam.weight.shape,
                                          dtype="float32")
            qweight = relay.qnn.op.quantize(qparam.weight_var,
                                            qparam.scale,
                                            qparam.zero_point,
                                            out_dtype="int8",
                                            axis=0)
        else:
            qparam.weight_var = _expr.var(weight_name,
                                          shape=qparam.weight.shape,
                                          dtype="int8")
            qparam.weight = quantize_numpy(qparam.weight, weight_scale,
                                           _get_numpy(qparam.zero_point),
                                           np.int8)
            qweight = qparam.weight_var

        if qparam.bias is None:
            qbias = None
        else:
            float_bias_var = _expr.var(bias_name,
                                       shape=qparam.bias.shape,
                                       dtype="float32")
            if node_name not in input_scales_for_bias:
                # This case is for dynamic quantization, where the input
                # activation scale is unknown until runtime.
                qparam.bias_var = float_bias_var
                qbias = qparam.bias_var
            elif keep_quantized_weight:
                qparam.bias_var = _expr.var(bias_name,
                                            shape=qparam.bias.shape,
                                            dtype="int32")
                qparam.bias = quantize_numpy(
                    qparam.bias,
                    input_scales_for_bias[node_name] * weight_scale, 0,
                    np.int32)
                qbias = qparam.bias_var
            else:
                qparam.bias_var = float_bias_var
                bias_scale = _expr.const(
                    input_scales_for_bias[node_name] * weight_scale)
                qbias = relay.qnn.op.quantize(
                    qparam.bias_var,
                    bias_scale,
                    _expr.const(0, "int32"),
                    out_dtype="int32",
                    axis=0,
                )

        quant_params[packed_name] = qparam

        node_params = [qweight, qparam.scale, qparam.zero_point, qbias]
        if isinstance(quant_params[packed_name], ConvPackedParam):
            node_params += [
                qparam.stride,
                qparam.padding,
                qparam.dilation,
                qparam.groups,
                qparam.output_padding,
            ]
        outputs[node_name] = node_params
def _torch_depoly(module,
                  example_inputs,
                  param_exclude=None,
                  param_include=None,
                  output_names=None,
                  input_tensors=None,
                  input_names=None,
                  verbose=False):
    """Trace a module/function and resolve its graph in the current context.

    Args:
        module: pytorch nn.Module or function.
        example_inputs: example tensor, or list/tuple of example tensors,
            matching the arguments of the traced callable.
        param_exclude: regex string forwarded to ``_get_jit_params``.
        param_include: regex string forwarded to ``_get_jit_params``.
        output_names: list of output node names to resolve. Defaults to the
            output names reported by the parsed graph.
        input_tensors: optional backend tensor (or list of them) used as the
            graph inputs. When omitted, one input per example tensor is
            created in TensorRT/TVM mode.
        input_names: input name (or list of names). Required in TensorRT and
            TVM mode; not used in pytorch debug mode.
        verbose: forwarded to ``resolve_graph``.

    Returns:
        trace: traced jit module or function. It must be returned to avoid a
            C++ error (std::bad_alloc).
        graph_py: the parsed graph, with ``context`` set to the current
            context.
    """
    trace = torch.jit.trace(module, example_inputs, True)
    if not isinstance(example_inputs, (list, tuple)):
        example_inputs = [example_inputs]
    graph_py = parse(trace.graph,
                     len(example_inputs),
                     omit_useless_nodes=False)
    assert len(example_inputs) == len(graph_py.get_input_nodes_dict()), (
        "input mismatch. this may due to some input isn't used in graph")
    if output_names is None:
        output_names = graph_py.get_output_names()

    if isinstance(module, torch.nn.Module):
        params, weight_names = _get_jit_params(module, param_exclude,
                                               param_include)
        num_param_inputs = len(graph_py.get_param_nodes())
        msg = "expected {} params, but get {} params. ".format(
            num_param_inputs, len(params))
        msg += ("This may due to your network have multi output. "
                "use param_exclude to remove them")
        assert len(params) == num_param_inputs, msg
        # Bind each parameter node to its tensor and record the weight name.
        for pnode, param, name in zip(graph_py.get_param_nodes(), params,
                                      weight_names):
            pnode.resolved_outputs[0] = param
            pnode.__torch2trt_weight_name = name

    ctx = current_context()
    net = ctx.network
    use_trt = ctx.is_tensorrt
    use_tvm = False if use_trt else ctx.is_tvm

    if use_trt or use_tvm:
        if use_trt:
            missing_names_msg = "trt mode must provide input name"
        else:
            missing_names_msg = "tvm mode must provide input name"
        assert input_names is not None, missing_names_msg
        if not isinstance(input_names, (list, tuple)):
            input_names = [input_names]
        assert len(input_names) == len(example_inputs)

        if input_tensors is None:
            graph_inputs = []
            for torch_inp, name in zip(example_inputs, input_names):
                if use_trt:
                    # The leading dimension is left out of the declared shape.
                    tensor = net.add_input(name=name,
                                           dtype=trt.float32,
                                           shape=tuple(torch_inp.shape[1:]))
                else:
                    tensor = _expr.var(name,
                                       shape=torch_inp.shape,
                                       dtype="float32")
                graph_inputs.append(tensor)
        else:
            if not isinstance(input_tensors, (list, tuple)):
                input_tensors = [input_tensors]
            assert len(input_tensors) == len(example_inputs)
            graph_inputs = input_tensors
    else:
        # use torch inputs, debug mode
        graph_inputs = example_inputs

    for i, inode in enumerate(graph_py.get_input_nodes_dict().values()):
        inode.resolved_outputs[0] = graph_inputs[i]

    resolve_graph(graph_py, output_names, verbose=verbose)
    graph_py.context = ctx
    # trace must be returned to avoid std::bad_alloc
    return trace, graph_py