import numpy as np
import torch
import torch.nn.functional as F

import tensorrt as trt

# NOTE: helpers such as current_network, has_trt_tensor, _scale_or_elementwise,
# _axes_to_trt_axis, _trt_torch_slice, _trt_squeeze and _trt_unsqueeze are
# defined elsewhere in this module.


def aten_dropout(inputs, attributes, scope):
    inp = inputs[0]
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        # dropout is an identity op at inference time, so the TensorRT path
        # simply forwards the input tensor
        return [inputs[0]]
    rate, training = inputs[1:3]
    # aten::dropout is element-wise dropout, so use F.dropout here
    res = F.dropout(inp, rate, bool(training))
    return [res]
def aten_div(inputs, attributes, scope):
    lfs, rfs = inputs
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        output = _scale_or_elementwise(net, lfs, rfs, "div", scope)
        output.name = scope
        return [output]
    return [lfs / rfs]
def aten_relu(inputs, attributes, scope):
    net = current_network()
    # check has_trt_tensor as in the other handlers, so torch constants
    # still take the fallback path while a network is being built
    if net is not None and has_trt_tensor(inputs):
        layer = net.add_activation(inputs[0], trt.ActivationType.RELU)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    return [F.relu(inputs[0])]
def aten_add(inputs, attributes, scope):
    lfs, rfs, alpha = inputs
    assert alpha == 1
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        output = _scale_or_elementwise(net, lfs, rfs, "add", scope)
        output.name = scope
        return [output]
    return [lfs + rfs]
def aten_size(inputs, attributes, scope):
    axis = inputs[1]
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        # TensorRT tensor shapes don't include the batch axis, so the batch
        # size is unknown at build time and all other axes shift down by one
        if axis == 0:
            return [-1]
        else:
            return [inputs[0].shape[axis - 1]]
    return [inputs[0].shape[axis]]
def aten_sum(inputs, attributes, scope):
    inp, axes, keepdim = inputs
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        axis_trt = _axes_to_trt_axis(axes, len(inp.shape))
        layer = net.add_reduce(inp, trt.ReduceOperation.SUM, axis_trt,
                               bool(keepdim))
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    return [inp.sum(tuple(axes), keepdim=bool(keepdim))]
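
# Sketch of what _axes_to_trt_axis is assumed to compute (the real helper is
# defined elsewhere in this module): TensorRT's add_reduce takes a bitmask
# over the tensor's dimensions, and because TensorRT shapes exclude the batch
# axis, every torch axis is shifted down by one before being encoded.
def _axes_to_trt_axis_sketch(axes, ndim):
    mask = 0
    for ax in axes:
        # e.g. torch axes [1, 2] on an NCHW tensor -> TRT dims 0, 1 -> 0b011
        assert 0 < ax <= ndim, "can't reduce over the batch axis in TensorRT"
        mask |= 1 << (ax - 1)
    return mask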
def aten_max(inputs, attributes, scope):
    inp, dim, keepdim = inputs
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        axis_trt = _axes_to_trt_axis([dim], len(inp.shape))
        layer = net.add_reduce(inp, trt.ReduceOperation.MAX, axis_trt,
                               bool(keepdim))
        output = layer.get_output(0)
        layer.name = scope
        output.name = scope
        # torch.max(dim=...) also returns indices; TensorRT's reduce doesn't,
        # so None stands in for them
        return [output, None]
    return [*inp.max(dim, keepdim=bool(keepdim))]
def aten_constant_pad_nd(inputs, attributes, scope):
    inp, pad_params, val = inputs
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        assert val == 0.0
        # torch pad params for the last two dims are ordered
        # (left, right, top, bottom); TensorRT's add_padding takes
        # (pre_padding, post_padding) as (H, W) pairs
        w0, w1, h0, h1 = pad_params
        layer = net.add_padding(inp, (h0, w0), (h1, w1))
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    return [F.pad(inp, pad_params, value=val)]
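
# Minimal sanity check (not part of the converter) for the pad ordering used
# above: torch's pad params for the last two dims are (left, right, top,
# bottom), so padding one column on the left only grows the width.
def _check_pad_order_sketch():
    x = torch.zeros(1, 1, 2, 2)
    y = F.pad(x, [1, 0, 0, 0])
    assert y.shape == (1, 1, 2, 3)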
def aten_cat(inputs, attributes, scope):
    tensors, dim = inputs
    net = current_network()
    if net is not None and has_trt_tensor(tensors):
        assert dim > 0
        layer = net.add_concatenation(tensors)
        layer.axis = dim - 1  # TensorRT axes don't include the batch axis
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    res = torch.cat(tensors, dim=dim)
    return [res]
def aten_permute(inputs, attributes, scope):
    inp, params = inputs
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        # the batch axis must stay in place; the remaining axes shift down
        # by one because TensorRT shapes exclude it
        perm_params = params[1:]
        assert all([p > 0 for p in perm_params])
        layer = net.add_shuffle(inp)
        layer.first_transpose = tuple(p - 1 for p in perm_params)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    return [inputs[0].permute(*params)]
def aten_select(inputs, attributes, scope):
    inp, dim, index = inputs
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        assert dim > 0
        # select is a length-one slice followed by a squeeze
        output = _trt_torch_slice(net, inp, dim, index, index + 1, 1,
                                  scope + "/slice")
        output.name = scope + "/slice"
        output = _trt_squeeze(net, output, dim, scope + "/squeeze")
        output.name = scope + "/squeeze"
        return [output]
    slice_ = slice(index, index + 1, 1)
    slices = [slice(None, None, None) for _ in range(dim + 1)]
    slices[dim] = slice_
    return [inp[tuple(slices)].squeeze(dim)]
def aten_max_pool2d(inputs, attributes, scope):
    inp = inputs[0]
    ksize, stride, pad, dilation = inputs[1:5]
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        layer = net.add_pooling(inp, trt.PoolingType.MAX, ksize)
        layer.stride = stride
        layer.padding = pad
        assert all([b == 1 for b in dilation]), \
            "TensorRT pooling doesn't support dilation"
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    res = F.max_pool2d(inp, ksize, stride, pad, dilation)
    return [res]
def aten_adaptive_avg_pool2d(inputs, attributes, scope):
    inp = inputs[0]
    out_size = inputs[1]
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        # emulate adaptive pooling with a fixed kernel; this only works when
        # the input size is an exact multiple of the requested output size
        inp_shape = inp.shape[1:]  # (H, W): TensorRT shapes exclude batch
        assert all([i % o == 0 for i, o in zip(inp_shape, out_size)]), \
            "adaptive_avg_pool2d needs input size divisible by output size"
        ksize = [i // o for i, o in zip(inp_shape, out_size)]
        layer = net.add_pooling(inp, trt.PoolingType.AVERAGE, ksize)
        # the stride must match the kernel so the windows tile the input
        layer.stride = ksize
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    res = F.adaptive_avg_pool2d(inp, out_size)
    return [res]
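
# Sanity-check sketch for the emulation above: when the input size divides
# evenly, adaptive average pooling equals a plain average pool with
# kernel = stride = input_size // output_size.
def _check_adaptive_pool_sketch():
    x = torch.randn(1, 3, 8, 8)
    a = F.adaptive_avg_pool2d(x, (2, 2))
    b = F.avg_pool2d(x, kernel_size=4, stride=4)
    assert torch.allclose(a, b, atol=1e-6)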
def aten_avg_pool2d(inputs, attributes, scope):
    inp = inputs[0]
    ksize, stride, pad, ceil_mode, count_include_pad = inputs[1:6]
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        layer = net.add_pooling(inp, trt.PoolingType.AVERAGE, ksize)
        layer.stride = stride
        layer.padding = pad
        layer.average_count_excludes_padding = not count_include_pad
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    res = F.avg_pool2d(inp, ksize, stride, pad, bool(ceil_mode),
                       bool(count_include_pad))
    return [res]
def aten_view(inputs, attributes, scope):
    assert len(inputs) == 2
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        shape = inputs[1][1:]  # TensorRT tensor shapes don't include batch
        # TODO add batch size check
        if len(shape) == 1:
            # pad a flat (C,) shape to (C, 1, 1) so the result can feed
            # layers such as fully_connected that expect CHW input
            shape += [1, 1]
        layer = net.add_shuffle(inputs[0])
        layer.reshape_dims = shape
        output = layer.get_output(0)
        layer.name = scope
        output.name = scope
        return [output]
    return [inputs[0].view(*inputs[1])]
def aten_slice(inputs, attributes, scope):
    inp, dim, start, end, step = inputs
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        if dim == 0:
            # torch encodes an open-ended slice with a huge sentinel integer,
            # so a full-range slice over the batch axis is a no-op
            if start == 0 and step == 1 and end > 100000000:
                return [inp]
            else:
                raise NotImplementedError(
                    "TensorRT doesn't support slicing the batch axis")
        assert step == 1
        output = _trt_torch_slice(net, inp, dim, start, end, step, scope)
        output.name = scope
        return [output]
    slice_ = slice(start, end, step)
    slices = [slice(None, None, None) for _ in range(dim + 1)]
    slices[dim] = slice_
    return [inp[tuple(slices)]]
def aten_convolution(inputs, attributes, scope):
    inp, weight, bias = inputs[:3]
    stride, pad, dilation = inputs[3:6]
    transposed, output_padding, groups = inputs[6:9]
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        assert all([e == 0 for e in output_padding]), \
            "TensorRT doesn't support output padding"
        if transposed:
            # conv_transpose weight layout: (in, out // groups, *ksize)
            I, O_groups, *ksize = weight.shape
            O = O_groups * groups
        else:
            # conv weight layout: (out, in // groups, *ksize)
            O, I_groups, *ksize = weight.shape
            I = I_groups * groups
        ndim = len(ksize)
        assert ndim == 2, "TensorRT only supports 2d conv"
        # trt weight format: GKCRS: [num_groups, O_groups, I, H, W]
        weight = weight.detach().cpu().numpy()
        if bias is not None:
            bias = bias.detach().cpu().numpy()
        else:
            bias = trt.Weights()
        if transposed:
            layer = net.add_deconvolution(inputs[0], O, tuple(ksize), weight,
                                          bias)
        else:
            layer = net.add_convolution(inputs[0], O, tuple(ksize), weight,
                                        bias)
        layer.dilation = tuple(dilation)
        layer.stride = tuple(stride)
        layer.padding = tuple(pad)
        layer.num_groups = groups
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    ndim = len(inputs[3])
    assert ndim == 2
    if transposed:
        res = F.conv_transpose2d(inp, weight, bias, stride, pad,
                                 output_padding, groups, dilation)
    else:
        res = F.conv2d(inp, weight, bias, stride, pad, dilation, groups)
    return [res]
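
# Layout sketch (torch facts only): conv weights are (O, I // g, kH, kW) and
# conv_transpose weights are (I, O // g, kH, kW), matching what the two
# branches above unpack before handing the raw array to the GKCRS layout
# noted in the comment.
def _check_conv_weight_layout_sketch():
    conv = torch.nn.Conv2d(8, 16, 3, groups=2)
    assert conv.weight.shape == (16, 4, 3, 3)  # (O, I // g, kH, kW)
    deconv = torch.nn.ConvTranspose2d(8, 16, 3, groups=2)
    assert deconv.weight.shape == (8, 8, 3, 3)  # (I, O // g, kH, kW)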
def aten_addmm(inputs, attributes, scope):
    mat_to_add, mat1, mat2 = inputs[:3]
    beta, alpha = inputs[3:5]
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        assert beta == 1 and alpha == 1
        assert len(mat_to_add.shape) == 1
        # addmm(bias, x, W) with unit coefficients is a linear layer, so it
        # maps to a fully connected layer with W transposed to (out, in)
        inp = mat1
        weight = mat2.t().detach().cpu().numpy()
        bias = mat_to_add.detach().cpu().numpy()
        C = weight.shape[0]
        layer = net.add_fully_connected(inp, C, weight, bias)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    res = torch.addmm(mat_to_add, mat1, mat2, beta=beta, alpha=alpha)
    return [res]
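
# Sanity-check sketch: addmm with unit coefficients is exactly a linear layer
# with the weight transposed to (out, in), mirroring the fully connected
# layer built above.
def _check_addmm_as_linear_sketch():
    x = torch.randn(4, 8)
    w = torch.randn(8, 16)
    b = torch.randn(16)
    assert torch.allclose(torch.addmm(b, x, w), F.linear(x, w.t(), b),
                          atol=1e-6)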
def aten_batch_norm(inputs, attributes, scope):
    inp, weight, bias, running_mean, running_var = inputs[:5]
    training, momentum, eps = inputs[5:8]
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        # inference only: fold batch norm into a per-channel affine transform
        # y = scale * x + shift, with scale = w / sqrt(var + eps) and
        # shift = b - mean * scale
        running_mean = running_mean.detach().cpu().numpy()
        running_var = running_var.detach().cpu().numpy()
        weight = weight.detach().cpu().numpy()
        bias = bias.detach().cpu().numpy()
        shift = (-running_mean / np.sqrt(running_var + eps)) * weight + bias
        scale = weight / np.sqrt(running_var + eps)
        layer = net.add_scale(inp, trt.ScaleMode.CHANNEL, shift, scale,
                              np.ones_like(shift))
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    res = F.batch_norm(inp, running_mean, running_var, weight, bias,
                       bool(training), momentum, eps)
    return [res]
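
# Sanity-check sketch: the per-channel scale/shift folding above reproduces
# eval-mode batch norm.
def _check_bn_folding_sketch():
    bn = torch.nn.BatchNorm2d(3).eval()
    x = torch.randn(2, 3, 4, 4)
    with torch.no_grad():
        scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)
        shift = bn.bias - bn.running_mean * scale
        y = x * scale.view(1, -1, 1, 1) + shift.view(1, -1, 1, 1)
        assert torch.allclose(bn(x), y, atol=1e-6)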
def aten_detach(inputs, attributes, scope):
    inp = inputs[0]
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        raise NotImplementedError
    return [inp.detach()]
def aten_contiguous(inputs, attributes, scope):
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        # contiguous is a memory-layout no-op for TensorRT tensors
        return [inputs[0]]
    return [inputs[0].contiguous()]
def aten_to(inputs, attributes, scope):
    inp, dst = inputs[:2]
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        raise NotImplementedError
    return [inp.to(dst)]
def aten_unsqueeze(inputs, attributes, scope):
    inp, dim = inputs
    net = current_network()
    if net is not None and has_trt_tensor(inputs):
        return [_trt_unsqueeze(net, inp, dim, scope)]
    return [inp.unsqueeze(dim)]