def aten_clone(inputs, attributes, scope):
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        layer = net.add_identity(inputs[0])
        output = layer.get_output(0)
        layer.name = scope
        output.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        raise NotImplementedError
    return [inputs[0].clone()]
def aten_relu_(inputs, attributes, scope):
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        layer = net.add_activation(inputs[0], trt.ActivationType.RELU)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        return [_op.nn.relu(inputs[0])]
    return [F.relu_(inputs[0])]
def aten_sigmoid(inputs, attributes, scope):
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        layer = net.add_activation(inputs[0], trt.ActivationType.SIGMOID)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        return [_op.sigmoid(inputs[0])]
    return [torch.sigmoid(inputs[0])]
def aten_softsign(inputs, attributes, scope):
    inp = inputs[0]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        layer = net.add_activation(inp, trt.ActivationType.SOFTSIGN)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        raise NotImplementedError
    return [F.softsign(inp)]
def aten_tanh(inputs, attributes, scope):
    inp = inputs[0]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        layer = net.add_activation(inp, trt.ActivationType.TANH)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        return [_op.tanh(inp)]
    return [torch.tanh(inp)]
def aten_leaky_relu_(inputs, attributes, scope):
    inp, leak = inputs[:2]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        layer = net.add_activation(inp, trt.ActivationType.LEAKY_RELU)
        layer.alpha = leak
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        return [_op.nn.leaky_relu(inp, leak)]
    return [F.leaky_relu_(inp, leak)]
def aten_size(inputs, attributes, scope):
    axis = inputs[1]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        # TensorRT tensor shapes don't include the batch axis.
        if axis == 0:
            # can't be None because prim::Int may consume this result
            return [-1]
        else:
            return [inputs[0].shape[axis - 1]]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        inp_shape = infer_shape(inputs[0])
        return [inp_shape[axis]]
    return [inputs[0].shape[axis]]
def aten_elu(inputs, attributes, scope):
    inp, alpha = inputs[:2]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        layer = net.add_activation(inp, trt.ActivationType.ELU)
        layer.alpha = alpha
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        return [_op.nn.elu(inp, alpha)]
    return [F.elu(inp, alpha)]
def aten_transpose(inputs, attributes, scope):
    inp, dim0, dim1 = inputs
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        assert all([p > 0 for p in [dim0, dim1]])
        # TensorRT shapes exclude the batch axis, so shift the dims by one
        params = list(range(len(inp.shape)))
        params[dim0 - 1], params[dim1 - 1] = params[dim1 - 1], params[dim0 - 1]
        layer = net.add_shuffle(inp)
        layer.first_transpose = tuple(params)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        inp_shape = infer_shape(inp)
        params = list(range(len(inp_shape)))
        params[dim0], params[dim1] = params[dim1], params[dim0]
        return [_op.transform.transpose(inp, params)]
    return [torch.transpose(inp, dim0, dim1)]
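# Standalone sanity check (a sketch, not wired into any converter): swapping two
# entries of the identity permutation and passing it to numpy.transpose reproduces
# torch.transpose, which is exactly the permutation the converter above builds.
def _check_transpose_as_permutation():
    import numpy as np
    import torch

    x = torch.arange(24.0).reshape(2, 3, 4)
    dim0, dim1 = 1, 2
    perm = list(range(x.dim()))
    perm[dim0], perm[dim1] = perm[dim1], perm[dim0]
    assert np.array_equal(np.transpose(x.numpy(), perm),
                          torch.transpose(x, dim0, dim1).numpy())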
def aten_chunk(inputs, attributes, scope):
    inp, chunk, dim = inputs
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        assert dim > 0
        # use slices to implement chunk
        outputs = []
        step = inp.shape[dim - 1] // chunk
        for i in range(chunk):
            out = _trt_torch_slice(net, inp, dim, i * step, (i + 1) * step, 1,
                                   scope + "/slice_{}".format(i))
            outputs.append(out)
        return [outputs]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        outputs = []
        shape = infer_shape(inp)
        step = shape[dim] // chunk
        for i in range(chunk):
            out = _tvm_torch_slice(inp, dim, i * step, (i + 1) * step, 1,
                                   scope + "/slice_{}".format(i))
            outputs.append(out)
        return [outputs]
    return [torch.chunk(inp, chunk, dim)]
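# Standalone sanity check (a sketch, not wired into any converter): along an evenly
# divisible dim, torch.chunk is equivalent to fixed-size slices of width
# size // chunks, which is what the slice-based implementation above relies on.
def _check_chunk_as_slices():
    import torch

    x = torch.arange(24.0).reshape(2, 12)
    chunks, dim = 3, 1
    step = x.shape[dim] // chunks
    slices = [x.narrow(dim, i * step, step) for i in range(chunks)]
    for a, b in zip(torch.chunk(x, chunks, dim), slices):
        assert torch.equal(a, b)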
def aten_adaptive_avg_pool2d(inputs, attributes, scope):
    inp = inputs[0]
    ksize = inputs[1]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        # only supported when the input size is an exact multiple of the output
        # size, so the adaptive pool reduces to a plain average pool with
        # kernel = stride = input_size // output_size
        inp_shape = inp.shape[1:]
        assert all([i % o == 0 for i, o in zip(inp_shape, ksize)])
        ksize = [i // o for i, o in zip(inp_shape, ksize)]
        layer = net.add_pooling(inp, trt.PoolingType.AVERAGE, ksize)
        layer.stride = ksize
        # print("WARNING: adaptive_avg_pool2d support is incomplete")
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        inp_shape = infer_shape(inp)
        inp_shape = inp_shape[2:]
        assert all([i % o == 0 for i, o in zip(inp_shape, ksize)])
        ksize = [i // o for i, o in zip(inp_shape, ksize)]
        return [_op.nn.avg_pool2d(inp, pool_size=ksize, strides=ksize)]
    return [F.adaptive_avg_pool2d(inp, ksize)]
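# Standalone sanity check (a sketch, not wired into any converter): when the input
# size is an exact multiple of the output size, adaptive average pooling reduces to
# a plain average pool with kernel = stride = input_size // output_size, which is
# the only case the converter above supports.
def _check_adaptive_avg_pool_reduction():
    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 8, 12, 12)
    out_size = (3, 3)
    k = tuple(i // o for i, o in zip(x.shape[2:], out_size))
    assert torch.allclose(F.adaptive_avg_pool2d(x, out_size),
                          F.avg_pool2d(x, k, stride=k), atol=1e-6)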
def aten_batch_norm(inputs, attributes, scope):
    inp, weight, bias, running_mean, running_var = inputs[:5]
    training, momentum, eps = inputs[5:8]
    # assert training is False
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        running_mean = running_mean.detach().cpu().numpy()
        running_var = running_var.detach().cpu().numpy()
        weight = weight.detach().cpu().numpy()
        bias = bias.detach().cpu().numpy()
        shift = (-running_mean / np.sqrt(running_var + eps)) * weight + bias
        scale = weight / np.sqrt(running_var + eps)
        power = np.ones_like(shift)
        layer = net.add_scale(inp, trt.ScaleMode.CHANNEL, shift, scale, power)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        ctx.refit_weight_dict[layer.name] = {
            "type": "BatchNorm",
            "running_mean": inputs[3].__torch2trt_weight_name,
            "running_var": inputs[4].__torch2trt_weight_name,
            "weight": inputs[1].__torch2trt_weight_name,
            "bias": inputs[2].__torch2trt_weight_name,
            "eps": eps,
        }
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        running_mean = running_mean.detach().cpu().numpy()
        running_var = running_var.detach().cpu().numpy()
        weight = weight.detach().cpu().numpy()
        bias = bias.detach().cpu().numpy()
        running_mean_t = _expr.var(
            scope + "/running_mean", shape=running_mean.shape, dtype="float32")
        running_var_t = _expr.var(
            scope + "/running_var", shape=running_var.shape, dtype="float32")
        weight_t = _expr.var(scope + "/weight", shape=weight.shape, dtype="float32")
        bias_t = _expr.var(scope + "/bias", shape=bias.shape, dtype="float32")
        ctx.tvm_weight_dict[running_mean_t] = running_mean
        ctx.tvm_weight_dict[running_var_t] = running_var
        ctx.tvm_weight_dict[weight_t] = weight
        ctx.tvm_weight_dict[bias_t] = bias
        new_attrs = {}
        new_attrs["axis"] = 1
        new_attrs["epsilon"] = eps
        new_attrs["center"] = True
        new_attrs["scale"] = True
        new_attrs["gamma"] = weight_t
        new_attrs["beta"] = bias_t
        new_attrs["moving_mean"] = running_mean_t
        new_attrs["moving_var"] = running_var_t
        result, moving_mean, moving_var = _op.nn.batch_norm(inp, **new_attrs)
        return [result]
    res = F.batch_norm(inp, running_mean, running_var, weight, bias,
                       bool(training), momentum, eps)
    return [res]
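# Standalone sanity check (a sketch, not wired into any converter): inference-mode
# batch norm folds into a per-channel affine y = scale * x + shift with
# scale = weight / sqrt(var + eps) and shift = bias - mean * scale, which is what
# the TensorRT scale layer above computes.
def _check_batch_norm_folding():
    import torch
    import torch.nn.functional as F

    C, eps = 4, 1e-5
    x = torch.randn(2, C, 5, 5)
    w, b = torch.randn(C), torch.randn(C)
    mean, var = torch.randn(C), torch.rand(C) + 0.5
    scale = w / torch.sqrt(var + eps)
    shift = b - mean * scale
    ref = F.batch_norm(x, mean, var, w, b, training=False, eps=eps)
    folded = scale.view(1, C, 1, 1) * x + shift.view(1, C, 1, 1)
    assert torch.allclose(ref, folded, atol=1e-5)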
def aten_repeat(inputs, attributes, scope):
    inp, params = inputs
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        assert params[0] == 1
        assert len(params) > 1
        assert len(params) == len(inp.shape) + 1
        # implement repeat with one gather per repeated axis; slower than native repeat
        out = inp
        i = 0
        for p, s in zip(params[1:], inp.shape):
            if p > 1:
                repeat_weights = np.tile(np.arange(0, s), [p]).astype(np.int32)
                layer = net.add_constant([1, s * p], trt.Weights(repeat_weights))
                layer.name = scope + "/constant_{}".format(i)
                gather_inds = layer.get_output(0)
                gather_inds.name = scope + "/constant_{}".format(i)
                layer = net.add_gather(out, gather_inds, i)
                layer.name = scope + "/gather_{}".format(i)
                out = layer.get_output(0)
                out.name = scope + "/gather_{}".format(i)
            i += 1
        return [out]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        raise NotImplementedError
    return [inp.repeat(*params)]
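# Standalone sanity check (a sketch, not wired into any converter): repeating a
# tensor p times along one axis equals a gather on that axis with indices
# tile(arange(size), p), which is the trick the converter above applies per axis.
def _check_repeat_as_gather():
    import numpy as np
    import torch

    x = torch.arange(12.0).reshape(3, 4)
    p = 2
    idx = torch.as_tensor(np.tile(np.arange(x.shape[0]), p))
    assert torch.equal(x[idx], x.repeat(p, 1))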
def aten_matmul(inputs, attributes, scope):
    mat1, mat2 = inputs[:2]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        assert isinstance(mat2, torch.Tensor)
        inp = mat1
        weight = mat2.t().detach().cpu().numpy()
        C = weight.shape[0]
        # use a fully connected layer to implement this
        if len(inp.shape) < 3:
            inp = _trt_reshape(net, inp, [-1, 1, 1], scope + "/reshape")
        layer = net.add_fully_connected(inp, C, weight, trt.Weights())
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        ctx.refit_weight_dict[layer.name] = {
            "type": "Linear",
            "weight": inputs[1].__torch2trt_weight_name,
        }
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        inp = mat1
        weight = mat2.t().detach().cpu().numpy()
        C = weight.shape[0]
        weight_t = _expr.var(scope + "/weight", shape=weight.shape, dtype="float32")
        ctx.tvm_weight_dict[weight_t] = weight
        res = _op.nn.dense(inp, weight_t, units=C)
        return [res]
    res = torch.matmul(mat1, mat2)
    return [res]
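# Standalone sanity check (a sketch, not wired into any converter): a matmul with a
# constant right-hand side W is the same as a bias-free linear layer whose weight
# is W.t(), which is how the converter above maps it to a fully connected layer.
def _check_matmul_as_linear():
    import torch
    import torch.nn.functional as F

    x = torch.randn(5, 8)
    W = torch.randn(8, 3)
    assert torch.allclose(torch.matmul(x, W), F.linear(x, W.t()), atol=1e-6)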
def aten_avg_pool2d(inputs, attributes, scope):
    inp = inputs[0]
    ksize, stride, pad, ceil_mode, count_include_pad = inputs[1:6]
    if len(stride) == 0:
        stride = ksize
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        layer = net.add_pooling(inp, trt.PoolingType.AVERAGE, ksize)
        layer.stride = stride
        layer.padding = pad
        layer.average_count_excludes_padding = not count_include_pad
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        new_attrs = {}
        new_attrs["pool_size"] = ksize
        new_attrs["strides"] = stride
        new_attrs["padding"] = pad
        new_attrs["ceil_mode"] = ceil_mode
        new_attrs["count_include_pad"] = count_include_pad
        return [_op.nn.avg_pool2d(inp, **new_attrs)]
    res = F.avg_pool2d(inp, ksize, stride, pad, bool(ceil_mode),
                       bool(count_include_pad))
    return [res]
def aten_max_pool2d(inputs, attributes, scope):
    inp = inputs[0]
    ksize, stride, pad, dilation, ceil_mode = inputs[1:6]
    if len(stride) == 0:
        stride = ksize
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        assert all([d == 1 for d in dilation]), "trt pooling doesn't support dilation"
        layer = net.add_pooling(inp, trt.PoolingType.MAX, ksize)
        layer.stride = stride
        layer.padding = pad
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        assert all([d == 1 for d in dilation]), "tvm max_pool2d doesn't support dilation"
        new_attrs = {}
        new_attrs["pool_size"] = ksize
        new_attrs["strides"] = stride
        new_attrs["padding"] = pad
        new_attrs["ceil_mode"] = ceil_mode
        return [_op.nn.max_pool2d(inp, **new_attrs)]
    res = F.max_pool2d(inp, ksize, stride, pad, dilation, bool(ceil_mode))
    return [res]
def aten_softplus(inputs, attributes, scope):
    inp, beta, thresh = inputs[:3]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        # trt softplus is alpha * log(exp(beta * x) + 1); torch softplus is
        # (1 / beta) * log(1 + exp(beta * x)), so alpha = 1 / beta
        layer = net.add_activation(inp, trt.ActivationType.SOFTPLUS)
        layer.alpha = 1 / beta
        layer.beta = beta
        print("WARNING: tensorrt doesn't support the threshold parameter of softplus")
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        raise NotImplementedError
    return [F.softplus(inp, beta, thresh)]
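# Standalone sanity check (a sketch, not wired into any converter): with
# alpha = 1 / beta, TensorRT's alpha * log(exp(beta * x) + 1) reproduces torch's
# softplus (ignoring torch's large-input threshold shortcut).
def _check_softplus_alpha_beta():
    import torch
    import torch.nn.functional as F

    x = torch.linspace(-3, 3, steps=7)
    beta = 2.0
    trt_style = (1.0 / beta) * torch.log(torch.exp(beta * x) + 1.0)
    assert torch.allclose(trt_style, F.softplus(x, beta=beta), atol=1e-6)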
def aten_contiguous(inputs, attributes, scope):
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        return [inputs[0]]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        return [inputs[0]]
    return [inputs[0].contiguous()]
def aten_to(inputs, attributes, scope):
    inp, dst = inputs[:2]
    net = current_context().network
    if net is not None and has_trt_tensor(inputs):
        raise NotImplementedError
    res = inp.to(dst)
    if hasattr(inp, "__torch2trt_weight_name"):
        res.__torch2trt_weight_name = inp.__torch2trt_weight_name
    return [res]
def aten_unsqueeze(inputs, attributes, scope):
    inp, dim = inputs
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        return [_trt_unsqueeze(net, inp, dim, scope)]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        return [_tvm_unsqueeze(inp, dim, scope)]
    return [inp.unsqueeze(dim)]
def aten_flatten(inputs, attributes, scope):
    inp, start_dim, end_dim = inputs[:3]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        assert len(inp.shape) == 3
        assert start_dim == 1 and (end_dim == -1 or end_dim == len(inp.shape))
        new_shape = [int(np.prod(list(inp.shape))), 1, 1]
        layer = net.add_shuffle(inp)
        layer.reshape_dims = new_shape
        output = layer.get_output(0)
        layer.name = scope
        output.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        assert len(infer_shape(inp)) == 4
        assert start_dim == 1 and (end_dim == -1
                                   or end_dim == len(infer_shape(inp)) - 1)
        return [_op.nn.batch_flatten(inp)]
    return [torch.flatten(*inputs)]
def aten_dropout(inputs, attributes, scope):
    inp = inputs[0]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        return [inputs[0]]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        return [inputs[0]]
    rate, training = inputs[1:3]
    res = F.dropout2d(inp, rate, bool(training))
    return [res]
def aten_hardtanh_(inputs, attributes, scope):
    inp, min_val, max_val = inputs[:3]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        # implement relu6 (a subset of hardtanh) as min(relu(x), max_val)
        assert min_val == 0, "only support relu6"
        layer = net.add_activation(inp, trt.ActivationType.RELU)
        output = layer.get_output(0)
        layer.name = scope + "/relu"
        tensor = np.full([1] * len(inp.shape), max_val, dtype=np.float32)
        trt_6 = net.add_constant([1] * len(inp.shape), tensor)
        layer = net.add_elementwise(output, trt_6.get_output(0),
                                    trt.ElementWiseOperation.MIN)
        output = layer.get_output(0)
        layer.name = scope + "/elem_min"
        output.name = scope + "/relu6"
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        raise NotImplementedError
    return [F.hardtanh_(inp, min_val, max_val)]
def aten_view(inputs, attributes, scope):
    assert len(inputs) == 2
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        # trt tensor shapes don't include the batch axis
        shape = inputs[1][1:]
        # TODO: add batch size check
        if len(shape) == 1:
            shape += [1, 1]
        # elif len(shape) == 2:
        #     shape += [1]
        layer = net.add_shuffle(inputs[0])
        layer.reshape_dims = shape
        output = layer.get_output(0)
        layer.name = scope
        output.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        return [_op.reshape(inputs[0], newshape=inputs[1])]
    return [inputs[0].view(*inputs[1])]
def aten_cos(inputs, attributes, scope):
    inp = inputs[0]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        layer = net.add_unary(inp, trt.UnaryOperation.COS)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        raise NotImplementedError
    return [torch.cos(inp)]
def aten_div(inputs, attributes, scope):
    # print_inputs(inputs)
    lfs, rfs = inputs
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        output = _scale_or_elementwise(net, lfs, rfs, "div", scope)
        output.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        lfs, rfs = _tvm_to_const([lfs, rfs])
        return [_op.divide(lfs, rfs)]
    return [lfs / rfs]
def aten_add(inputs, attributes, scope):
    lfs, rfs, alpha = inputs
    assert alpha == 1
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        output = _scale_or_elementwise(net, lfs, rfs, "add", scope)
        output.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        lfs, rfs = _tvm_to_const([lfs, rfs])
        return [_op.add(lfs, rfs)]
    return [lfs + rfs]
def aten_index_select(inputs, attributes, scope):
    inp, axis, index = inputs
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        layer = net.add_gather(inp, index, axis - 1)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        raise NotImplementedError
    return [torch.index_select(inp, axis, index)]
# NOTE: this redefines aten_hardtanh_ and overrides the min(relu(x), max_val)
# converter above; this version uses relu(x) - relu(x - max_val) instead.
def aten_hardtanh_(inputs, attributes, scope):
    inp, min_val, max_val = inputs[:3]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        # use relu(x) - relu(x - max_val) to implement relu6 (a subset of hardtanh)
        assert min_val == 0, "only support relu6"
        layer = net.add_activation(inp, trt.ActivationType.RELU)
        output = layer.get_output(0)
        layer.name = scope + "/relu"
        inp_sub_6 = _scale_or_elementwise(net, inp, torch.tensor(max_val), "sub",
                                          scope + "/sub")
        layer = net.add_activation(inp_sub_6, trt.ActivationType.RELU)
        layer.name = scope + "/relu(x-6)"
        output_6 = layer.get_output(0)
        output = _scale_or_elementwise(net, output, output_6, "sub",
                                       scope + "/sub_relu")
        output.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        raise NotImplementedError
    return [F.hardtanh_(inp, min_val, max_val)]
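# Standalone sanity check (a sketch, not wired into any converter): both relu6
# decompositions used by the hardtanh_ converters above, min(relu(x), 6) and
# relu(x) - relu(x - 6), match hardtanh(x, 0, 6).
def _check_relu6_decompositions():
    import torch
    import torch.nn.functional as F

    x = torch.linspace(-3.0, 9.0, steps=25)
    ref = F.hardtanh(x, 0.0, 6.0)
    assert torch.allclose(torch.clamp(F.relu(x), max=6.0), ref)
    assert torch.allclose(F.relu(x) - F.relu(x - 6.0), ref)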
def aten_neg(inputs, attributes, scope):
    inp = inputs[0]
    ctx = current_context()
    net = ctx.network
    if ctx.is_tensorrt and has_trt_tensor(inputs):
        layer = net.add_unary(inp, trt.UnaryOperation.NEG)
        output = layer.get_output(0)
        output.name = scope
        layer.name = scope
        return [output]
    elif ctx.is_tvm and has_tvm_tensor(inputs):
        return [_op.negative(inp)]
    return [torch.neg(inp)]