def unfold(g, input, dimension, size, step):
    const_size = sym_help._maybe_get_const(size, "i")
    const_step = sym_help._maybe_get_const(step, "i")
    if not sym_help._is_value(const_size) and not sym_help._is_value(const_step):
        from torch.onnx.symbolic_opset9 import unfold as _unfold
        return _unfold(g, input, dimension, const_size, const_step)
    if sym_help._operator_export_type == torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK:
        return g.op("ATen", input, operator_s="unfold", dimension_i=dimension, size_i=size, step_i=step)

    sizedim = sym_help._get_tensor_dim_size(input, dimension)
    if sizedim is not None:
        low_start = g.op("Constant", value_t=torch.tensor(0))
        low_end = g.op("Constant", value_t=torch.tensor(sizedim))
        hi_end = g.op("Constant", value_t=torch.tensor(sizedim + 1))
        low_indices = g.op("Range", low_start, low_end, step)
        hi_indices = g.op("Range", size, hi_end, step)

        low_size = sym_help._size_helper(g, low_indices, g.op("Constant", value_t=torch.tensor(0)))
        hi_size = sym_help._size_helper(g, hi_indices, g.op("Constant", value_t=torch.tensor(0)))

        ndim = sym_help._get_tensor_rank(input)
        perm = list(range(0, ndim))
        perm.append(perm.pop(dimension))

        unsqueeze_list = []
        loop_condition = g.op("Constant", value_t=torch.tensor(1))
        loop_condition = g.op("Cast", loop_condition, to_i=9)
        loop_len = g.op("Min", low_size, hi_size)
        loop = g.op("Loop", loop_len, loop_condition)

        loop_block = _add_block(loop.node())
        block_input_iter = _add_input_to_block(loop_block)
        cond = _add_input_to_block(loop_block)

        starts = loop_block.op("Gather", low_indices, block_input_iter)
        ends = loop_block.op("Gather", hi_indices, block_input_iter)
        axes = loop_block.op("Constant", value_t=torch.tensor([2]))
        starts = sym_help._unsqueeze_helper(loop_block, starts, [0])
        ends = sym_help._unsqueeze_helper(loop_block, ends, [0])
        stack = loop_block.op("Slice", input, starts, ends, axes)

        unsqueeze = sym_help._unsqueeze_helper(loop_block,
                                               loop_block.op("Transpose", stack, perm_i=perm),
                                               [dimension])
        unsqueeze_list.append(unsqueeze)
        concat = loop_block.op("Concat", *unsqueeze_list, axis_i=0)

        cond_out = loop_block.op("Cast", loop_condition, to_i=9)
        _add_output_to_block(loop_block, cond_out)
        _add_output_to_block(loop_block, concat)

        loop_output = loop.node().output()
        perm = [0, 1, 2, 3, 4]
        perm[0], perm[dimension + 1] = perm[dimension + 1], perm[0]
        transpose = g.op("Transpose", loop_output, perm_i=perm)
        squeeze = sym_help._squeeze_helper(g, transpose, [0])

        return squeeze
    else:
        return _unimplemented("Unfold", "input size not accessible")
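# For reference, a minimal eager-mode sketch (not part of the exporter code above) of the
# aten::unfold behaviour that the Loop-based symbolic reproduces: sliding windows of length
# `size` taken every `step` elements along `dimension`.
import torch

x = torch.arange(7)              # tensor([0, 1, 2, 3, 4, 5, 6])
windows = x.unfold(0, 3, 2)      # dimension=0, size=3, step=2
# tensor([[0, 1, 2],
#         [2, 3, 4],
#         [4, 5, 6]]) -- one row per window; the symbolic builds the same rows with Slice
# inside an ONNX Loop and stacks them along `dimension`.
print(windows)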
def embedding_bag(g, embedding_matrix, indices, offsets, scale_grad_by_freq,
                  mode, sparse, per_sample_weights, include_last_offset, padding_idx):
    if scale_grad_by_freq and sym_help._training_mode:
        return sym_help._onnx_unsupported("embedding_bag with scale_grad_by_freq for training mode")
    if padding_idx is not None and padding_idx >= 0:
        raise RuntimeError("embedding_bag with padding_idx")
    from torch.onnx.symbolic_opset9 import select
    import warnings
    warnings.warn("Export of embedding_bag with dynamic input/offsets shape is not supported in opset 10. "
                  "Please use opset 11 or higher to export model for dynamic input shape.")

    offsets_dim_0 = sym_help._get_tensor_dim_size(offsets, 0)
    if offsets_dim_0 is not None:
        if include_last_offset:
            offset_len = offsets_dim_0 - 1
            offsets_extended = offsets
        else:
            offset_len = offsets_dim_0
            offsets_extended = [offsets, g.op("Constant", value_t=torch.tensor([maxsize]))]
            offsets_extended = g.op("Concat", *offsets_extended, axis_i=0)
        list_ = []
        for i in range(offset_len):
            start_ = sym_help._unsqueeze_helper(
                g, select(g, offsets_extended, torch.tensor(0), torch.tensor(i)), [0])
            end_ = sym_help._unsqueeze_helper(
                g, select(g, offsets_extended, torch.tensor(0), torch.tensor(i + 1)), [0])
            axes_ = g.op("Constant", value_t=torch.tensor([0]))
            indices_row = g.op("Slice", indices, start_, end_, axes_)

            embeddings = g.op("Gather", embedding_matrix, indices_row)
            if not sym_help._is_none(per_sample_weights):
                per_sample_weights_row = g.op("Slice", per_sample_weights, start_, end_, axes_)
                per_sample_weights_row = sym_help._unsqueeze_helper(g, per_sample_weights_row, [1])
                embeddings = g.op("Mul", embeddings, per_sample_weights_row)
            if mode == 0:
                embeddings = sym_help._reducesum_helper(g, embeddings, axes_i=[0], keepdims_i=0)
            elif mode == 1:
                embeddings = g.op("ReduceMean", embeddings, axes_i=[0], keepdims_i=0)
            else:
                embeddings = g.op("ReduceMax", embeddings, axes_i=[0], keepdims_i=0)

            embeddings = sym_help._unsqueeze_helper(g, embeddings, [0])
            list_.append(embeddings)
        output = g.op("Concat", *list_, axis_i=0)
        # aten::embedding_bag returns a tuple of 4 elements: output, offset2bag, bag_size, max_indices.
        # But the last three outputs are not used in torch.nn.EmbeddingBag or torch.nn.functional.embedding_bag.
        return output, None, None, None
    else:
        return sym_help._onnx_unsupported("embedding_bag with unknown shape of offsets for opset 10 is not supported. "
                                          "please use opset 11 or higher.")
def _get_im2col_output_shape(g, input, kernel_h, kernel_w):
    batch_dim = size(g, input, g.op("Constant", value_t=torch.tensor(0)))
    channel_dim = size(g, input, g.op("Constant", value_t=torch.tensor(1)))
    channel_unfolded = g.op("Mul", channel_dim,
                            g.op("Constant", value_t=torch.tensor(kernel_h * kernel_w)))

    return g.op("Concat",
                sym_help._unsqueeze_helper(g, batch_dim, [0]),
                sym_help._unsqueeze_helper(g, channel_unfolded, [0]),
                g.op("Constant", value_t=torch.tensor([-1])),
                axis_i=0)
def unsqueeze_and_permute_for_mul(g, tensor, unsqueeze_axes, perm):
    # If perm is sorted after removing unsqueeze axes, then permute is not needed.
    # For example, a.unsqueeze(2).permute([0, 2, 1]) is same as a.unsqueeze(1).
    if unsqueeze_axes:
        new_perm = [v for v in perm if v not in unsqueeze_axes]
        is_sorted = all(new_perm[i] < new_perm[i + 1] for i in range(len(new_perm) - 1))
        if is_sorted:
            return sym_help._unsqueeze_helper(g, tensor, [perm.index(axis) for axis in unsqueeze_axes])
    if len(unsqueeze_axes) > 0:
        tensor = sym_help._unsqueeze_helper(g, tensor, unsqueeze_axes)
    if need_permute(perm):
        tensor = g.op("Transpose", tensor, perm_i=perm)
    return tensor
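# Quick eager-mode check (illustration only, not exporter code) of the equivalence the
# comment above relies on: when the permutation is sorted after dropping the unsqueezed
# axes, the Transpose can be folded into the Unsqueeze.
import torch

a = torch.randn(2, 3)
lhs = a.unsqueeze(2).permute(0, 2, 1)   # insert axis at 2, then move it to position 1
rhs = a.unsqueeze(1)                    # same result with a single unsqueeze
assert torch.equal(lhs, rhs) and lhs.shape == (2, 1, 3)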
def split(g, self, split_size_or_sizes, dim, _outputs=None):
    if not sym_help._is_split_static(split_size_or_sizes, _outputs):
        split_out = g.op("SplitToSequence", self, split_size_or_sizes, axis_i=dim)
        if _outputs is None:
            return split_out
        # Convert to multiple slice nodes iff number of splits and number of outputs are statically known.
        if sym_help._is_packed_list(split_size_or_sizes) and \
                len(sym_help._unpack_list(split_size_or_sizes)) == _outputs:
            split_sizes = [
                sym_help._unsqueeze_helper(g, v, [0])
                for v in sym_help._unpack_list(split_size_or_sizes)
            ]
            start = g.op("Constant", value_t=torch.tensor([0], dtype=torch.long))
            axis = g.op("Constant", value_t=torch.tensor([dim], dtype=torch.long))
            res = []
            for i in range(_outputs):
                end = g.op("Add", start, split_sizes[i])  # split_sizes is a list of same length as _outputs
                res.append(g.op("Slice", self, start, end, axis))
                start = end
            return res
        return [
            g.op("SequenceAt", split_out, g.op("Constant", value_t=torch.tensor([i], dtype=torch.long)))
            for i in range(_outputs)
        ]
    else:
        return torch.onnx.symbolic_opset9.split(g, self, split_size_or_sizes, dim, _outputs)
def embedding_renorm(g, weight, indices, max_norm, norm_type):
    unique_indices = g.op("Unique", indices)
    partial_weight = g.op("Gather", weight, unique_indices)
    norm_type = int(norm_type)
    if norm_type == 1:
        norm_type = "ReduceL1"
    elif norm_type == 2:
        norm_type = "ReduceL2"
    else:
        raise RuntimeError(
            f"Unsupported: ONNX export of embedding_renorm with norm: {norm_type}. "
            "Only 1. and 2. are supported.")
    partial_weight_norm = g.op(norm_type, partial_weight, axes_i=[1], keepdims_i=1)
    # https://github.com/pytorch/pytorch/blob/0a07488ed2c47765e337e290bd138c0e6e459cbd/aten/src/ATen/native/Embedding.cpp#L177
    # Add 1e-7 to prevent division by zero.
    partial_weight_norm_ = g.op("Add", partial_weight_norm, g.op("Constant", value_t=torch.tensor(1e-7)))
    max_norm = torch.tensor(max_norm)
    scales = g.op("Div", max_norm, partial_weight_norm_)
    partial_weight_renorm = g.op("Mul", partial_weight, scales)
    partial_weight_renorm = g.op("Where",
                                 g.op("Greater", partial_weight_norm, max_norm),
                                 partial_weight_renorm,
                                 partial_weight)
    return g.op("ScatterND", weight, sym_help._unsqueeze_helper(g, unique_indices, [1]), partial_weight_renorm)
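# A small eager-mode sketch (illustrative only, with an assumed reference helper name) of
# the renorm math the ScatterND-based symbolic above mirrors: rows referenced by `indices`
# whose p-norm exceeds max_norm are rescaled to max_norm; all other rows are left alone.
import torch

def embedding_renorm_reference(weight, indices, max_norm, norm_type=2.0):
    unique_rows = indices.unique()
    selected = weight[unique_rows]
    norms = selected.norm(p=norm_type, dim=1, keepdim=True)
    scales = max_norm / (norms + 1e-7)                    # same epsilon as the comment above
    renormed = torch.where(norms > max_norm, selected * scales, selected)
    out = weight.clone()
    out[unique_rows] = renormed
    return out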
def _get_im2col_indices_along_dim(g, input_d, kernel_size_d, dilation_d, padding_d, stride_d):
    # Input is always 4-D (N, C, H, W)
    # Calculate indices of sliding blocks along spatial dimension
    # Slide kernel over input each dim d:
    # each dimension d ranges from 0 to input[d]+2xpadding[d]-dilation[d]x(kernel_size[d]-1)
    # with steps = stride
    blocks_d = g.op("Add", input_d, g.op("Constant", value_t=torch.tensor(padding_d * 2)))
    blocks_d = g.op("Sub", blocks_d,
                    g.op("Constant", value_t=torch.tensor(dilation_d * (kernel_size_d - 1))))

    # Stride kernel over input and find starting indices along dim d
    blocks_d_indices = g.op("Range",
                            g.op("Constant", value_t=torch.tensor(0)),
                            blocks_d,
                            g.op("Constant", value_t=torch.tensor(stride_d)))

    # Apply dilation on kernel and find its indices along dim d
    kernel_grid = numpy.arange(0, kernel_size_d * dilation_d, dilation_d)
    kernel_grid = g.op("Constant", value_t=torch.tensor([kernel_grid]))

    # Broadcast and add kernel starting positions (indices) with
    # kernel_grid along dim d, to get block indices along dim d
    blocks_d_indices = sym_help._unsqueeze_helper(g, blocks_d_indices, [0])  # Reshape to [1, -1]
    kernel_mask = g.op('Reshape', kernel_grid, g.op('Constant', value_t=torch.tensor([-1, 1])))
    block_mask = g.op("Add", blocks_d_indices, kernel_mask)

    return block_mask
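# Numeric illustration (not exporter code) of the index arithmetic above for one spatial
# dim: with input_d=5, kernel_size_d=2, dilation_d=1, padding_d=0, stride_d=2 the Range
# gives window starts [0, 2] and the dilated kernel offsets are [0, 1]; broadcasting their
# sum yields the im2col block indices.
import numpy

input_d, kernel_size_d, dilation_d, padding_d, stride_d = 5, 2, 1, 0, 2
blocks_d = input_d + 2 * padding_d - dilation_d * (kernel_size_d - 1)       # 4
starts = numpy.arange(0, blocks_d, stride_d)                                # [0, 2]
kernel_grid = numpy.arange(0, kernel_size_d * dilation_d, dilation_d)       # [0, 1]
block_mask = starts[numpy.newaxis, :] + kernel_grid[:, numpy.newaxis]
# [[0, 2],
#  [1, 3]] -- column j lists the input positions gathered for output position j.
print(block_mask)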
def split(g, self, split_size_or_sizes, dim, _outputs=None):
    if not sym_help._is_split_static(split_size_or_sizes, _outputs):
        split_out = g.op("SplitToSequence", self, split_size_or_sizes, axis_i=dim)
        if _outputs is None:
            return split_out
        # Convert to multiple slice nodes iff number of splits and number of outputs are statically known.
        if sym_help._is_packed_list(split_size_or_sizes) and \
                len(sym_help._unpack_list(split_size_or_sizes)) == _outputs:
            split_sizes = [sym_help._unsqueeze_helper(g, v, [0]) for v in sym_help._unpack_list(split_size_or_sizes)]
            start = g.op("Constant", value_t=torch.tensor([0], dtype=torch.long))
            axis = g.op("Constant", value_t=torch.tensor([dim], dtype=torch.long))
            res = []
            for i in range(_outputs):
                end = g.op("Add", start, split_sizes[i])  # split_sizes is a list of same length as _outputs
                res.append(g.op("Slice", self, start, end, axis))
                start = end
            return res
        return [g.op("SequenceAt", split_out, g.op("Constant", value_t=torch.tensor([i], dtype=torch.long)))
                for i in range(_outputs)]

    split_val = split_size_or_sizes.node()['value']
    if split_val.dim() > 0:
        return g.op("Split", self, split_size_or_sizes, axis_i=dim, outputs=_outputs)
    split_size = sym_help._get_const(split_size_or_sizes, 'i', 'split_size')

    size = self.type().sizes()[dim]
    splits = [split_size] * (size // split_size)
    leftover = size % split_size
    if leftover:
        splits.append(leftover)
    splits = g.op("Constant", value_t=torch.tensor(splits))
    return g.op("Split", self, splits, axis_i=dim, outputs=_outputs)
def split(g, self, split_size_or_sizes, dim, _outputs=None):
    if not symbolic_helper._is_split_static(split_size_or_sizes, _outputs):
        split_out = g.op("SplitToSequence", self, split_size_or_sizes, axis_i=dim)
        if _outputs is None:
            return split_out
        # Convert to multiple slice nodes iff number of splits and number of outputs are statically known.
        if (
            symbolic_helper._is_packed_list(split_size_or_sizes)
            and len(symbolic_helper._unpack_list(split_size_or_sizes)) == _outputs
        ):
            split_sizes = [
                symbolic_helper._unsqueeze_helper(g, v, [0])
                for v in symbolic_helper._unpack_list(split_size_or_sizes)
            ]
            start = g.op("Constant", value_t=torch.tensor([0], dtype=torch.long))
            axis = g.op("Constant", value_t=torch.tensor([dim], dtype=torch.long))
            res = []
            for i in range(_outputs):
                end = g.op(
                    "Add", start, split_sizes[i]
                )  # split_sizes is a list of same length as _outputs
                res.append(g.op("Slice", self, start, end, axis))
                start = end
            return res
        return [
            g.op(
                "SequenceAt",
                split_out,
                g.op("Constant", value_t=torch.tensor([i], dtype=torch.long)),
            )
            for i in range(_outputs)
        ]

    split_val = split_size_or_sizes.node()["value"]
    if split_val.dim() > 0:
        return g.op("Split", self, split_size_or_sizes, axis_i=dim, outputs=_outputs)
    split_size = symbolic_helper._get_const(split_size_or_sizes, "i", "split_size")

    size = symbolic_helper._get_tensor_dim_size(self, dim)
    if size is None:
        if _outputs is not None:
            size = split_size * _outputs
        else:
            raise RuntimeError("Unknown dimension size not supported")
    splits = [split_size] * (size // split_size)
    leftover = size % split_size
    if leftover:
        splits.append(leftover)
    splits = g.op("Constant", value_t=torch.tensor(splits))
    return g.op("Split", self, splits, axis_i=dim, outputs=_outputs)
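# Eager-mode reference (illustration only) for the splits/leftover computation above:
# torch.split with an int size produces equal chunks plus a smaller final chunk, which is
# exactly the [split_size, ..., leftover] list fed to the ONNX Split op.
import torch

x = torch.arange(7)
chunks = torch.split(x, 3)          # sizes (3, 3, 1): 7 // 3 full chunks plus leftover 7 % 3
assert [c.numel() for c in chunks] == [3, 3, 1]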
def gen_concat(g: torch._C.Graph, *args: Any) -> torch._C.Value:
    seq: List[torch._C.Value] = []
    for i in args:
        if i.type().kind() == "IntType" or len(i.type().sizes()) == 0:
            seq.append(
                sym_hel._unsqueeze_helper(g, i, axes_i=[0])  # type: ignore[no-untyped-call,call-arg]
            )
        else:
            seq.append(i)
    return cast(torch._C.Value, g.op("Concat", *seq, axis_i=0))
def _slice(g, input, axes, starts, ends, steps=None, dynamic_slice=False):
    if dynamic_slice:
        starts = sym_help._unsqueeze_helper(g, starts, [0])
        ends = sym_help._unsqueeze_helper(g, ends, [0])
        if isinstance(axes, int):
            axes = g.op("Constant", value_t=torch.tensor(axes))
        axes = sym_help._unsqueeze_helper(g, axes, [0])
    else:
        assert len(starts) == len(ends)
        assert len(starts) == len(axes)
        assert steps is None or len(starts) == len(steps)
        if len(starts) == 1 and starts[0] == 0 and ends[0] == 9223372036854775807 \
                and (steps is None or (len(steps) == 1 and steps[0] == 1)):
            return input
        axes = g.op("Constant", value_t=torch.tensor(axes))
        starts = g.op("Constant", value_t=torch.tensor(starts))
        ends = g.op("Constant", value_t=torch.tensor(ends))
    if steps is None:
        return g.op("Slice", input, starts, ends, axes)
    steps = g.op("Constant", value_t=torch.tensor(steps))
    return g.op("Slice", input, starts, ends, axes, steps)
def unsqueeze(g, self, dim):
    if sym_help._is_constant(dim):
        dim = sym_help._get_const(dim, "i", "dim")

    return sym_help._unsqueeze_helper(g, self, [dim])
def permute_and_reshape_tensor(
    g,
    tensor,
    is_lhs,
    rank,
    perm,
    matmul_output_axes,
    contraction_axes,
    batch_length,
    matmul_output_numel_tensor,
    contraction_numel_tensor,
    shape_tensor,
):
    # If matmul_output_axes and contraction_axes are contiguous in input tensor,
    # we can move Reshape to before Transpose, so it's possible that the Transpose is fused to MatMul.
    # Otherwise, we have to Transpose first to move those axes together and then Reshape.
    is_matmul_output_axes_contiguous = is_axes_contiguous(matmul_output_axes)
    is_contraction_axes_contiguous = is_axes_contiguous(contraction_axes)
    if is_matmul_output_axes_contiguous and is_contraction_axes_contiguous:
        # Combine contiguous axes to one axis.
        first_matmul_output_axis = matmul_output_axes[0] if len(matmul_output_axes) > 1 else -1
        first_contraction_axis = contraction_axes[0] if len(contraction_axes) > 1 else -1
        # If length of matmul_output_axes and contraction_axes are less than 2, no need to Reshape,
        # it needs an Unsqueeze and a Transpose if needed.
        if first_matmul_output_axis == -1 and first_contraction_axis == -1:
            assert not matmul_output_axes and len(contraction_axes) == 1
            if need_permute(perm):
                new_tensor = sym_help._unsqueeze_helper(g, tensor, [-1])
                pos = batch_length if is_lhs else len(perm)
                perm = perm[:pos] + [len(perm)] + perm[pos:]
                new_tensor = g.op("Transpose", new_tensor, perm_i=perm)
            else:
                new_tensor = sym_help._unsqueeze_helper(g, tensor, [batch_length if is_lhs else -1])
        else:
            axes_to_remove = contraction_axes[1:]  # contraction_axes can't be empty.
            if len(matmul_output_axes) > 1:
                axes_to_remove = axes_to_remove + matmul_output_axes[1:]
            remaining_axes = [axis for axis in range(rank) if axis not in axes_to_remove]
            # Calculate the new shape, use 0 or -1 if possible.
            shape_tensors = []
            before_contiguous_axes = True
            last_zero_dim = -1
            has_neg_one_dim = False
            for axis in remaining_axes:
                if axis == first_matmul_output_axis:
                    shape_tensors.append(matmul_output_numel_tensor)
                    before_contiguous_axes = False
                elif axis == first_contraction_axis:
                    shape_tensors.append(contraction_numel_tensor)
                    before_contiguous_axes = False
                elif before_contiguous_axes:
                    shape_tensors.append(g.op("Constant", value_t=torch.tensor([0], dtype=torch.int64)))
                    last_zero_dim = len(shape_tensors) - 1
                elif axis == remaining_axes[-1]:
                    shape_tensors.append(g.op("Constant", value_t=torch.tensor([-1], dtype=torch.int64)))
                    has_neg_one_dim = True
                else:
                    single_axis_shape_tensor, _, shape_tensor = get_shape_tensor_by_axes(
                        g, tensor, shape_tensor, [axis], False)
                    shape_tensors.append(single_axis_shape_tensor)
            if not has_neg_one_dim and last_zero_dim >= 0:
                shape_tensors[last_zero_dim] = g.op("Constant", value_t=torch.tensor([-1], dtype=torch.int64))
            # Adjust the perm.
            perm = [axis for axis in perm if axis not in axes_to_remove]
            new_axis = 0
            for axis in remaining_axes:
                perm[perm.index(axis)] = new_axis
                new_axis += 1
            # If matmul_output_axes is empty, need to add a dim-1 axis.
            if not matmul_output_axes:
                shape_tensors.append(g.op("Constant", value_t=torch.tensor([1], dtype=torch.int64)))
                pos = batch_length if is_lhs else len(perm)
                perm = perm[:pos] + [new_axis] + perm[pos:]
            new_tensor = reshape_tensor(g, tensor, shape_tensors)
            if need_permute(perm):
                new_tensor = g.op("Transpose", new_tensor, perm_i=perm)
    else:
        if need_permute(perm):
            new_tensor = g.op("Transpose", tensor, perm_i=perm)
        else:
            new_tensor = tensor
        # Calculate the new shape, use 0 or -1 if possible.
        shape_tensors = [g.op("Constant", value_t=torch.tensor([0], dtype=torch.int64))] * batch_length
        if is_lhs:
            if matmul_output_numel_tensor is None:
                matmul_output_numel_tensor = g.op(
                    "Constant", value_t=torch.tensor([1 - len(matmul_output_axes)], dtype=torch.int64))
            shape_tensors.append(matmul_output_numel_tensor)
            shape_tensors.append(g.op("Constant", value_t=torch.tensor([-1], dtype=torch.int64)))
        else:
            # contraction_axes can't be empty, None here means only one contraction axis.
            if contraction_numel_tensor is None:
                contraction_numel_tensor = g.op("Constant", value_t=torch.tensor([0], dtype=torch.int64))
            shape_tensors.append(contraction_numel_tensor)
            shape_tensors.append(g.op("Constant", value_t=torch.tensor([-1], dtype=torch.int64)))
        new_tensor = reshape_tensor(g, new_tensor, shape_tensors)
    return new_tensor, shape_tensor
def index_put(g, self, indices_list_value, values, accumulate=False):
    indices_list = sym_help._unpack_list(indices_list_value)
    if sym_help._operator_export_type == torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK:
        args = [self] + indices_list + [values, accumulate]
        return g.op("ATen", *args, operator_s='index_put')

    from torch.onnx.symbolic_opset9 import add, expand
    accumulate = sym_help._parse_arg(accumulate, 'b')

    index = indices_list[0]

    if len(indices_list) > 1:
        for ind in indices_list[1:]:
            index = add(g, index, ind)
        broadcast_index_shape = g.op("Shape", index)
        indices_list = [
            sym_help._unsqueeze_helper(g, expand(g, ind, broadcast_index_shape, None), [-1])
            for ind in indices_list
        ]
        index = g.op("Concat", *indices_list, axis_i=-1)
    else:
        # Replace index_put node with masked_scatter or masked_fill
        # when inputs to the index_put node contains boolean inputs
        #
        # index_put -> masked_fill
        #
        # before graph(%0 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=1, device=cpu),
        #       %some_const : Float(requires_grad=0, device=cpu)):
        #   %6 : None = prim::Constant()
        #   %mask : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) = aten::clone(%0, %6)
        #   %8 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) = aten::ne(%mask, %some_const)
        #   %26 : Long(requires_grad=0, device=cpu) = prim::Constant[value={11}]()
        #   %27 : Long(requires_grad=0, device=cpu) = prim::Constant[value={0}]()
        #   %11 : Device = prim::Constant[value="cpu"]()
        #   %12 : None = prim::Constant()
        #   %28 : Long(requires_grad=0, device=cpu) = prim::Constant[value={0}]()
        #   %29 : Long(requires_grad=0, device=cpu) = prim::Constant[value={0}]()
        #   %15 : None = prim::Constant()
        #   %16 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) =
        #               aten::to(%8, %26, %27, %11, %12, %28, %29, %15)
        #   %18 : Float(requires_grad=0, device=cpu) = prim::Constant[value={1}]()
        #   %30 : Long(requires_grad=0, device=cpu) = prim::Constant[value={0}]()
        #   %22 : int[] = prim::Constant[value=[-1]]()
        #   %23 : Tensor = aten::view(%16, %22)
        #   %24 : Tensor?[] = prim::ListConstruct(%23)
        #   %25 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) =
        #               aten::index_put(%mask, %24, %18, %30)
        #   return (%25)
        #
        # after graph(%0 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu),
        #       %some_const : Float(requires_grad=0, device=cpu)):
        #   %3 : Tensor = onnx::Equal(%0, %some_const)
        #   %4 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) = onnx::Not(%3)
        #   %12 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) = onnx::Cast[to=9](%4)
        #   %19 : Tensor = onnx::Cast[to=9](%12)
        #   %20 : Tensor = onnx::Constant[value={1}]()
        #   %21 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)
        #               = onnx::Where(%19, %20, %0)
        #   return (%21)
        #
        # index_put -> masked_scatter
        #
        # before graph(%0 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=1, device=cpu),
        #       %some_const : Float(requires_grad=0, device=cpu)):
        #   %6 : None = prim::Constant()
        #   %mask : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) = aten::clone(%0, %6)
        #   %28 : Float(8, strides=[1], requires_grad=0, device=cpu)
        #               = prim::Constant[value= 1 1 1 1 1 1 1 1 [ CPUFloatType{8} ]]()
        #   %15 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)
        #               = aten::ne(%mask, %some_const)
        #   %34 : Long(requires_grad=0, device=cpu) = prim::Constant[value={11}]()
        #   %35 : Long(requires_grad=0, device=cpu) = prim::Constant[value={0}]()
        #   %18 : Device = prim::Constant[value="cpu"]()
        #   %19 : None = prim::Constant()
        #   %36 : Long(requires_grad=0, device=cpu) = prim::Constant[value={0}]()
        #   %37 : Long(requires_grad=0, device=cpu) = prim::Constant[value={0}]()
        #   %22 : None = prim::Constant()
        #   %23 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)
        #               = aten::to(%15, %34, %35, %18, %19, %36, %37, %22)
        #   %38 : Long(requires_grad=0, device=cpu) = prim::Constant[value={0}]()
        #   %30 : int[] = prim::Constant[value=[-1]]()
        #   %31 : Tensor = aten::view(%23, %30)
        #   %32 : Tensor?[] = prim::ListConstruct(%31)
        #   %33 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)
        #               = aten::index_put(%mask, %32, %28, %38)
        #   return (%33)
        #
        # after graph(%0 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu),
        #       %some_const : Float(requires_grad=0, device=cpu)):
        #   %3 : Float(8, strides=[1], requires_grad=0, device=cpu)
        #               = onnx::Constant[value= 1 1 1 1 1 1 1 1 [ CPUFloatType{8} ]]()
        #   %4 : Tensor = onnx::Equal(%0, %some_const)
        #   %5 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) = onnx::Not(%4)
        #   %13 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) = onnx::Cast[to=9](%5)
        #   %19 : Tensor = onnx::Shape(%0)
        #   %20 : Tensor = onnx::Expand(%13, %19)
        #   %21 : Tensor = onnx::NonZero(%20)
        #   %22 : Tensor = onnx::Transpose[perm=[1, 0]](%21)
        #   %23 : Tensor = onnx::Constant[value={-1}]()
        #   %24 : Tensor = onnx::Reshape(%3, %23)
        #   %25 : Tensor = onnx::Shape(%22)
        #   %27 : Tensor = onnx::Constant[value={0}]()
        #   %28 : Tensor = onnx::Gather[axis=0](%25, %27)
        #   %29 : Tensor = onnx::Constant[value={0}]()
        #   %30 : Tensor = onnx::Unsqueeze[axes=[0]](%29)
        #   %31 : Tensor = onnx::Unsqueeze[axes=[0]](%28)
        #   %32 : Tensor = onnx::Constant[value={0}]()
        #   %33 : Tensor = onnx::Unsqueeze[axes=[0]](%32)
        #   %34 : Tensor = onnx::Slice(%24, %30, %31, %33)
        #   %35 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)
        #               = onnx::ScatterND(%0, %22, %34)
        #   return (%35)
        bool_inp = list(index.node().inputs())[0]
        if bool_inp.type() is not None and bool_inp.type().scalarType() == 'Bool':
            rank = sym_help._get_tensor_rank(values)
            if rank is not None and rank == 0:
                from torch.onnx.symbolic_opset9 import masked_fill
                return masked_fill(g, self, bool_inp, values)
            return masked_scatter(g, self, bool_inp, values)
        broadcast_index_shape = g.op("Shape", index)
        index = sym_help._unsqueeze_helper(g, index, [-1])
    sub_data_shape = sym_help._slice_helper(
        g, g.op("Shape", self), axes=[0], starts=[len(indices_list)], ends=[maxsize])
    values_shape = g.op("Concat", broadcast_index_shape, sub_data_shape, axis_i=0)
    values = g.op("Reshape", values, values_shape)

    if accumulate:
        dtype = self.type().scalarType()
        dtype = sym_help.scalar_type_to_onnx.index(sym_help.cast_pytorch_to_onnx[dtype])
        dtype = sym_help.scalar_type_to_pytorch_type[dtype]
        zeros = g.op("ConstantOfShape", g.op("Shape", self), value_t=torch.tensor([0], dtype=dtype))
        result = g.op("ScatterND", zeros, index, values)
        result = add(g, self, result)
    else:
        result = g.op("ScatterND", self, index, values)

    return result
def index_put(g, self, indices_list_value, values, accumulate=False):
    if symbolic_helper._is_packed_list(indices_list_value):
        indices_list = symbolic_helper._unpack_list(indices_list_value)
    else:
        indices_list = [indices_list_value]
    if symbolic_helper.is_caffe2_aten_fallback():
        args = [self] + indices_list + [values, accumulate]
        return g.at("index_put", *args)

    accumulate = symbolic_helper._parse_arg(accumulate, "b")

    if len(indices_list) == 0:
        return values

    if len(indices_list) > 1:
        for idx_ in range(len(indices_list)):
            if indices_list[idx_].type().scalarType() == "Bool":  # type: ignore[attr-defined]
                # TODO(justinchuby): Remove type ignore after #81112 is checked in.
                indices_list[idx_] = g.op("NonZero", indices_list[idx_])
        index = indices_list[0]

        for ind in indices_list[1:]:
            index = opset9.add(g, index, ind)
        broadcast_index_shape = g.op("Shape", index)
        indices_list = [
            symbolic_helper._unsqueeze_helper(
                g, opset9.expand(g, ind, broadcast_index_shape, None), [-1]
            )
            for ind in indices_list
        ]
        index = g.op("Concat", *indices_list, axis_i=-1)
    else:
        # Replace index_put node with masked_scatter or masked_fill
        # when inputs to the index_put node contains a single boolean input.
        #
        # index_put -> masked_fill
        #  * input index contains single tensor of Bool type (e.g.: %24 <- %23).
        #  * input value contains single element (e.g.: %18).
        #
        # Torch IR
        #   %mask : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) = aten::clone(%0, %6)
        #   %16 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) =
        #               aten::to(%8, %26, %27, %11, %12, %28, %29, %15)
        #   %18 : Float(requires_grad=0, device=cpu) = prim::Constant[value={1}]()
        #   %23 : Bool(8, strides=[1], device=cpu) = aten::view(%16, %22)
        #   %24 : Tensor?[] = prim::ListConstruct(%23)
        #   %25 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) =
        #               aten::index_put(%mask, %24, %18, %30)
        #   return (%25)
        #
        #
        # index_put -> masked_scatter
        #  * input index contains single tensor of Bool type (e.g.: %32 <- %31).
        #  * input value contains multiple elements (e.g.: %28).
        #
        # Torch IR
        #   %mask : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) = aten::clone(%0, %6)
        #   %28 : Float(8, strides=[1], requires_grad=0, device=cpu)
        #               = prim::Constant[value= 1 1 1 1 1 1 1 1 [ CPUFloatType{8} ]]()
        #   %15 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)
        #               = aten::ne(%mask, %some_const)
        #   %23 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)
        #               = aten::to(%15, %34, %35, %18, %19, %36, %37, %22)
        #   %38 : Long(requires_grad=0, device=cpu) = prim::Constant[value={0}]()
        #   %30 : int[] = prim::Constant[value=[-1]]()
        #   %31 : Bool(8, strides=[1], device=cpu) = aten::view(%23, %30)
        #   %32 : Tensor?[] = prim::ListConstruct(%31)
        #   %33 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)
        #               = aten::index_put(%mask, %32, %28, %38)
        #   return (%33)
        index = indices_list[0]
        bool_inp = index
        if bool_inp.type() is not None and bool_inp.type().scalarType() == "Bool":  # type: ignore[attr-defined]
            # TODO(justinchuby): Remove type ignore after #81112 is checked in.
            rank = symbolic_helper._get_tensor_rank(values)
            if rank is not None and rank == 0:
                return opset9.masked_fill(g, self, bool_inp, values)
            return masked_scatter(g, self, bool_inp, values)
        broadcast_index_shape = g.op("Shape", index)
        index = symbolic_helper._unsqueeze_helper(g, index, [-1])
    sub_data_shape = symbolic_helper._slice_helper(
        g, g.op("Shape", self), axes=[0], starts=[len(indices_list)], ends=[sys.maxsize]
    )
    values_shape = g.op("Concat", broadcast_index_shape, sub_data_shape, axis_i=0)
    # Check if values is a singular value and expand accordingly
    rank = symbolic_helper._get_tensor_rank(values)
    if rank is not None and rank == 0:
        values = opset9.expand(g, values, values_shape, None)
    values = symbolic_helper._reshape_helper(g, values, values_shape)

    dtype = self.type().scalarType()
    if dtype is not None and dtype != values.type().scalarType():
        values = g.op("Cast", values, to_i=symbolic_helper.cast_pytorch_to_onnx[dtype])
    dtype = symbolic_helper.scalar_type_to_onnx.index(
        symbolic_helper.cast_pytorch_to_onnx[dtype]
    )
    dtype = symbolic_helper.scalar_type_to_pytorch_type[dtype]

    if accumulate:
        zeros = g.op(
            "ConstantOfShape",
            g.op("Shape", self),
            value_t=torch.tensor([0], dtype=dtype),
        )
        result = g.op("ScatterND", zeros, index, values)
        result = add(g, self, result)
    else:
        result = g.op("ScatterND", self, index, values)

    return result
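# Eager-mode illustration (not exporter code) of the two rewrites described above: a single
# boolean index makes aten::index_put equivalent to masked_fill for a scalar value and to
# masked_scatter for a multi-element value.
import torch

x = torch.zeros(2, 3)
mask = torch.tensor([[True, False, True], [False, True, False]])

scalar_filled = x.clone().index_put_((mask,), torch.tensor(7.0))
assert torch.equal(scalar_filled, x.masked_fill(mask, 7.0))

src = torch.tensor([1.0, 2.0, 3.0])
scattered = x.clone().index_put_((mask,), src)
assert torch.equal(scattered, x.masked_scatter(mask, src))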
def unsqueeze(g, self, dim):
    return sym_help._unsqueeze_helper(g, self, [dim])
def index_put(g, self, indices_list_value, values, accumulate=False):
    if sym_help._is_packed_list(indices_list_value):
        indices_list = sym_help._unpack_list(indices_list_value)
    else:
        indices_list = [indices_list_value]
    if sym_help._operator_export_type == torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK:
        args = [self] + indices_list + [values, accumulate]
        return g.op("ATen", *args, operator_s='index_put')

    from torch.onnx.symbolic_opset9 import add, expand
    accumulate = sym_help._parse_arg(accumulate, 'b')

    if len(indices_list) == 0:
        return values

    index = indices_list[0]

    if len(indices_list) > 1:
        for ind in indices_list[1:]:
            index = add(g, index, ind)
        broadcast_index_shape = g.op("Shape", index)
        indices_list = [
            sym_help._unsqueeze_helper(g, expand(g, ind, broadcast_index_shape, None), [-1])
            for ind in indices_list
        ]
        index = g.op("Concat", *indices_list, axis_i=-1)
    else:
        # Replace index_put node with masked_scatter or masked_fill
        # when inputs to the index_put node contains boolean inputs
        #
        # index_put -> masked_fill
        #  * input index contains single tensor of Bool type (e.g.: %24 <- %23).
        #  * input value contains single element (e.g.: %18).
        #
        # Torch IR
        #   %mask : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) = aten::clone(%0, %6)
        #   %16 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) =
        #               aten::to(%8, %26, %27, %11, %12, %28, %29, %15)
        #   %18 : Float(requires_grad=0, device=cpu) = prim::Constant[value={1}]()
        #   %23 : Bool(8, strides=[1], device=cpu) = aten::view(%16, %22)
        #   %24 : Tensor?[] = prim::ListConstruct(%23)
        #   %25 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) =
        #               aten::index_put(%mask, %24, %18, %30)
        #   return (%25)
        #
        #
        # index_put -> masked_scatter
        #  * input index contains single tensor of Bool type (e.g.: %32 <- %31).
        #  * input value contains multiple elements (e.g.: %28).
        #
        # Torch IR
        #   %mask : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) = aten::clone(%0, %6)
        #   %28 : Float(8, strides=[1], requires_grad=0, device=cpu)
        #               = prim::Constant[value= 1 1 1 1 1 1 1 1 [ CPUFloatType{8} ]]()
        #   %15 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)
        #               = aten::ne(%mask, %some_const)
        #   %23 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)
        #               = aten::to(%15, %34, %35, %18, %19, %36, %37, %22)
        #   %38 : Long(requires_grad=0, device=cpu) = prim::Constant[value={0}]()
        #   %30 : int[] = prim::Constant[value=[-1]]()
        #   %31 : Bool(8, strides=[1], device=cpu) = aten::view(%23, %30)
        #   %32 : Tensor?[] = prim::ListConstruct(%31)
        #   %33 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)
        #               = aten::index_put(%mask, %32, %28, %38)
        #   return (%33)
        bool_inp = index
        if bool_inp.type() is not None and bool_inp.type().scalarType() == 'Bool':
            rank = sym_help._get_tensor_rank(values)
            if rank is not None and rank == 0:
                from torch.onnx.symbolic_opset9 import masked_fill
                return masked_fill(g, self, bool_inp, values)
            return masked_scatter(g, self, bool_inp, values)
        broadcast_index_shape = g.op("Shape", index)
        index = sym_help._unsqueeze_helper(g, index, [-1])
    sub_data_shape = sym_help._slice_helper(
        g, g.op("Shape", self), axes=[0], starts=[len(indices_list)], ends=[maxsize])
    values_shape = g.op("Concat", broadcast_index_shape, sub_data_shape, axis_i=0)
    # Check if values is a singular value and expand accordingly
    rank = sym_help._get_tensor_rank(values)
    if rank is not None and rank == 0:
        values = expand(g, values, values_shape, None)
    values = g.op("Reshape", values, values_shape)

    dtype = self.type().scalarType()
    if dtype is not None and dtype != values.type().scalarType():
        values = g.op("Cast", values, to_i=sym_help.cast_pytorch_to_onnx[dtype])
    dtype = sym_help.scalar_type_to_onnx.index(sym_help.cast_pytorch_to_onnx[dtype])
    dtype = sym_help.scalar_type_to_pytorch_type[dtype]

    if accumulate:
        zeros = g.op("ConstantOfShape", g.op("Shape", self), value_t=torch.tensor([0], dtype=dtype))
        result = g.op("ScatterND", zeros, index, values)
        result = add(g, self, result)
    else:
        result = g.op("ScatterND", self, index, values)

    return result
def diagonal(g, self, offset, dim1, dim2):
    dim1_size = opset9.size(
        g, self, dim=g.op("Constant", value_t=torch.LongTensor([dim1]))
    )
    dim2_size = opset9.size(
        g, self, dim=g.op("Constant", value_t=torch.LongTensor([dim2]))
    )

    # Create appropriate mask
    mask_shape = g.op("Concat", dim1_size, dim2_size, axis_i=0)
    mask = opset9.zeros(g, mask_shape, None, None, None)
    mask = g.op("EyeLike", mask, k_i=offset)

    # dim1 and dim2 appended as a dimension at the end of the shape
    rank = symbolic_helper._get_tensor_rank(self)
    if rank is not None:
        axes = list(range(rank))
        axes.remove(dim1)
        axes.remove(dim2)
        self = g.op("Transpose", self, perm_i=axes + [dim1, dim2])
    else:
        return symbolic_helper._unimplemented("diagonal", "unknown input rank")

    # Multiply input and mask to calculate values along diagonal
    # The mask consists of one values where diagonal values are to be calculated
    # For example:
    #  [[1.1, 1.2, 1.3],   *    [[1, 0, 0]   =   [[1.1, 0, 0],
    #   [2.1, 2.2, 2.3],         [0, 1, 0]        [0, 2.2, 0],
    #   [3.1, 3.2, 3.3]]         [0, 0, 1]]       [0, 0, 3.3]]
    result = g.op("Mul", self, mask)
    result = symbolic_helper._reducesum_helper(g, result, axes_i=[-1], keepdims_i=0)

    # Calculate gather indices based on offset and dims
    # If offset is greater than zero, set offset to zero as this aids in
    # calculation of selection window
    offset_op = g.op("Constant", value_t=torch.LongTensor([offset]))
    if offset >= 0:
        diag_size = g.op(
            "Max",
            g.op("Min", dim1_size, g.op("Sub", dim2_size, offset_op)),
            g.op("Constant", value_t=torch.LongTensor([0])),
        )
        offset = 0
    else:
        diag_size = g.op(
            "Max",
            g.op("Min", g.op("Add", dim1_size, offset_op), dim2_size),
            g.op("Constant", value_t=torch.LongTensor([0])),
        )
    diag_size = g.op("Concat", diag_size, axis_i=0)

    # Calculate which diagonal values to select
    # For example, in cases with offsets:
    #  [[0, 1.1, 0]
    #   [0, 0, 2.2]]
    # we need to select the last two columns, so we create a tensor
    # with all columns that are to be selected
    # So in this example, it is [1, 2]
    select_window_ones_fill = opset9.ones(g, diag_size, 4, None, None)
    select_window = g.op(
        "CumSum",
        select_window_ones_fill,
        g.op("Constant", value_t=torch.LongTensor([0])),
    )
    select_window = g.op(
        "Add",
        select_window,
        g.op("Constant", value_t=torch.LongTensor([abs(offset) - 1])),
    )

    gather_shape = [
        opset9.size(g, result, dim=g.op("Constant", value_t=torch.LongTensor([axis])))
        for axis in list(range(rank))[:-2]
    ]
    gather_shape.append(diag_size)
    gather_shape = g.op("Concat", *gather_shape, axis_i=0)
    gather_indices = opset9.zeros(g, gather_shape, 4, None, None)

    # There might be cases where offset value is greater than number of rows/columns
    # and might cause the diagonal to overrun and as a result of this, diag_size would be zero.
    # For example, if
    #       offset = 9, dim1_size = 2 (columns), dim2_size = 4 (rows)
    #       diag_size = max(min(2, (4-9)), 0) = 0, based on calculation above
    # Cases with diagonal overrun always result in diag_size = max(0, -ve value) = 0
    # In cases without diagonal overrun, we select the appropriate rows/columns along which we
    # are calculating diagonal values. In cases with diagonal overrun, we return a tensor which has
    # the dimension of the row/column where overrun occurred as 0-dim, as we are essentially
    # returning an empty tensor
    overrun_cond = g.op(
        "Not",
        g.op(
            "Equal",
            diag_size,
            g.op("Constant", value_t=torch.tensor(0, dtype=torch.int64)),
        ),
    )
    if_op = g.op("If", overrun_cond)
    if_node = if_op.node()

    if_block = utils._add_block(if_node)
    gather_indices_if_block = if_block.op("Add", gather_indices, select_window)
    gather_indices_if_block = symbolic_helper._unsqueeze_helper(
        if_block, gather_indices_if_block, [rank - 1]
    )
    final_non_overrun_ = if_block.op(
        "GatherND", result, gather_indices_if_block, batch_dims_i=rank - 2
    )
    utils._add_output_to_block(if_block, final_non_overrun_)

    else_block = utils._add_block(if_node)
    final_overrun_ = opset9.zeros(else_block, gather_shape, 6, None, None)
    utils._add_output_to_block(else_block, final_overrun_)

    return if_op
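# Eager-mode reference (illustration only) for the offset handling above: torch.diagonal
# selects a shorter diagonal as the offset moves off the main one, and an offset past the
# matrix edge ("overrun") yields an empty result, which is what the zero-sized else-branch
# output models.
import torch

m = torch.arange(12.0).reshape(3, 4)
assert torch.equal(torch.diagonal(m, offset=0), torch.tensor([0.0, 5.0, 10.0]))
assert torch.equal(torch.diagonal(m, offset=1), torch.tensor([1.0, 6.0, 11.0]))
assert torch.diagonal(m, offset=9).numel() == 0   # overrun: empty diagonal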
def embedding_bag(g, embedding_matrix, indices, offsets, scale_grad_by_freq,
                  mode, sparse, per_sample_weights, include_last_offset, padding_idx):
    if scale_grad_by_freq and sym_help._training_mode:
        return sym_help._onnx_unsupported('embedding_bag with scale_grad_by_freq for training mode')
    if padding_idx is not None and padding_idx >= 0:
        raise RuntimeError('embedding_bag with padding_idx')

    loop_condition = g.op("Constant", value_t=torch.tensor(1))
    loop_condition = g.op("Cast", loop_condition, to_i=9)
    zero = g.op("Constant", value_t=torch.tensor([0]))

    indices_len = sym_help._unsqueeze_helper(
        g, sym_help._size_helper(g, indices, g.op("Constant", value_t=torch.tensor(0))), [0])
    if not include_last_offset:
        offsets = [offsets, indices_len]
        offsets = g.op("Concat", *offsets, axis_i=0)

    # Offsets holds the starting index position of each bag. So we create a list of the indices slices (determined by
    # offsets) and gather those indices in indices_row. Then we use this subset of indices to gather from embeddings.
    # The embeddings output is a loop scan output, so we can avoid creating a sequence and inserting elements in.
    offsets_starts = sym_help._slice_helper(g, offsets, axes=[0], starts=[0], ends=[maxsize], steps=[1])
    offsets_ends = sym_help._slice_helper(g, offsets, axes=[0], starts=[1], ends=[maxsize], steps=[1])

    loop_len = sym_help._size_helper(g, offsets_ends, g.op("Constant", value_t=torch.tensor(0)))
    loop = g.op("Loop", loop_len, loop_condition)

    loop_block = _add_block(loop.node())
    block_input_iter = _add_input_to_block(loop_block)
    cond = _add_input_to_block(loop_block)

    indices_start = loop_block.op("Gather", offsets_starts, block_input_iter, axis_i=0)
    indices_end = loop_block.op("Gather", offsets_ends, block_input_iter, axis_i=0)
    indices_start = sym_help._unsqueeze_helper(loop_block, indices_start, [0])
    indices_end = sym_help._unsqueeze_helper(loop_block, indices_end, [0])

    indices_row = loop_block.op("Slice", indices, indices_start, indices_end, zero)
    embeddings = loop_block.op("Gather", embedding_matrix, indices_row, axis_i=0)
    if not sym_help._is_none(per_sample_weights):
        per_sample_weights_row = loop_block.op("Slice", per_sample_weights, indices_start, indices_end, zero)
        per_sample_weights_row = sym_help._unsqueeze_helper(loop_block, per_sample_weights_row, [1])
        embeddings = loop_block.op("Mul", embeddings, per_sample_weights_row)
    if mode == 0:
        embeddings = sym_help._reducesum_helper(loop_block, embeddings, axes_i=[0], keepdims_i=0)
    elif mode == 1:
        embeddings = loop_block.op("ReduceMean", embeddings, axes_i=[0], keepdims_i=0)
    else:
        embeddings = loop_block.op("ReduceMax", embeddings, axes_i=[0], keepdims_i=0)

    cond_out = loop_block.op("Cast", loop_condition, to_i=9)
    _add_output_to_block(loop_block, cond_out)
    _add_output_to_block(loop_block, embeddings)

    # aten::embedding_bag returns a tuple of 4 elements: output, offset2bag, bag_size, max_indices.
    # But the last three outputs are not used in torch.nn.EmbeddingBag or torch.nn.functional.embedding_bag.
    return loop.node().output(), None, None, None
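# A minimal export sketch (assumed usage, not part of the source above) showing how the
# Loop-based embedding_bag symbolic would typically be exercised; the module sizes, inputs,
# and output file name are illustrative placeholders.
import torch

bag = torch.nn.EmbeddingBag(num_embeddings=10, embedding_dim=4, mode="sum")
indices = torch.tensor([1, 2, 4, 5, 4, 3])
offsets = torch.tensor([0, 2, 4])
torch.onnx.export(bag, (indices, offsets), "embedding_bag.onnx", opset_version=11)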
def unfold(g, input, dimension, size, step):
    const_size = symbolic_helper._maybe_get_const(size, "i")
    const_step = symbolic_helper._maybe_get_const(step, "i")
    if not symbolic_helper._is_value(const_size) and not symbolic_helper._is_value(const_step):
        return opset9.unfold(g, input, dimension, const_size, const_step)
    if symbolic_helper.is_caffe2_aten_fallback():
        return g.at("unfold", input, dimension_i=dimension, size_i=size, step_i=step)

    sizedim = symbolic_helper._get_tensor_dim_size(input, dimension)
    if sizedim is not None:
        low_start = g.op("Constant", value_t=torch.tensor(0))
        low_end = g.op("Constant", value_t=torch.tensor(sizedim))
        hi_end = g.op("Constant", value_t=torch.tensor(sizedim + 1))
        low_indices = g.op("Range", low_start, low_end, step)
        hi_indices = g.op("Range", size, hi_end, step)

        low_size = symbolic_helper._size_helper(
            g, low_indices, g.op("Constant", value_t=torch.tensor(0))
        )
        hi_size = symbolic_helper._size_helper(
            g, hi_indices, g.op("Constant", value_t=torch.tensor(0))
        )

        ndim = symbolic_helper._get_tensor_rank(input)
        assert ndim is not None
        perm = list(range(0, ndim))
        perm.append(perm.pop(dimension))

        unsqueeze_list = []
        loop_condition = g.op("Constant", value_t=torch.tensor(1))
        loop_condition = g.op("Cast", loop_condition, to_i=9)
        loop_len = g.op("Min", low_size, hi_size)
        loop = g.op("Loop", loop_len, loop_condition)

        loop_block = utils._add_block(loop.node())
        block_input_iter = utils._add_input_to_block(loop_block)
        cond = utils._add_input_to_block(loop_block)

        starts = loop_block.op("Gather", low_indices, block_input_iter)
        ends = loop_block.op("Gather", hi_indices, block_input_iter)
        axes = loop_block.op("Constant", value_t=torch.tensor([2]))
        starts = symbolic_helper._unsqueeze_helper(loop_block, starts, [0])
        ends = symbolic_helper._unsqueeze_helper(loop_block, ends, [0])
        stack = loop_block.op("Slice", input, starts, ends, axes)

        unsqueeze = symbolic_helper._unsqueeze_helper(
            loop_block, loop_block.op("Transpose", stack, perm_i=perm), [dimension]
        )
        unsqueeze_list.append(unsqueeze)
        concat = loop_block.op("Concat", *unsqueeze_list, axis_i=0)

        cond_out = loop_block.op("Cast", loop_condition, to_i=9)
        utils._add_output_to_block(loop_block, cond_out)
        utils._add_output_to_block(loop_block, concat)

        loop_output = loop.node().output()
        perm = [0, 1, 2, 3, 4]
        perm[0], perm[dimension + 1] = perm[dimension + 1], perm[0]
        transpose = g.op("Transpose", loop_output, perm_i=perm)
        squeeze = symbolic_helper._squeeze_helper(g, transpose, [0])

        return squeeze
    else:
        return symbolic_helper._unimplemented("Unfold", "input size not accessible")