def max_pool3d_with_indices(g, input, kernel_size, stride, padding, dilation, ceil_mode):
    if ceil_mode:
        return _unimplemented("max_pool3d_with_indices", "ceil_mode")
    if set(_triple(dilation)) != {1}:
        return _unimplemented("max_pool3d_with_indices", "dilation")
    if not stride:
        stride = kernel_size
    r = g.op("MaxPool", input,
             kernel_shape_i=_triple(kernel_size),
             pads_i=_triple(padding) * 2,
             strides_i=_triple(stride))
    return r, None
def max_pool2d(g, input, kernel_size, stride, padding, dilation, ceil_mode):
    if ceil_mode:
        return _unimplemented("max_pool2d", "ceil_mode")
    if set(_pair(dilation)) != {1}:
        return _unimplemented("max_pool2d", "dilation")
    if not stride:
        stride = kernel_size
    r = g.op("MaxPool", input,
             kernel_shape_i=_pair(kernel_size),
             pads_i=_pair(padding) * 2,
             strides_i=_pair(stride))
    return r
def slice(g, self, dim, start, end, step): if step != 1: _unimplemented("slice", "step!=1 is currently not supported") if start.node().kind() != 'onnx::Constant' or \ end.node().kind() != 'onnx::Constant' or dim.node().kind() != 'onnx::Constant': start_unsqueezed = g.op("Unsqueeze", start, axes_i=[0]) end_unsqueezed = g.op("Unsqueeze", end, axes_i=[0]) dim_unsqueezed = g.op("Unsqueeze", dim, axes_i=[0]) return g.op("DynamicSlice", self, start_unsqueezed, end_unsqueezed, dim_unsqueezed) else: start = _parse_arg(start, 'i') end = _parse_arg(end, 'i') dim = _parse_arg(dim, 'i') return g.op("Slice", self, axes_i=[dim], starts_i=[start], ends_i=[end])
def symbolic(g, input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False):
    from torch.onnx.symbolic import _unimplemented
    if ceil_mode:
        return _unimplemented("MaxPool1d", "ceil_mode")
    if set(_single(dilation)) != {1}:
        return _unimplemented("MaxPool1d", "dilation")
    if stride is None:
        stride = kernel_size
    # ONNX pads expect begin and end values per spatial axis, hence the doubling.
    r = g.op("MaxPool", input,
             kernel_shape_i=_single(kernel_size),
             pads_i=_single(padding) * 2,
             strides_i=_single(stride))
    return r, None
def add(g, self, other, alpha=None):
    # default alpha arg is to allow no-alpha add (aten add st overload no alpha)
    if alpha and _scalar(_maybe_get_scalar(alpha)) != 1:
        return _unimplemented("add", "alpha != 1")
    # See Note [Pointwise by scalar]
    other = _maybe_get_scalar(other)
    return g.op("Add", self, _if_scalar_type_as(g, other, self))
def sub(g, self, other, alpha=None):
    # default alpha arg is to allow no-alpha sub (aten sub st overload no alpha)
    if alpha and _scalar(_maybe_get_scalar(alpha)) != 1:
        return _unimplemented("sub", "alpha != 1")
    # See Note [Pointwise by scalar]. Note that self or other may be scalars.
    other = _maybe_get_scalar(other)
    return g.op("Sub", self, _if_scalar_type_as(g, other, self))
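# The add/sub symbolics above bail out whenever alpha != 1 because ONNX Add and
# Sub have no alpha attribute; exporting the general case would need an extra
# scaling node. A minimal eager-mode sketch (plain torch, not exporter code) of
# the semantics being matched:
import torch

x = torch.tensor([1.0, 2.0, 3.0])
y = torch.tensor([10.0, 20.0, 30.0])

# torch.add/torch.sub scale the second operand by alpha before combining.
assert torch.equal(torch.add(x, y, alpha=1), x + y)      # exportable case
assert torch.equal(torch.sub(x, y, alpha=2), x - 2 * y)  # rejected above: alpha != 1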
def symbolic(g, input, p=0.5, train=False, inplace=False):
    # See Note [Export inplace]
    # NB: In inference mode, FeatureDropout is exported as an identity op.
    from torch.onnx.symbolic import _unimplemented
    if train:
        return _unimplemented("FeatureDropout", "training mode")
    return input
def symbolic(g, input, p=0.5, train=False, inplace=False):
    # See Note [Export inplace]
    # NB: In inference mode, AlphaDropout is exported as an identity op.
    from torch.onnx.symbolic import _unimplemented
    if train:
        return _unimplemented("AlphaDropout", "training mode")
    return input
def upsample_bilinear2d(g, input, output_size, align_corners):
    if align_corners:
        return _unimplemented("upsample_bilinear2d", "align_corners == True")
    height_scale = float(output_size[-2]) / input.type().sizes()[-2]
    width_scale = float(output_size[-1]) / input.type().sizes()[-1]
    scales = g.op("Constant", value_t=torch.tensor([1., 1., height_scale, width_scale]))
    return g.op("Upsample", input, scales, mode_s="linear")
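# upsample_bilinear2d above converts a size-based upsample into a scale-based
# ONNX Upsample by dividing output size by input size per axis. A minimal
# sketch (plain torch with illustrative shapes, not exporter code) of why that
# ratio reproduces the same result when align_corners is False:
import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 4, 6)
out_size = (8, 12)
height_scale = out_size[0] / x.shape[-2]   # 2.0
width_scale = out_size[1] / x.shape[-1]    # 2.0

by_size = F.interpolate(x, size=out_size, mode="bilinear", align_corners=False)
by_scale = F.interpolate(x, scale_factor=(height_scale, width_scale),
                         mode="bilinear", align_corners=False)
assert torch.allclose(by_size, by_scale)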
def log_softmax(g, input, dim=None):
    # PyTorch dim and ONNX axis have different meanings.
    # See Softmax comment for details.
    if dim < 0:
        dim = len(input.type().sizes()) + dim
    if len(input.type().sizes()) != dim + 1:
        return _unimplemented("dim", "ONNX and PyTorch use different strategies to split the input.")
    return g.op("LogSoftmax", input, axis_i=dim)
def upsample_bilinear2d(g, input, output_size, align_corners):
    if align_corners:
        return _unimplemented("upsample_bilinear2d", "align_corners == True")
    height_scale = float(output_size[-2]) / input.type().sizes()[-2]
    width_scale = float(output_size[-1]) / input.type().sizes()[-1]
    return g.op("Upsample", input,
                scales_f=[1., 1., height_scale, width_scale],
                mode_s="bilinear")
def RNN_symbolic_builder(cell_type, *args, **kwargs):
    if cell_type == 'LSTM':
        return RNN_variant_symbolic_builder('LSTM', *args, **kwargs)
    elif cell_type == 'GRU':
        return RNN_variant_symbolic_builder('GRU', *args, **kwargs)
    elif cell_type.startswith('RNN_'):
        return RNN_variant_symbolic_builder('RNN', *args, nonlinearity=cell_type[4:], **kwargs)
    else:
        return lambda *args, **kwargs: _unimplemented("RNN", "cell type " + cell_type)
def sub(g, self, other, alpha):
    if _scalar(alpha) != 1:
        return _unimplemented("sub", "alpha != 1")
    # See Note [Pointwise by scalar]. Note that self or other may be scalars.
    other = _maybe_get_scalar(other)
    self = _maybe_get_scalar(self)
    self = _if_scalar_type_as(g, self, other)
    other = _if_scalar_type_as(g, other, self)
    return g.op("Sub", self, other)
def symbolic(g, input, size=None, scale_factor=None):
    if scale_factor is None:
        scale_factor = 1.0
    if size is not None and set(size) != set([None]):
        from torch.onnx.symbolic import _unimplemented
        return _unimplemented("UpsamplingNearest2d", "size")
    return g.op("Upsample", input,
                width_scale_f=scale_factor,
                height_scale_f=scale_factor,
                mode_s="nearest")
def pixel_shuffle(g, self, upscale_factor):
    dims = self.type().sizes()
    if len(dims) != 4:
        return _unimplemented("pixel_shuffle", "only support 4d input")
    output_channel = dims[1] // upscale_factor // upscale_factor
    after_view = view(g, self, [-1, upscale_factor, upscale_factor,
                                output_channel, dims[2], dims[3]])
    after_transpose = g.op("Transpose", after_view, perm_i=[0, 1, 4, 2, 5, 3])
    return view(g, after_transpose,
                [-1, output_channel, dims[2] * upscale_factor,
                 dims[3] * upscale_factor])
def symbolic_fn(g, input, kernel_size, stride, padding, ceil_mode, count_include_pad):
    if ceil_mode:
        return _unimplemented("avg_pool2d", "ceil_mode")
    if not stride:
        stride = kernel_size
    padding = tuple(tuple_fn(padding))
    if count_include_pad:
        input = g.op("Pad", input,
                     pads_i=((0,) * 2 + padding) * 2,
                     mode_s='constant',
                     value_f=0.)
        padding = (0,) * len(padding)
    return g.op("AveragePool", input,
                kernel_shape_i=tuple_fn(kernel_size),
                strides_i=tuple_fn(stride),
                pads_i=padding * 2)
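# symbolic_fn above emulates count_include_pad=True by materializing the zero
# padding as a separate Pad node and clearing AveragePool's own pads, so the
# padded zeros are treated as real data regardless of how the pooling op
# handles its padding. A minimal eager-mode sketch (plain torch, illustrative
# shapes) of the equivalence this relies on:
import torch
import torch.nn.functional as F

x = torch.randn(1, 1, 4, 4)

with_pad_counted = F.avg_pool2d(x, kernel_size=2, stride=2, padding=1,
                                count_include_pad=True)
explicit_pad = F.avg_pool2d(F.pad(x, (1, 1, 1, 1), mode='constant', value=0.),
                            kernel_size=2, stride=2, padding=0)
assert torch.allclose(with_pad_counted, explicit_pad)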
def softmax(g, input, dim):
    # Softmax does normalization at vector level.
    # PyTorch and ONNX use different strategies to split the input tensor into vectors.
    # Thus dim and axis have different meanings.
    # PyTorch slices the input tensor into vectors along the `dim`-th dimension.
    # ONNX reshapes the input into a 2-D tensor, and `axis` indicates where the input is coerced.
    # If input is a 2 x 3 tensor:
    # input = [[1.0, 1.0, 1.0],
    #          [1.0, 1.0, 1.0]]
    # with dim = 0, the result is:
    # result = [[0.5, 0.5, 0.5],
    #           [0.5, 0.5, 0.5]]
    # with axis = 0, the result is:
    # result = [[0.167, 0.167, 0.167],
    #           [0.167, 0.167, 0.167]]
    # So only when dim and axis both equal ndim - 1 (the last dimension)
    # are their semantics equivalent.
    if dim < 0:
        dim = len(input.type().sizes()) + dim
    if len(input.type().sizes()) != dim + 1:
        return _unimplemented("dim", "ONNX and PyTorch use different strategies to split the input.")
    return g.op('Softmax', input, axis_i=dim)
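# A minimal sketch (plain torch, not exporter code) of the dim/axis mismatch
# described in the comment above: ONNX Softmax (before opset 13) first coerces
# the input into 2-D at `axis`, so for a 2 x 3 tensor of ones, axis = 0
# normalizes over all six elements while PyTorch's dim = 0 normalizes each
# column independently.
import torch
import torch.nn.functional as F

x = torch.ones(2, 3)

pytorch_dim0 = F.softmax(x, dim=0)                              # every entry 0.5
onnx_axis0 = F.softmax(x.reshape(1, -1), dim=1).reshape(2, 3)   # every entry ~0.167

assert torch.allclose(pytorch_dim0, torch.full((2, 3), 0.5))
assert torch.allclose(onnx_axis0, torch.full((2, 3), 1.0 / 6.0))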
def feature_dropout(g, input, p, train):
    # NB: In inference mode, FeatureDropout is exported as an identity op.
    from torch.onnx.symbolic import _unimplemented
    if train:
        return _unimplemented("feature_dropout", "training mode")
    return input
def softplus(g, self, beta, threshold):
    if beta != 1:
        return _unimplemented("beta", "has to be 1")
    return g.op('Softplus', self)
def symbolic(g, input, all_weights, initial_states, batch_sizes):
    if batch_first:
        return _unimplemented("RNN/GRU/LSTM", "batch_first")
    if dropout and kwargs['train']:
        return _unimplemented("RNN/GRU/LSTM", "dropout in training mode")

    unidirectional = not bidirectional

    prev_output = input

    h_outs = []
    if variant == 'RNN' or variant == 'GRU':
        h0 = initial_states
    elif variant == 'LSTM':
        h0, c0 = initial_states
        c_outs = []

    sequence_lens = unused(g) if batch_sizes is None else batch_sizes

    if variant == 'GRU':
        # pytorch is reset, input, hidden
        # onnx is input, reset, hidden
        reform_permutation = [(1, 2), (0, 1), (2, 3)]
    elif variant == 'LSTM':
        # pytorch is input, forget, cell, output.
        # onnx is input, output, forget, cell.
        reform_permutation = [(0, 1), (3, 4), (1, 3)]

    def transform_weights(layer_index):
        if variant == 'RNN':
            weight_ih, weight_hh, bias_ih, bias_hh = all_weights[layer_index]
        elif variant == 'GRU' or variant == 'LSTM':
            weight_ih, weight_hh, bias_ih, bias_hh = \
                [reform_weights(g, w, hidden_size, reform_permutation)
                 for w in all_weights[layer_index]]
        bias_concat = g.op('Concat', bias_ih, bias_hh, axis_i=0)

        return tuple(
            g.op('Unsqueeze', x, axes_i=[0])
            for x in (weight_ih, weight_hh, bias_concat))

    def retrieve_state(x, start, end):
        return x if num_layers == 1 else g.op(
            'Slice', x, axes_i=[0], starts_i=[start], ends_i=[end])

    for i in range(num_layers):
        if unidirectional:
            weight_ih, weight_hh, bias_concat = transform_weights(i)
            state_indices = i, i + 1
        else:
            weight_ih_f, weight_hh_f, bias_f = transform_weights(2 * i)
            weight_ih_b, weight_hh_b, bias_b = transform_weights(2 * i + 1)

            weight_ih = g.op('Concat', weight_ih_f, weight_ih_b, axis_i=0)
            weight_hh = g.op('Concat', weight_hh_f, weight_hh_b, axis_i=0)
            bias_concat = g.op('Concat', bias_f, bias_b, axis_i=0)

            state_indices = 2 * i, 2 * i + 2

        inputs = [prev_output, weight_ih, weight_hh, bias_concat, sequence_lens]

        inputs.append(retrieve_state(h0, *state_indices))
        if variant == 'LSTM':
            inputs.append(retrieve_state(c0, *state_indices))

        extra_kwargs = {} if unidirectional else {'direction_s': 'bidirectional'}
        if variant == 'RNN':
            prev_output, h_out = g.op(
                'RNN', *inputs, outputs=2,
                hidden_size_i=hidden_size,
                activations_s=[kwargs['nonlinearity'].lower()],
                **extra_kwargs)
        elif variant == 'GRU':
            prev_output, h_out = g.op(
                'GRU', *inputs, outputs=2,
                hidden_size_i=hidden_size,
                linear_before_reset_i=1,
                **extra_kwargs)
        elif variant == 'LSTM':
            prev_output, h_out, c_out = g.op(
                'LSTM', *inputs, outputs=3,
                hidden_size_i=hidden_size,
                **extra_kwargs)

        if bidirectional:
            # The ONNX RNN/GRU/LSTM produce an output of dimensions
            #   seq_len, num_directions, batch, hidden_size
            # We have to convert to match pytorch's expected
            #   seq_len, batch, hidden_size * num_directions
            # by first moving num_directions to the end with
            # Transpose, and then combining it with hidden_size
            # with Reshape.
            prev_output = g.op('Transpose', prev_output, perm_i=[0, 2, 3, 1])
            prev_output = g.op(
                'Reshape', prev_output,
                g.op('Constant', value_t=torch.LongTensor([0, 0, -1])))
        else:
            prev_output = g.op('Squeeze', prev_output, axes_i=[1])

        h_outs.append(h_out)
        if variant == 'LSTM':
            c_outs.append(c_out)

    h_outs = h_out if num_layers == 1 else g.op('Concat', *h_outs, axis_i=0)
    if variant == 'RNN' or variant == 'GRU':
        return prev_output, h_outs
    elif variant == 'LSTM':
        c_outs = c_out if num_layers == 1 else g.op('Concat', *c_outs, axis_i=0)
        return prev_output, h_outs, c_outs
def _generic_rnn(g, variant, input, initial_states, all_weights, has_biases,
                 num_layers, dropout, train, bidirectional,
                 batch_first=None, batch_sizes=None):
    weights_per_layer = 4 if has_biases else 2
    assert len(all_weights) == num_layers * weights_per_layer * (1 + bidirectional)
    layer_weights = [all_weights[i:i + weights_per_layer]
                     for i in range(0, len(all_weights), weights_per_layer)]
    if batch_first:
        return _unimplemented("RNN/GRU/LSTM", "batch_first")
    if dropout and train:
        return _unimplemented("RNN/GRU/LSTM", "dropout in training mode")

    if variant.startswith('RNN'):
        nonlinearity = variant[4:].lower()
        variant = 'RNN'

    w_hh = all_weights[1]
    hidden_size = w_hh.type().sizes()[1]

    unidirectional = not bidirectional

    prev_output = input

    h_outs = []
    if variant == 'RNN' or variant == 'GRU':
        h0 = initial_states
    elif variant == 'LSTM':
        h0, c0 = initial_states
        c_outs = []

    sequence_lens = unused(g) if batch_sizes is None else batch_sizes

    if variant == 'GRU':
        # pytorch is reset, input, hidden
        # onnx is input, reset, hidden
        reform_permutation = [(1, 2), (0, 1), (2, 3)]
    elif variant == 'LSTM':
        # pytorch is input, forget, cell, output.
        # onnx is input, output, forget, cell.
        reform_permutation = [(0, 1), (3, 4), (1, 3)]

    def reform_weights(g, w, n, intervals):
        slices = [g.op('Slice', w, axes_i=[0], starts_i=[x * n], ends_i=[y * n])
                  for x, y in intervals]
        return g.op('Concat', *slices, axis_i=0)

    def transform_weights(layer_index):
        if variant == 'RNN':
            weight_ih, weight_hh, bias_ih, bias_hh = layer_weights[layer_index]
        elif variant == 'GRU' or variant == 'LSTM':
            weight_ih, weight_hh, bias_ih, bias_hh = \
                [reform_weights(g, w, hidden_size, reform_permutation)
                 for w in layer_weights[layer_index]]
        bias_concat = g.op('Concat', bias_ih, bias_hh, axis_i=0)

        return tuple(
            g.op('Unsqueeze', x, axes_i=[0])
            for x in (weight_ih, weight_hh, bias_concat))

    def retrieve_state(x, start, end):
        return x if num_layers == 1 else g.op(
            'Slice', x, axes_i=[0], starts_i=[start], ends_i=[end])

    for i in range(num_layers):
        if unidirectional:
            weight_ih, weight_hh, bias_concat = transform_weights(i)
            state_indices = i, i + 1
        else:
            weight_ih_f, weight_hh_f, bias_f = transform_weights(2 * i)
            weight_ih_b, weight_hh_b, bias_b = transform_weights(2 * i + 1)

            weight_ih = g.op('Concat', weight_ih_f, weight_ih_b, axis_i=0)
            weight_hh = g.op('Concat', weight_hh_f, weight_hh_b, axis_i=0)
            bias_concat = g.op('Concat', bias_f, bias_b, axis_i=0)

            state_indices = 2 * i, 2 * i + 2

        inputs = [prev_output, weight_ih, weight_hh, bias_concat, sequence_lens]

        inputs.append(retrieve_state(h0, *state_indices))
        if variant == 'LSTM':
            inputs.append(retrieve_state(c0, *state_indices))

        extra_kwargs = {} if unidirectional else {'direction_s': 'bidirectional'}
        if variant == 'RNN':
            prev_output, h_out = g.op(
                'RNN', *inputs, outputs=2,
                hidden_size_i=hidden_size,
                activations_s=[nonlinearity],
                **extra_kwargs)
        elif variant == 'GRU':
            prev_output, h_out = g.op(
                'GRU', *inputs, outputs=2,
                hidden_size_i=hidden_size,
                linear_before_reset_i=1,
                **extra_kwargs)
        elif variant == 'LSTM':
            prev_output, h_out, c_out = g.op(
                'LSTM', *inputs, outputs=3,
                hidden_size_i=hidden_size,
                **extra_kwargs)

        if bidirectional:
            # The ONNX RNN/GRU/LSTM produce an output of dimensions
            #   seq_len, num_directions, batch, hidden_size
            # We have to convert to match pytorch's expected
            #   seq_len, batch, num_directions * hidden_size
            # by first moving num_directions before hidden_size with
            # Transpose, and then combining it with hidden_size
            # with Reshape.
            prev_output = g.op('Transpose', prev_output, perm_i=[0, 2, 1, 3])
            prev_output = g.op(
                'Reshape', prev_output,
                g.op('Constant', value_t=torch.LongTensor([0, 0, -1])))
        else:
            prev_output = g.op('Squeeze', prev_output, axes_i=[1])

        h_outs.append(h_out)
        if variant == 'LSTM':
            c_outs.append(c_out)

    h_outs = h_out if num_layers == 1 else g.op('Concat', *h_outs, axis_i=0)
    if variant == 'RNN' or variant == 'GRU':
        return prev_output, h_outs
    elif variant == 'LSTM':
        c_outs = c_out if num_layers == 1 else g.op('Concat', *c_outs, axis_i=0)
        return prev_output, h_outs, c_outs
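# reform_weights in _generic_rnn slices the stacked gate weights along dim 0
# and re-concatenates them in the gate order ONNX expects. A minimal eager-mode
# sketch of the same reshuffle (plain torch; reform_weights_eager and the toy
# sizes are illustrative, not exporter code):
import torch

def reform_weights_eager(w, n, intervals):
    # Take each [x*n, y*n) row block and concatenate the blocks in the given order.
    return torch.cat([w[x * n:y * n] for x, y in intervals], dim=0)

hidden_size = 2
input_size = 3
# GRU weight_ih is stacked as (reset, input, hidden) gate blocks in PyTorch.
w_ih = torch.arange(3 * hidden_size * input_size, dtype=torch.float32)
w_ih = w_ih.reshape(3 * hidden_size, input_size)

# ONNX GRU expects (input, reset, hidden), matching
# reform_permutation = [(1, 2), (0, 1), (2, 3)] above.
reordered = reform_weights_eager(w_ih, hidden_size, [(1, 2), (0, 1), (2, 3)])
print(reordered)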
def slice(g, self, dim, start, end, step): if step != 1: _unimplemented("slice", "step!=1 is currently not supported") return g.op("Slice", self, axes_i=[dim], starts_i=[start], ends_i=[end])
def add(g, self, other, alpha):
    if _scalar(alpha) != 1:
        return _unimplemented("add", "alpha != 1")
    # See Note [Pointwise by scalar]
    other = _maybe_get_scalar(other)
    return g.op("Add", self, _if_scalar_type_as(g, other, self))