def embedding(x, size, vocabulary=None, **kw):
    """ Embedding layer.\n
    The input is usually a list of indices (integers), and the output is a dense
    matrix which maps indices to dense vectors. Thus the output will have 1 more
    dimension than the input.\n
    **Note**: The output of this function is always one more dimension than the
    input. For input with shape `(*)`, the output will be `(*, size)`. Any shape
    specifications in the KWargs are ignored. \n
    - `x: Tensor`; Contains indices into the vocabulary. Will be converted to an
        integer (long) tensor in place on its current device. Can be of any shape.
    - `size: int`; The size of each embedding vector.
    - `vocabulary: int or None`; The size of vocabulary of embedding, or max number
        of unique indices in `x`. By default it is set to `max(x)-min(x)+1`.
    - `**kw: dict`; Any additional KWargs are passed down to `torch.nn.Embedding`,
        as well as `warm.engine.forward`.
    - `return: Tensor`; With the embedded dim appended to the shape of x. Thus with
        shape `(*, size)`, where `*` is the shape of `x`. """
    # Use .long() instead of x.type(torch.LongTensor): the latter silently
    # relocates CUDA tensors to the CPU, while .long() only converts dtype.
    x = x.long()
    if vocabulary is None:
        # Cast to plain ints so nn.Embedding receives a Python number rather
        # than a 0-dim tensor.
        vocabulary = int(x.max()) - int(x.min()) + 1
    # Shape specs never apply here: the output always gains exactly one dim.
    kw.pop('in_shape', None)
    kw.pop('out_shape', None)
    kw.pop('base_shape', None)
    inferred_kw = dict(
        base_name='embedding',
        base_class=nn.Embedding,
        base_kw=dict(
            num_embeddings=vocabulary,
            embedding_dim=size,
            **engine.unused_kwargs(kw),
        ),
        base_shape=None,
        in_shape=None,
        out_shape=None,
    )
    # Explicit kw wins over inferred defaults.
    return engine.forward(x, **{**inferred_kw, **kw})
def test_unused_kwargs():
    """`engine.unused_kwargs` must drop recognized keys and keep all others."""
    kwargs = {'unused1': 0, 'unused2': 0, 'base_class': 0}
    leftover = engine.unused_kwargs(kwargs)
    # A recognized key must never survive the filter.
    assert 'base_class' not in leftover, 'unused_kwargs leaks used.'
    # Exactly the unrecognized keys remain.
    expected = {'unused1', 'unused2'}
    assert set(leftover.keys()) == expected, (
        'unused_kwargs did not filter kw correctly.')
def lstm(x, size, init_weight_hh='orthogonal_', init_weight_ih=None,
         init_bias_hh=None, init_bias_ih=None, bias=True, num_layers=1, **kw):
    """ Long Short Term Memory layer.\n
    - `x: Tensor`; 3d, with shapes `(Batch, Channel, Length)`.
    - `size: int`; Size of hidden features, and size of the output channel.
    - `init_weight_hh: None or str or callable`; Initialization specification for
        the hidden-hidden weight tensor. If a `str`, should be one of the
        nonlinearity functions contained in `torch.nn.init`. If a `callable`, it
        will be applied to `x` directly, i.e. `spec(x)`. If a 2-`tuple`, it must
        be of format `(callable, kwargs)`, i.e. `callable(x, **kwargs)`.
        Default: `'orthogonal_'`.
    - `init_weight_ih: None or str or callable`; Like `init_weight_hh`, but for
        the input-hidden weight tensor. Default: `None`, which keeps
        `torch.nn.LSTM`'s default initialization.
    - `init_bias_hh: None or str or callable`; Like `init_weight_hh`, but for the
        hidden-hidden bias tensor. Default: `None` (LSTM default).
    - `init_bias_ih: None or str or callable`; Like `init_weight_hh`, but for the
        input-hidden bias tensor. Default: `None` (LSTM default).
    - `bias: bool`; If `False`, the layer does not use `bias_ih` and `bias_hh`.
        Default: `True`.
    - `num_layers: int`; Number of the recurrent layers. Default: 1.
    - `tuple_out: bool`; If `True`, the returned value will be a tuple
        `(out, (h_n, c_n))`. Default: False.
    - `**kw: dict`; Any additional KWargs are passed down to `torch.nn.LSTM`, as
        well as `warm.engine.forward`. Refer to their docs for details. Some of
        the additional LSTM arguments: `dropout, bidirectional, batch_first`.
    - `return: Tensor or tuple`; If `tuple_out` set to true, will return
        `(out, (h_n, c_n)`, otherwise just `out`. `out` has shape
        `(Batch, Size, Length*Directions)`, where Directions = 2 if
        `bidirectional` else 1. `h_n` is the hidden states with shape
        `(num_layers*Directions, Batch, Size)`. `c_n` is the cell states with
        shape `(num_layers*Directions, Batch, Size)`. """
    # Map LSTM parameter stems to their user-supplied init specs.
    init = {
        'weight_hh': init_weight_hh,
        'weight_ih': init_weight_ih,
        'bias_hh': init_bias_hh,
        'bias_ih': init_bias_ih,
    }
    # One entry per parameter per layer, e.g. 'weight_hh_l0'; bias entries are
    # only present when the layer actually has bias parameters.
    stems = ['weight_hh', 'weight_ih']
    if bias:
        stems += ['bias_hh', 'bias_ih']
    initialization = {}
    for stem in stems:
        for layer in range(num_layers):
            initialization[f'{stem}_l{layer}'] = init[stem]
    inferred_kw = dict(
        base_name='lstm',
        base_class=nn.LSTM,
        base_kw={
            'hidden_size': size,
            'num_layers': num_layers,
            **engine.unused_kwargs(kw),
        },
        base_shape='DBC',
        # input_size is inferred at runtime from x's channel dim.
        infer_kw={'input_size': 'C'},
        initialization=initialization,
    )
    # Explicit kw wins over inferred defaults.
    return engine.forward(x, **{**inferred_kw, **kw})
def conv(x, size, kernel, init_weight=None, init_bias=None, bias=True, **kw):
    """ Convolution layer.\n
    - `x: Tensor`; With shape `(Batch, Channel, *)` where `*` can be 1d or 2d or
        3d. If 3d, shapes are `(Batch, Channel, Length)`. If 4d, shapes are
        `(Batch, Channel, Height, Width)`. If 5d, shapes are
        `(Batch, Channel, Depth, Height, Width)`.
    - `size: int`; Size of hidden filters, and size of the output channel.
    - `kernel: int or tuple`; Size of the convolution kernel.
    - `init_weight: None or str or callable`; Initialization specification for the
        weight tensor. If a `str`, should be one of the nonlinearity functions
        contained in `torch.nn.init`. If a `callable`, it will be applied to `x`
        directly, i.e. `spec(x)`. If a 2-`tuple`, it must be of format
        `(callable, kwargs)`, i.e. `callable(x, **kwargs)`. Default: `None`, and
        the weight tensor is initialized using `torch.nn.ConvNd`s default scheme.
    - `init_bias: None or str or callable`; Same as `init_weight`, but for the
        bias tensor.
    - `bias: bool`; If `True`, adds a learnable bias to the output.
        Default: `True`.
    - `**kw: dict`; Any additional KWargs are passed down to `torch.nn.ConvNd`,
        where N can be 1, 2 or 3, as well as `warm.engine.forward`. Refer to
        their docs for details. Some of the additional ConvNd arguments:
        `stride, padding, dilation, groups`.
    - `return: Tensor`; With shape `(Batch, Size, *)` where `*` can be 1d, 2d,
        3d depending on `x`. """
    # Pick Conv1d/2d/3d from the number of spatial dims after (Batch, Channel).
    spatial_dims = x.ndim - 3
    assert spatial_dims in [0, 1, 2], 'Incompatible number of dims for input x.'
    conv_class = [nn.Conv1d, nn.Conv2d, nn.Conv3d][spatial_dims]
    initialization = {'weight': init_weight}
    if bias:
        initialization['bias'] = init_bias
    inferred_kw = dict(
        base_name='conv',
        base_class=conv_class,
        base_kw={
            'out_channels': size,
            'kernel_size': kernel,
            'bias': bias,
            **engine.unused_kwargs(kw),
        },
        # in_channels is inferred at runtime from x's channel dim.
        infer_kw={'in_channels': 'C'},
        initialization=initialization,
    )
    # Explicit kw wins over inferred defaults.
    return engine.forward(x, **{**inferred_kw, **kw})
def transformer(x, y=None, num_encoder=6, num_decoder=6, num_head=8, mask=None,
                causal=False, in_shape='BCD', **kw):
    """ Transformer layer.\n
    This layer covers functionality of `Transformer`, `TransformerEncoder`, and
    `TransformerDecoder`. See
    [`torch.nn.Transformer`](https://pytorch.org/docs/stable/nn.html#transformer)
    for more details.\n
    - `x: Tensor`; The source sequence, with shape `(Batch, Channel, LengthX)`.
        `Channel` is usually from embedding.
    - `y: None or Tensor`; The target sequence. Also with shape
        `(Batch, Channel, LengthY)`. If not present, default to equal `x`.
    - `num_encoder: int`; Number of encoder layers. Set to 0 to disable encoder
        and use only decoder. Default 6.
    - `num_decoder: int`; Number of decoder layers. Set to 0 to disable decoder
        and use only encoder. Default 6.
    - `num_head: int`; Number of heads for multi-headed attention. Default 8.
    - `mask: None or dict`; Keys are among: `src_mask`, `tgt_mask`, `memory_mask`,
        `src_key_padding_mask`, `tgt_key_padding_mask`, `memory_key_padding_mask`.
        See the `forward` method of `torch.nn.Transformer` for details. The dict
        is not modified by this function.
    - `causal: bool`; Default false. If true, will add causal masks to source and
        target, so that current value only depends on the past, not the future,
        in the sequences.
    - `in_shape: str`; Layout of `x` and `y`, a permutation of `'BCD'` (Batch,
        Channel, D=sequence length). Default `'BCD'`.
    - `**kw: dict`; Any additional KWargs are passed down to
        `torch.nn.Transformer`, as well as `warm.engine.forward`.
    - `return: Tensor`; Same shape as `y`, if `num_decoder` > 0. Otherwise same
        shape as `x`. """
    def _causal_mask(n):
        # Additive attention mask: 0.0 where attending is allowed (j <= i),
        # -inf where it would peek at the future.
        m = (torch.triu(torch.ones(n, n)) == 1).transpose(0, 1)
        return m.float().masked_fill(m == 0, float('-inf')).masked_fill(
            m == 1, float(0.0))
    if y is None:
        y = x
    # Target is permuted here; engine.forward permutes x via in_shape.
    y = permute(y, in_shape, 'DBC')
    # Shallow-copy: the original popped/reassigned keys on the caller's dict,
    # mutating a caller-owned argument whenever causal=True.
    mask = dict(mask) if mask else {}
    if causal:
        i = in_shape.find('D')
        mx = _causal_mask(x.shape[i])
        # Accumulate on top of any user-provided additive mask.
        mask['src_mask'] = mask.pop('src_mask', 0.0) + mx
        my = _causal_mask(y.shape[0])  # y is already DBC, so dim 0 is length
        mask['tgt_mask'] = mask.pop('tgt_mask', 0.0) + my
    # identity stands in for a disabled encoder/decoder stack.
    encoder = identity if num_encoder == 0 else None
    decoder = identity if num_decoder == 0 else None
    inferred_kw = dict(
        base_name='transformer',
        base_class=nn.Transformer,
        base_shape='DBC',
        base_kw=dict(
            d_model=x.shape[in_shape.find('C')],
            custom_encoder=encoder,
            custom_decoder=decoder,
            nhead=num_head,
            num_encoder_layers=num_encoder,
            num_decoder_layers=num_decoder,
            **engine.unused_kwargs(kw),
        ),
        in_shape=in_shape,
        forward_kw=mask,
        forward_arg=(y, ),
    )
    # Explicit kw wins over inferred defaults.
    return engine.forward(x, **{**inferred_kw, **kw})