Beispiel #1
0
def GRULM(vocab_size=256, d_model=512, n_layers=2, mode='train'):
    """Build a GRU language model.

    Args:
        vocab_size (int, optional): Size of the vocabulary. Defaults to 256.
        d_model (int, optional): Depth of embedding (n_units in each GRU cell). Defaults to 512.
        n_layers (int, optional): Number of stacked GRU layers. Defaults to 2.
        mode (str, optional): One of 'train', 'eval' or 'predict'; 'predict'
            mode enables fast inference. Defaults to 'train'.

    Returns:
        trax.layers.combinators.Serial: A GRU language model as a layer that maps from a tensor of tokens to activations over a vocab set.
    """
    ### START CODE HERE (Replace instances of 'None' with your code) ###
    # Build the recurrent core as a list so Serial flattens it in place.
    gru_stack = [tl.GRU(n_units=d_model) for _ in range(n_layers)]
    model = tl.Serial(
        tl.ShiftRight(mode=mode),  # Shift inputs right so position t predicts token t
        tl.Embedding(vocab_size=vocab_size, d_feature=d_model),  # Token IDs -> d_model vectors
        gru_stack,  # n_layers stacked GRU layers of d_model units each
        tl.Dense(n_units=vocab_size),  # Project activations back to vocabulary size
        tl.LogSoftmax()  # Log-probabilities over the vocabulary
    )
    ### END CODE HERE ###
    return model
Beispiel #2
0
def GRULM(vocab_size=256,
          d_model=512,
          n_layers=1,
          mode='train'):
  """Returns a GRU (gated recurrent unit) language model.

  This model performs autoregressive language modeling:

    - input: rank 2 tensor representing a batch of text strings via token IDs
      plus padding markers; shape is (batch_size, sequence_length). The tensor
      elements are integers in `range(vocab_size)`, and `0` values mark padding
      positions.

    - output: rank 3 tensor representing a batch of unnormalized activations
      (logits) for each sequence position over possible token IDs; shape is
      (batch_size, sequence_length, `vocab_size`). Note: no final
      `LogSoftmax` is applied here, so apply a log-softmax (or use a loss
      layer that includes one) to obtain log-probabilities.

  Args:
    vocab_size: Input vocabulary size -- each element of the input tensor
        should be an integer in `range(vocab_size)`. These integers typically
        represent token IDs from a vocabulary-based tokenizer.
    d_model: Embedding depth throughout the model.
    n_layers: Number of GRU layers.
    mode: If `'predict'`, use fast inference (and omit the right shift).

  Returns:
    A GRU language model as a layer that maps from a tensor of tokens
    to activations over a vocab set.
  """
  return tl.Serial(
      tl.ShiftRight(mode=mode),          # Shift right so position t predicts token t
      tl.Embedding(vocab_size, d_model),  # Token IDs -> d_model-dim vectors
      [tl.GRU(d_model) for _ in range(n_layers)],  # Stacked recurrent core
      tl.Dense(vocab_size),              # Project back to vocab-size logits
  )
Beispiel #3
0
def GRULM(vocab_size=256,
          d_model=512,
          n_layers=1,
          mode='train'):
  """Returns a GRU language model.

  The input to the model is a tensor of tokens (ints).

  Args:
    vocab_size: int: vocab size
    d_model: int:  depth of embedding (n_units in the RNN cell)
    n_layers: int: number of RNN layers
    mode: str: 'train', 'eval' or 'predict', predict mode is for fast inference

  Returns:
    An RNN language model as a layer that maps from a tensor of tokens
    to log-probability activations over a vocab set.
  """
  return tl.Serial(
      tl.ShiftRight(mode=mode),  # shift right so position t predicts token t
      # NOTE(review): argument order here is (d_feature, vocab_size), which
      # matches the *older* trax `tl.Embedding` signature; newer trax expects
      # (vocab_size, d_feature) as in the sibling examples in this file —
      # confirm against the trax version in use.
      tl.Embedding(d_model, vocab_size),
      [tl.GRU(d_model) for _ in range(n_layers)],  # stacked GRU core
      tl.Dense(vocab_size),  # project back to vocabulary size
      tl.LogSoftmax()  # logits -> log-probabilities
  )
Beispiel #4
0
 def test_names(self):
     """Each RNN layer's string name should encode its type and unit count."""
     cases = (
         (tl.LSTM, 3, 'LSTM_3'),
         (tl.GRU, 5, 'GRU_5'),
         (tl.SRU, 7, 'SRU_7'),
     )
     for make_layer, n_units, expected in cases:
         self.assertEqual(expected, str(make_layer(n_units)))
Beispiel #5
0
 def test_names(self, backend):
   """RNN layer names should encode type and unit count on every backend."""
   with fastmath.use_backend(backend):
     for maker, units, want in ((tl.LSTM, 3, 'LSTM_3'),
                                (tl.GRU, 5, 'GRU_5'),
                                (tl.SRU, 7, 'SRU_7')):
       self.assertEqual(want, str(maker(units)))
Beispiel #6
0
    def test_dimensionality(self):
        """Bidirectional GRU output doubles the feature axis."""
        batch, seq_len, units = 2, 3, 8
        inputs = np.ones((batch, seq_len, units))
        bidi = tl.Bidirectional(tl.GRU(n_units=units))
        bidi.init(shapes.signature(inputs))
        result = bidi(inputs)

        # Forward and backward outputs are concatenated on the last axis.
        self.assertEqual(result.shape, (batch, seq_len, units + units))
Beispiel #7
0
# Putting everything together the GRU model will look like this:

# In[4]:

mode = 'train'
vocab_size = 256
model_dimension = 512
n_layers = 2

# Remember to pass `mode` explicitly when using the model for
# inference/testing, since the default is 'train'.
GRU = tl.Serial(
    tl.ShiftRight(mode=mode),
    tl.Embedding(vocab_size=vocab_size, d_feature=model_dimension),
    # Stack n_layers GRU layers; increase n_layers above for a deeper model.
    [tl.GRU(n_units=model_dimension) for _ in range(n_layers)],
    tl.Dense(n_units=vocab_size),
    tl.LogSoftmax())

# Next is a helper function that prints information for every layer (sublayer within `Serial`):
#
# _Try changing the parameters defined before the GRU model and see how it changes!_
#

# In[5]:


def show_layers(model, layer_prefix="Serial.sublayers"):
    print(f"Total layers: {len(model.sublayers)}\n")
    for i in range(len(model.sublayers)):