def transformer_c(c: Configs):
    """
    Initialize the configurable transformer encoder for our autoregressive model
    """
    tc = TransformerConfigs()
    tc.n_src_vocab = c.n_tokens
    tc.n_tgt_vocab = c.n_tokens

    return tc

def _transformer_configs(c: Configs): """ ### ALiBi based Transformer configurations """ # We use our # [configurable transformer implementation](../configs.html#TransformerConfigs) conf = TransformerConfigs() # Set the vocabulary sizes for embeddings and generating logits conf.n_src_vocab = c.n_tokens conf.n_tgt_vocab = c.n_tokens # GPT uses GELU activation for position wise feedforward conf.ffn.activation = 'GELU' # ALiBi doesn't use positional embeddings conf.src_embed = 'no_pos' conf.tgt_embed = 'no_pos' # Set all attention mechanisms to ALiBi conf.encoder_attn = 'alibi_mha' conf.decoder_attn = 'alibi_mha' conf.decoder_mem_attn = 'alibi_mha' # return conf
def _transformer_configs(c: Configs): """ ### Transformer configurations """ # We use our # [configurable transformer implementation](../configs.html#TransformerConfigs) conf = TransformerConfigs() # Set the vocabulary sizes for embeddings and generating logits conf.n_src_vocab = c.n_tokens conf.n_tgt_vocab = c.n_tokens # return conf
def _transformer_configs(c: Configs): """ ### Transformer configurations """ # We use our # [configurable transformer implementation](../configs.html#TransformerConfigs) conf = TransformerConfigs() # Set the vocabulary sizes for embeddings and generating logits conf.n_src_vocab = c.n_tokens conf.n_tgt_vocab = c.n_tokens # GPT uses GELU activation for position wise feedforward conf.feed_forward_activation = 'GELU' # return conf
def default_transformer(c: Configs):
    """
    ### Default transformer configurations
    """
    conf = TransformerConfigs()
    # Copy the model hyper-parameters from the experiment configurations
    conf.d_model = c.d_model
    conf.n_layers = c.n_layers
    # Set the vocabulary sizes for embeddings and generating logits
    conf.n_src_vocab = c.n_tokens
    conf.n_tgt_vocab = c.n_tokens
    conf.dropout = c.dropout

    return conf

def _transformer_configs(c: Configs): """ ### Transformer configurations """ # We use our # [configurable transformer implementation](../configs.html#TransformerConfigs) conf = TransformerConfigs() # Set the vocabulary sizes for embeddings and generating logits conf.n_src_vocab = c.n_tokens conf.n_tgt_vocab = c.n_tokens # Set model size conf.d_model = c.d_model # Replace the encoder layer with a gMLP layer conf.encoder_layer = c.gmlp return conf
def _transformer_configs(c: Configs): """ ### Transformer configurations """ # We use our # [configurable transformer implementation](../configs.html#TransformerConfigs) conf = TransformerConfigs() # Set the vocabulary sizes for embeddings and generating logits conf.n_src_vocab = c.n_tokens conf.n_tgt_vocab = c.n_tokens # Embedding size conf.d_model = c.d_model # Change attention module to [MLPMixer](index.html) from labml_nn.transformers.mlp_mixer import MLPMixer conf.encoder_attn = MLPMixer(c.mix_mlp.ffn) # return conf
def _transformer_configs(c: Configs): """ ### Transformer configurations """ # We use our # [configurable transformer implementation](../configs.html#TransformerConfigs) conf = TransformerConfigs() # Set the vocabulary sizes for embeddings and generating logits conf.n_src_vocab = c.n_tokens conf.n_tgt_vocab = c.n_tokens # Set the embedding size conf.d_model = c.d_model # Replace self-attention with an [AFT Local Module](index.html) from labml_nn.transformers.aft import AFTLocal conf.encoder_attn = AFTLocal(c.d_model, c.seq_len, c.local_window_size) # return conf
def _transformer():
    """
    Create transformer configs
    """
    return TransformerConfigs()

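# A minimal end-to-end sketch of how builders like the ones above are typically
# used (assumptions: labml's `option` and `experiment` APIs and the
# `NLPAutoRegressionConfigs` base class; the class, option names and experiment
# name below are illustrative, not taken from the snippets above).
from labml import experiment
from labml.configs import option
from labml_nn.experiments.nlp_autoregression import NLPAutoRegressionConfigs
from labml_nn.transformers import TransformerConfigs


class DemoConfigs(NLPAutoRegressionConfigs):
    # Declare the transformer configurations as a config item so that builder
    # functions like the ones above can be registered as options for it
    transformer: TransformerConfigs


@option(DemoConfigs.transformer, 'basic')
def _basic_transformer(c: DemoConfigs):
    # Same pattern as the functions above: derive the configurable transformer
    # from the experiment configurations
    conf = TransformerConfigs()
    conf.n_src_vocab = c.n_tokens
    conf.n_tgt_vocab = c.n_tokens
    return conf


def main():
    conf = DemoConfigs()
    experiment.create(name='transformer_configs_demo')
    # Select the registered option by name; labml resolves the transformer
    # configurations lazily when the experiment starts
    experiment.configs(conf, {'transformer': 'basic'})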