Example #1
    def __init__(self, d_model, n_heads, d_ffn, dropout):
        super().__init__()
        self.self_attn = MultiHeadAttention(d_model, n_heads, dropout)
        self.feed_forward = PositionwiseFeedForward(d_model, d_ffn, dropout)
        # Two residual/normalization wrappers: one around self-attention, one around the feed-forward block.
        self.sublayers = nn.ModuleList(
            [SublayerConnection(d_model, dropout) for _ in range(2)])
        self.d_model = d_model
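The snippet above shows only the constructor. In layers of this style (e.g. the Annotated Transformer), the two SublayerConnection wrappers are typically applied in forward roughly as sketched below; the mask argument and the wrapper call signature are assumptions, not part of the original example:

    def forward(self, x, mask):
        # Sublayer 0: residual + norm around self-attention.
        x = self.sublayers[0](x, lambda x: self.self_attn(x, x, x, mask))
        # Sublayer 1: residual + norm around the position-wise feed-forward network.
        return self.sublayers[1](x, self.feed_forward)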
Example #2
    def __init__(self, d_model, d_hidden, n_heads, d_k, d_v, dropout=0.1):
        super(DecoderLayer, self).__init__()

        # Masked self-attention over the decoder input.
        self.slf_attn = MultiHeadAttention(
            n_heads=n_heads, d_k=d_k, d_v=d_v, d_model=d_model, dropout=dropout)
        # Encoder-decoder (cross) attention over the encoder output.
        self.inter_attn = MultiHeadAttention(
            n_heads=n_heads, d_k=d_k, d_v=d_v, d_model=d_model, dropout=dropout)
        # Position-wise feed-forward network.
        self.fc = PositionwiseFeedForward(
            d_hidden=d_hidden, d_model=d_model, dropout=dropout)
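Again only the constructor is shown; a decoder layer of this shape usually chains the three modules in forward as sketched below. The exact signature, mask names, and returned attention maps are assumptions for illustration:

    def forward(self, dec_input, enc_output, slf_attn_mask=None, dec_enc_attn_mask=None):
        # Masked self-attention over the partially generated target sequence.
        out, slf_attn = self.slf_attn(dec_input, dec_input, dec_input, mask=slf_attn_mask)
        # Cross-attention: queries from the decoder, keys/values from the encoder output.
        out, inter_attn = self.inter_attn(out, enc_output, enc_output, mask=dec_enc_attn_mask)
        # Position-wise feed-forward network.
        out = self.fc(out)
        return out, slf_attn, inter_attn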
Example #3
def Transformer(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    """
    Helper: construct a full encoder-decoder Transformer from the given hyperparameters.
    """
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, deepcopy(attn), deepcopy(ff), dropout), N),
        Decoder(DecoderLayer(d_model, deepcopy(attn), deepcopy(attn), deepcopy(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), deepcopy(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), deepcopy(position)),
        Generator(d_model, tgt_vocab)
    )
    # Important detail carried over from the reference code:
    # initialize parameters with Glorot (Xavier uniform) / fan_avg.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

    return model
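A minimal usage sketch for the helper above, assuming the component classes (EncoderDecoder, MultiHeadedAttention, etc.) and the imports it relies on (torch.nn as nn, copy.deepcopy) are available. The vocabulary sizes, batch shapes, and mask handling are placeholders, and the call signature shown follows the Annotated Transformer's EncoderDecoder convention, which is an assumption here:

import torch

model = Transformer(src_vocab=10000, tgt_vocab=10000, N=6, d_model=512)
src = torch.randint(1, 10000, (8, 20))             # (batch, src_len) source token ids
tgt = torch.randint(1, 10000, (8, 15))             # (batch, tgt_len) target token ids
src_mask = torch.ones(8, 1, 20, dtype=torch.bool)  # padding mask (all positions visible here)
tgt_mask = torch.ones(8, 15, 15, dtype=torch.bool) # would normally be a causal (subsequent) mask
hidden = model(src, tgt, src_mask, tgt_mask)       # decoder hidden states, (8, 15, 512)
log_probs = model.generator(hidden)                # per-position log-probabilities over tgt_vocab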
Example #4
def LevenshteinTransformerModel(src_vocab,
                                tgt_vocab,
                                PAD,
                                BOS,
                                EOS,
                                UNK,
                                criterion,
                                d_model=512,
                                n=6,
                                h=8,
                                d_ff=2048,
                                dropout=0.0,
                                input_dropout=0.1):
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, input_dropout)
    model = LevenshteinEncodeDecoder(
        Encoder(EncoderLayer(d_model, deepcopy(attn), deepcopy(ff), dropout),
                n),
        LevenshteinDecoder(DecoderLayer(d_model, deepcopy(attn),
                                        deepcopy(attn), deepcopy(ff), dropout),
                           n=n,
                           output_embed_dim=d_model,
                           tgt_vocab=tgt_vocab),
        nn.Sequential(Embeddings(d_model, src_vocab), deepcopy(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), deepcopy(position)),
        Generator(d_model, tgt_vocab),
        pad=PAD,
        bos=BOS,
        eos=EOS,
        unk=UNK,
        criterion=criterion)
    # Important detail carried over from the reference code:
    # initialize parameters with Glorot (Xavier uniform) / fan_avg.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

    return model
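A hypothetical construction call for the factory above, purely for illustration; the special-token ids, vocabulary sizes, and the criterion are placeholders, and the iterative insertion/deletion decoding loop of the Levenshtein Transformer is not shown:

import torch.nn as nn

model = LevenshteinTransformerModel(
    src_vocab=32000, tgt_vocab=32000,
    PAD=0, BOS=1, EOS=2, UNK=3,
    criterion=nn.CrossEntropyLoss(ignore_index=0),  # placeholder loss for illustration
    d_model=512, n=6, h=8, d_ff=2048,
    dropout=0.0, input_dropout=0.1)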
Example #5
    def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
        super(DecoderLayer, self).__init__()
        # Masked self-attention over the decoder input.
        self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
        # Encoder-decoder (cross) attention over the encoder output.
        self.enc_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
        # Position-wise feed-forward network.
        self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)