    def __init__(self, d_model, heads, dropout=0.1):
        super(DecoderLayer, self).__init__()

        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.norm_3 = Norm(d_model)

        self.attn_1 = MultiHeadAttention(heads, d_model)
        self.attn_2 = MultiHeadAttention(heads, d_model)
        self.ff = FeedForward(d_model)  # device placement left to the caller (e.g. model.to(device)) instead of a hard-coded .cuda()

        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
        self.dropout_3 = nn.Dropout(dropout)
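
    # Hedged sketch, not part of the original example: one plausible forward pass
    # for this pre-norm DecoderLayer. It assumes MultiHeadAttention is called as
    # attn(q, k, v, mask) and that e_outputs are the encoder stack outputs.
    def forward(self, x, e_outputs, src_mask, trg_mask):
        x2 = self.norm_1(x)
        # Masked self-attention over the target sequence, with residual connection
        x = x + self.dropout_1(self.attn_1(x2, x2, x2, trg_mask))
        x2 = self.norm_2(x)
        # Encoder-decoder (cross) attention over the encoder outputs
        x = x + self.dropout_2(self.attn_2(x2, e_outputs, e_outputs, src_mask))
        x2 = self.norm_3(x)
        # Position-wise feed-forward sub-layer
        x = x + self.dropout_3(self.ff(x2))
        return x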
    def __init__(self, d_model, N, heads, max_seq_len):
        super(TransformerEncoder, self).__init__()

        self.N = N
        self.position = PositionalEncoder(d_model, max_seq_len)
        # Generate N Encoder layers
        self.layers = get_clones(EncoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)
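
    # Hedged sketch, not part of the original example: a plausible forward pass for
    # this encoder stack. Note this example defines no embedding layer, so src is
    # assumed to already be embedded to d_model; EncoderLayer.forward is assumed to
    # take (x, mask).
    def forward(self, src, mask):
        x = self.position(src)       # add positional encodings
        for i in range(self.N):      # run the N cloned encoder layers in order
            x = self.layers[i](x, mask)
        return self.norm(x)          # final layer normalisation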
    def __init__(self, d_model, heads, dropout=0.1):
        """
        :param d_model:
        :param heads:
        :param dropout:
        """
        super(EncoderLayer, self).__init__()

        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)

        # Two main sub-layers
        self.attn = MultiHeadAttention(heads, d_model)
        self.ff = FeedForward(d_model)

        # Residual dropout
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
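
    # Hedged sketch, not part of the original example: a typical forward pass for
    # this pre-norm EncoderLayer, assuming MultiHeadAttention is called as
    # attn(q, k, v, mask).
    def forward(self, x, mask):
        x2 = self.norm_1(x)
        # Self-attention sub-layer with residual connection and dropout
        x = x + self.dropout_1(self.attn(x2, x2, x2, mask))
        x2 = self.norm_2(x)
        # Position-wise feed-forward sub-layer with residual connection and dropout
        x = x + self.dropout_2(self.ff(x2))
        return x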
    def __init__(self, vocab_size, d_model, N, heads, max_seq_len):
        super(TransformerDecoder, self).__init__()

        self.N = N
        self.embed = Embedder(vocab_size, d_model)
        self.position = PositionalEncoder(d_model, max_seq_len)
        # Generate N Decoder layers
        self.layers = get_clones(DecoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)
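
    # Hedged sketch, not part of the original example: one plausible forward pass
    # for this decoder stack, assuming DecoderLayer.forward takes
    # (x, e_outputs, src_mask, trg_mask) as in the sketch further above.
    def forward(self, trg, e_outputs, src_mask, trg_mask):
        x = self.embed(trg)          # token embeddings
        x = self.position(x)         # add positional encodings
        for i in range(self.N):      # run the N cloned decoder layers in order
            x = self.layers[i](x, e_outputs, src_mask, trg_mask)
        return self.norm(x)          # final layer normalisation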
    def __init__(self, d_model, heads, dropout=0.1):
        """
        Input:
        d_model is length of vector of embeddings
        heads is number of heads for MultiHeadAttetion
        """
        super(EncoderLayer, self).__init__()

        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)

        # Two main sub-layers
        self.attn = MultiHeadAttention(heads, d_model)
        self.ff = FeedForward(d_model)

        # Residual dropout
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
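
# Hedged sketch, not part of the original examples: common definitions for the
# helpers these layers rely on. get_clones is assumed to deep-copy a layer N times
# into an nn.ModuleList, Norm is assumed to be a learnable layer normalisation, and
# FeedForward a two-layer position-wise network; the real modules (and the d_ff
# default) may differ in the source repositories.
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F


def get_clones(module, N):
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])


class Norm(nn.Module):
    def __init__(self, d_model, eps=1e-6):
        super().__init__()
        self.alpha = nn.Parameter(torch.ones(d_model))  # learnable scale
        self.bias = nn.Parameter(torch.zeros(d_model))  # learnable shift
        self.eps = eps

    def forward(self, x):
        # Normalise over the last (feature) dimension
        mean = x.mean(dim=-1, keepdim=True)
        std = x.std(dim=-1, keepdim=True)
        return self.alpha * (x - mean) / (std + self.eps) + self.bias


class FeedForward(nn.Module):
    def __init__(self, d_model, d_ff=2048, dropout=0.1):
        super().__init__()
        self.linear_1 = nn.Linear(d_model, d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(d_ff, d_model)

    def forward(self, x):
        # Expand, apply the non-linearity and dropout, then project back to d_model
        return self.linear_2(self.dropout(F.relu(self.linear_1(x))))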
Example #6
    def __init__(self,
                 vocab_size,
                 input_dim,
                 hidden_dim,
                 num_layer,
                 bidirectional,
                 dropout=0.2):
        super(LSTMDecoder, self).__init__()

        self.vocab_size = vocab_size
        self.hidden_dim = hidden_dim
        self.embed = Embedder(vocab_size, input_dim)
        self.norm = Norm(input_dim)

        # Define LSTM cell
        self.lstm_cell = nn.LSTMCell(input_dim, hidden_dim)
        self.linear_out = nn.Linear(hidden_dim, vocab_size)
        self.attn = GlobalAttentionCell(hidden_dim)
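
    # Hedged sketch, not part of the original example: one plausible single decoding
    # step. The method name `step`, its arguments, and the call signature of
    # GlobalAttentionCell are illustrative assumptions -- the attention cell is
    # treated here as attn(hidden, encoder_outputs) returning a (batch, hidden_dim)
    # attended vector; the real module may differ.
    def step(self, token, hidden, cell, encoder_outputs):
        # token: (batch,) token ids
        # hidden, cell: (batch, hidden_dim) LSTM state
        # encoder_outputs: (batch, src_len, hidden_dim)
        x = self.norm(self.embed(token))               # embed and normalise the input token
        hidden, cell = self.lstm_cell(x, (hidden, cell))
        attended = self.attn(hidden, encoder_outputs)  # attend over the encoder outputs
        logits = self.linear_out(attended)             # project to vocabulary scores
        return logits, hidden, cell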