Example #1
def _get_attention_cell(attention_cell, units=None):
    """

    Parameters
    ----------
    attention_cell : AttentionCell or str
    units : int or None

    Returns
    -------
    attention_cell : AttentionCell
    """
    if isinstance(attention_cell, str):
        if attention_cell == 'scaled_luong':
            return DotProductAttentionCell(units=units,
                                           scaled=True,
                                           normalized=False,
                                           luong_style=True)
        elif attention_cell == 'scaled_dot':
            return DotProductAttentionCell(units=None,
                                           scaled=True,
                                           normalized=False,
                                           luong_style=False)
        elif attention_cell == 'dot':
            return DotProductAttentionCell(units=None,
                                           scaled=False,
                                           normalized=False,
                                           luong_style=False)
        elif attention_cell == 'cosine':
            return DotProductAttentionCell(units=units,
                                           scaled=False,
                                           normalized=True)
        elif attention_cell == 'mlp':
            return MLPAttentionCell(units=units, normalized=False)
        elif attention_cell == 'normed_mlp':
            return MLPAttentionCell(units=units, normalized=True)
        elif attention_cell == 'MultiHeadAttentionCell':
            attention_cell = MLPAttentionCell(units=units, normalized=False)
            return MultiHeadAttentionCell(base_cell=attention_cell,
                                          query_units=units,
                                          key_units=units,
                                          value_units=units,
                                          num_heads=4)
        else:
            raise NotImplementedError
    else:
        assert isinstance(attention_cell, AttentionCell),\
            'attention_cell must be either string or AttentionCell. Received attention_cell={}'\
                .format(attention_cell)
        return attention_cell
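For reference, the cell returned by _get_attention_cell is applied to a query tensor and a key (memory) tensor. Below is a minimal usage sketch, assuming GluonNLP's attention-cell call convention of cell(query, key) returning a (context_vec, att_weights) pair; the shapes are made up for illustration.

import mxnet as mx

query = mx.nd.random.uniform(shape=(2, 5, 16))  # (batch, query_length, dim)
key = mx.nd.random.uniform(shape=(2, 7, 16))    # (batch, memory_length, dim)

cell = _get_attention_cell('scaled_dot')
cell.initialize()  # harmless here: the 'scaled_dot' cell has no projection parameters
context_vec, att_weights = cell(query, key)     # value defaults to key
print(context_vec.shape)  # (2, 5, 16)
print(att_weights.shape)  # (2, 5, 7)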
Example #2
def _get_attention_cell(attention_cell, units=None):
    """

    Parameters
    ----------
    attention_cell : AttentionCell or str
    units : int or None

    Returns
    -------
    attention_cell : AttentionCell
    """
    if isinstance(attention_cell, str):
        if attention_cell == 'scaled_luong':
            return DotProductAttentionCell(units=units,
                                           scaled=True,
                                           normalized=False,
                                           luong_style=True)
        elif attention_cell == 'scaled_dot':
            return DotProductAttentionCell(units=None,
                                           scaled=True,
                                           normalized=False,
                                           luong_style=False)
        elif attention_cell == 'dot':
            return DotProductAttentionCell(units=None,
                                           scaled=False,
                                           normalized=False,
                                           luong_style=False)
        elif attention_cell == 'cosine':
            return DotProductAttentionCell(units=units,
                                           scaled=False,
                                           normalized=True)
        elif attention_cell == 'mlp':
            return MLPAttentionCell(units=units, normalized=False)
        elif attention_cell == 'normed_mlp':
            return MLPAttentionCell(units=units, normalized=True)
        else:
            raise NotImplementedError
    else:
        assert isinstance(attention_cell, AttentionCell),\
            'attention_cell must be either string or AttentionCell. Received attention_cell={}'\
                .format(attention_cell)
        return attention_cell
Example #3
def _get_attention_cell(attention_cell, units=None,
                        scaled=True, num_heads=None,
                        use_bias=False, dropout=0.0):
    """

    Parameters
    ----------
    attention_cell : AttentionCell or str
    units : int or None

    Returns
    -------
    attention_cell : AttentionCell
    """
    if isinstance(attention_cell, str):
        if attention_cell == 'scaled_luong':
            return DotProductAttentionCell(units=units, scaled=True, normalized=False,
                                           use_bias=use_bias, dropout=dropout, luong_style=True)
        elif attention_cell == 'scaled_dot':
            return DotProductAttentionCell(units=units, scaled=True, normalized=False,
                                           use_bias=use_bias, dropout=dropout, luong_style=False)
        elif attention_cell == 'dot':
            return DotProductAttentionCell(units=units, scaled=False, normalized=False,
                                           use_bias=use_bias, dropout=dropout, luong_style=False)
        elif attention_cell == 'cosine':
            return DotProductAttentionCell(units=units, scaled=False, use_bias=use_bias,
                                           dropout=dropout, normalized=True)
        elif attention_cell == 'mlp':
            return MLPAttentionCell(units=units, normalized=False)
        elif attention_cell == 'normed_mlp':
            return MLPAttentionCell(units=units, normalized=True)
        elif attention_cell == 'multi_head':
            base_cell = DotProductAttentionCell(scaled=scaled, dropout=dropout)
            return MultiHeadAttentionCell(base_cell=base_cell, query_units=units, use_bias=use_bias,
                                          key_units=units, value_units=units, num_heads=num_heads)
        else:
            raise NotImplementedError
    else:
        assert isinstance(attention_cell, AttentionCell),\
            'attention_cell must be either string or AttentionCell. Received attention_cell={}'\
                .format(attention_cell)
        return attention_cell
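This variant adds a 'multi_head' option that wraps a DotProductAttentionCell in a MultiHeadAttentionCell. A minimal sketch of how it might be used, under the same cell(query, key) convention as above; the sizes are illustrative, and units must be divisible by num_heads.

import mxnet as mx

cell = _get_attention_cell('multi_head', units=8, num_heads=2, dropout=0.1)
cell.initialize()  # the multi-head cell owns projection parameters

query = mx.nd.random.uniform(shape=(4, 6, 8))    # (batch, query_length, dim)
memory = mx.nd.random.uniform(shape=(4, 10, 8))  # (batch, memory_length, dim)
context_vec, att_weights = cell(query, memory)   # att_weights holds the per-head scores
print(context_vec.shape)  # (4, 6, 8): one value_units-sized context per query position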
Example #4
def __init__(self, num_heads, **kwargs):
    super(SelfAttention, self).__init__(**kwargs)
    with self.name_scope():
        self.attention = MultiHeadAttentionCell(
            num_heads=num_heads,
            base_cell=DotProductAttentionCell(scaled=True,
                                              dropout=opt.layers_dropout,
                                              use_bias=False),
            query_units=opt.emb_encoder_conv_channels,
            key_units=opt.emb_encoder_conv_channels,
            value_units=opt.emb_encoder_conv_channels,
            use_bias=False,
            weight_initializer=Xavier())
Example #5
def __init__(self, num_heads, **kwargs):
    super(SelfAttention, self).__init__(**kwargs)
    with self.name_scope():
        self.attention = MultiHeadAttentionCell(
            num_heads=num_heads,
            base_cell=DotProductAttentionCell(
                scaled=True,
                dropout=0.1,
                use_bias=False
            ),
            query_units=EMB_ENCODER_CONV_CHANNELS,
            key_units=EMB_ENCODER_CONV_CHANNELS,
            value_units=EMB_ENCODER_CONV_CHANNELS,
            use_bias=False,
            weight_initializer=Xavier()
        )
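As context, the same MultiHeadAttentionCell configuration can be exercised as a standalone self-attention layer by passing one tensor as both query and key. A minimal sketch, assuming the cell(query, key) call convention; CHANNELS is a stand-in for EMB_ENCODER_CONV_CHANNELS.

import mxnet as mx
from mxnet.initializer import Xavier
from gluonnlp.model import MultiHeadAttentionCell, DotProductAttentionCell

CHANNELS = 128  # assumed value, for illustration only

attention = MultiHeadAttentionCell(
    num_heads=8,
    base_cell=DotProductAttentionCell(scaled=True, dropout=0.1, use_bias=False),
    query_units=CHANNELS,
    key_units=CHANNELS,
    value_units=CHANNELS,
    use_bias=False,
    weight_initializer=Xavier())
attention.initialize()

x = mx.nd.random.uniform(shape=(2, 20, CHANNELS))  # (batch, seq_len, channels)
context, att_weights = attention(x, x)             # the sequence attends to itself
print(context.shape)  # (2, 20, 128)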
Example #6
import mxnet as mx
from mxnet import nd
from gluonnlp.model import MultiHeadAttentionCell, DotProductAttentionCell
from mxnet.gluon import Block, nn, rnn
import RNN
import random
from customlayer import *
import Constant
base_cell = DotProductAttentionCell(scaled=True, dropout=0.2)

class Resblock(Block):
    
    def __init__(self, model_dim, dropout=0.1):
        super(Resblock, self).__init__()
        self.model_dim = model_dim
        self.dropout = dropout
        self.resblock = nn.Sequential()
        with self.resblock.name_scope():
            self.resblock.add(nn.LayerNorm())
            self.resblock.add(nn.Dense(2 * self.model_dim, in_units=self.model_dim, activation="relu"))
            self.resblock.add(nn.Dropout(self.dropout))
            self.resblock.add(nn.Dense(self.model_dim, in_units=2 * self.model_dim))
            self.resblock.add(nn.Dropout(self.dropout))
            
    def forward(self, x):
        output = self.resblock(x)
        return output+x
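Resblock is a pre-norm feed-forward residual block: LayerNorm, an expansion Dense layer, dropout, a projection back to model_dim, dropout, and a skip connection. A small usage sketch with an illustrative 2D input (as written, the Dense layers expect the dimensions after the batch axis to flatten to model_dim).

import mxnet as mx

block = Resblock(model_dim=64, dropout=0.1)
block.initialize()
x = mx.nd.random.uniform(shape=(8, 64))  # (batch, model_dim), shape chosen for illustration
y = block(x)
print(y.shape)  # (8, 64): the residual connection preserves the input dimensionality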

class Encoder(Block):
    
    def __init__(self, embedding_dim, head_count, model_dim, drop_prob, dropout):