def _get_attention_cell(attention_cell, units=None): """ Parameters ---------- attention_cell : AttentionCell or str units : int or None Returns ------- attention_cell : AttentionCell """ # import pdb; pdb.set_trace() if isinstance(attention_cell, str): if attention_cell == 'scaled_luong': return DotProductAttentionCell(units=units, scaled=True, normalized=False, luong_style=True) elif attention_cell == 'scaled_dot': return DotProductAttentionCell(units=None, scaled=True, normalized=False, luong_style=False) elif attention_cell == 'dot': return DotProductAttentionCell(units=None, scaled=False, normalized=False, luong_style=False) elif attention_cell == 'cosine': return DotProductAttentionCell(units=units, scaled=False, normalized=True) elif attention_cell == 'mlp': return MLPAttentionCell(units=units, normalized=False) elif attention_cell == 'normed_mlp': return MLPAttentionCell(units=units, normalized=True) elif attention_cell == 'MultiHeadAttentionCell': attention_cell = MLPAttentionCell(units=units, normalized=False) # return MultiHeadAttentionCell(base_cell=attention_cell, query_units=units, key_units=units, value_units=units, num_heads=4) return MultiHeadAttentionCell(base_cell=attention_cell, query_units=units, key_units=units, value_units=units, num_heads=4) else: raise NotImplementedError else: assert isinstance(attention_cell, AttentionCell),\ 'attention_cell must be either string or AttentionCell. Received attention_cell={}'\ .format(attention_cell) return attention_cell
def _get_attention_cell(attention_cell, units=None): """ Parameters ---------- attention_cell : AttentionCell or str units : int or None Returns ------- attention_cell : AttentionCell """ if isinstance(attention_cell, str): if attention_cell == 'scaled_luong': return DotProductAttentionCell(units=units, scaled=True, normalized=False, luong_style=True) elif attention_cell == 'scaled_dot': return DotProductAttentionCell(units=None, scaled=True, normalized=False, luong_style=False) elif attention_cell == 'dot': return DotProductAttentionCell(units=None, scaled=False, normalized=False, luong_style=False) elif attention_cell == 'cosine': return DotProductAttentionCell(units=units, scaled=False, normalized=True) elif attention_cell == 'mlp': return MLPAttentionCell(units=units, normalized=False) elif attention_cell == 'normed_mlp': return MLPAttentionCell(units=units, normalized=True) else: raise NotImplementedError else: assert isinstance(attention_cell, AttentionCell),\ 'attention_cell must be either string or AttentionCell. Received attention_cell={}'\ .format(attention_cell) return attention_cell
def _get_attention_cell(attention_cell, units=None, scaled=True, num_heads=None,
                        use_bias=False, dropout=0.0):
    """

    Parameters
    ----------
    attention_cell : AttentionCell or str
    units : int or None

    Returns
    -------
    attention_cell : AttentionCell
    """
    if isinstance(attention_cell, str):
        if attention_cell == 'scaled_luong':
            return DotProductAttentionCell(units=units, scaled=True, normalized=False,
                                           use_bias=use_bias, dropout=dropout,
                                           luong_style=True)
        elif attention_cell == 'scaled_dot':
            return DotProductAttentionCell(units=units, scaled=True, normalized=False,
                                           use_bias=use_bias, dropout=dropout,
                                           luong_style=False)
        elif attention_cell == 'dot':
            return DotProductAttentionCell(units=units, scaled=False, normalized=False,
                                           use_bias=use_bias, dropout=dropout,
                                           luong_style=False)
        elif attention_cell == 'cosine':
            return DotProductAttentionCell(units=units, scaled=False, use_bias=use_bias,
                                           dropout=dropout, normalized=True)
        elif attention_cell == 'mlp':
            return MLPAttentionCell(units=units, normalized=False)
        elif attention_cell == 'normed_mlp':
            return MLPAttentionCell(units=units, normalized=True)
        elif attention_cell == 'multi_head':
            base_cell = DotProductAttentionCell(scaled=scaled, dropout=dropout)
            return MultiHeadAttentionCell(base_cell=base_cell, query_units=units,
                                          use_bias=use_bias, key_units=units,
                                          value_units=units, num_heads=num_heads)
        else:
            raise NotImplementedError
    else:
        assert isinstance(attention_cell, AttentionCell), \
            'attention_cell must be either string or AttentionCell. ' \
            'Received attention_cell={}'.format(attention_cell)
        return attention_cell
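# Minimal usage sketch for the factory above (an assumed example, not part of the
# original snippets). It expects mxnet and gluonnlp to be installed and
# _get_attention_cell to be in scope, e.g. imported from gluonnlp.model.attention_cell.
# All shapes and hyperparameters below are illustrative.
import mxnet as mx

cell = _get_attention_cell('multi_head', units=64, scaled=True, num_heads=4,
                           use_bias=False, dropout=0.1)
cell.initialize()

query = mx.nd.random.normal(shape=(2, 5, 64))  # (batch_size, query_length, query_dim)
key = mx.nd.random.normal(shape=(2, 7, 64))    # (batch_size, key_length, key_dim)

# value defaults to key when omitted; the cell returns the attended context
# vectors and the attention weights.
context, attn_weights = cell(query, key)
print(context.shape)       # expected (2, 5, 64): (batch_size, query_length, value_units)
print(attn_weights.shape)  # expected (2, 4, 5, 7): (batch_size, num_heads, query_length, key_length)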
def __init__(self, num_heads, **kwargs):
    super(SelfAttention, self).__init__(**kwargs)
    with self.name_scope():
        self.attention = MultiHeadAttentionCell(
            num_heads=num_heads,
            base_cell=DotProductAttentionCell(scaled=True,
                                              dropout=opt.layers_dropout,
                                              use_bias=False),
            query_units=opt.emb_encoder_conv_channels,
            key_units=opt.emb_encoder_conv_channels,
            value_units=opt.emb_encoder_conv_channels,
            use_bias=False,
            weight_initializer=Xavier())
def __init__(self, num_heads, **kwargs):
    super(SelfAttention, self).__init__(**kwargs)
    with self.name_scope():
        self.attention = MultiHeadAttentionCell(
            num_heads=num_heads,
            base_cell=DotProductAttentionCell(
                scaled=True,
                dropout=0.1,
                use_bias=False
            ),
            query_units=EMB_ENCODER_CONV_CHANNELS,
            key_units=EMB_ENCODER_CONV_CHANNELS,
            value_units=EMB_ENCODER_CONV_CHANNELS,
            use_bias=False,
            weight_initializer=Xavier()
        )
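# Hypothetical forward pass for the SelfAttention block above (the original forward
# method is not shown in the snippet; this is an assumed sketch). For self-attention
# the same tensor serves as query, key, and value, with an optional mask.
def forward(self, x, mask=None):
    # x: (batch_size, seq_length, EMB_ENCODER_CONV_CHANNELS)
    context, _ = self.attention(x, x, x, mask)
    return context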
import random

import mxnet as mx
from mxnet import nd
from mxnet.gluon import Block, nn, rnn
from gluonnlp.model import MultiHeadAttentionCell, DotProductAttentionCell

import RNN
import Constant
from customlayer import *

base_cell = DotProductAttentionCell(scaled=True, dropout=0.2)


class Resblock(Block):
    def __init__(self, model_dim, dropout=0.1):
        super(Resblock, self).__init__()
        self.model_dim = model_dim
        self.dropout = dropout
        self.resblock = nn.Sequential()
        with self.resblock.name_scope():
            self.resblock.add(nn.LayerNorm())
            self.resblock.add(nn.Dense(2 * self.model_dim, in_units=self.model_dim,
                                       activation="relu"))
            self.resblock.add(nn.Dropout(self.dropout))
            self.resblock.add(nn.Dense(self.model_dim, in_units=2 * self.model_dim))
            self.resblock.add(nn.Dropout(self.dropout))

    def forward(self, x):
        output = self.resblock(x)
        return output + x


class Encoder(Block):
    def __init__(self, embedding_dim, head_count, model_dim, drop_prob, dropout):
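# Minimal usage sketch for Resblock above (an assumed example, requiring only mxnet).
# Note that nn.Dense defaults to flatten=True and in_units is fixed to model_dim,
# so the block as written expects 2-D input of shape (batch_size, model_dim).
block = Resblock(model_dim=128, dropout=0.1)
block.initialize()
x = nd.random.normal(shape=(4, 128))
y = block(x)
print(y.shape)  # (4, 128): the residual output keeps the input shape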