def build_decoder(hidden_size: int, embedding_dim: int, vocab_size: int):
    """Wire together the sub-modules of a Decoder and return it.

    Args:
        hidden_size: Hidden state size of the decoder LSTM layer.
        embedding_dim: Dimensionality of the word embeddings; must equal
            ``hidden_size`` (mixed sizes are not supported yet).
        vocab_size: Size of the output vocabulary.

    Returns:
        A fully constructed ``Decoder`` instance.
    """
    # Mixed embedding/hidden sizes are not implemented in the decoder stack.
    assert embedding_dim == hidden_size, (
        f"Different embedding_dim and hidden_size not implemented")
    # Two separate additive-attention modules: one over words, one over agents.
    word_attn = AdditiveAttention(hidden_size, bias=True)
    agent_attn = AdditiveAttention(hidden_size, bias=True)
    return Decoder(
        decoder_layer=LSTMLayer(input_size=hidden_size, hidden_size=hidden_size),
        vocab_predictor=VocabPredictor(hidden_size, vocab_size),
        generator=Generator(hidden_size, embedding_dim),
        word_attention=word_attn,
        agent_attention=agent_attn)
def _make_model(self):
    """Build the full Seq2Seq model from ``self._config``.

    Constructs a frozen pretrained embedding, an RNN encoder, a bridge
    between encoder and decoder states, a multi-layer decoder cell
    (LSTM or GRU), one of five attention variants, and assembles them
    into a ``Seq2Seq`` model.

    Returns:
        The assembled ``Seq2Seq`` model.

    Raises:
        ValueError: If ``self._config.attention_type`` is not one of the
            supported attention names.
    """
    # embedding: load pretrained vectors and freeze them (not fine-tuned)
    embedding = nn.Embedding(num_embeddings=self._config.vocab_size,
                             embedding_dim=self._config.embed_size)
    embedding.weight.data.copy_(
        torch.from_numpy(np.load(self._config.embedding_file_name)))
    embedding.weight.requires_grad = False
    # encoder
    encoder = Encoder(rnn_type=self._config.rnn_type,
                      embed_size=self._config.embed_size,
                      hidden_size=self._config.hidden_size,
                      num_layers=self._config.num_layers,
                      bidirectional=self._config.bidirectional,
                      dropout=self._config.dropout)
    # bridge: adapts final encoder state to the decoder's initial state
    bridge = Bridge(rnn_type=self._config.rnn_type,
                    hidden_size=self._config.hidden_size,
                    bidirectional=self._config.bidirectional)
    # decoder rnn cell -- input is [embedding ; context], hence 2 * embed_size
    if self._config.rnn_type == 'LSTM':
        rnn_cell = MultiLayerLSTMCells(
            input_size=2 * self._config.embed_size,
            hidden_size=self._config.hidden_size,
            num_layers=self._config.num_layers,
            dropout=self._config.dropout)
    else:
        rnn_cell = MultiLayerGRUCells(input_size=2 * self._config.embed_size,
                                      hidden_size=self._config.hidden_size,
                                      num_layers=self._config.num_layers,
                                      dropout=self._config.dropout)
    # attention
    if self._config.attention_type == 'Dot':
        attention = DotAttention()
    elif self._config.attention_type == 'ScaledDot':
        attention = ScaledDotAttention()
    elif self._config.attention_type == 'Additive':
        attention = AdditiveAttention(query_size=self._config.hidden_size,
                                      key_size=self._config.hidden_size)
    elif self._config.attention_type == 'Multiplicative':
        attention = MultiplicativeAttention(
            query_size=self._config.hidden_size,
            key_size=self._config.hidden_size)
    elif self._config.attention_type == 'MLP':
        attention = MultiLayerPerceptronAttention(
            query_size=self._config.hidden_size,
            key_size=self._config.hidden_size,
            out_size=1)
    else:
        # Name the offending value so misconfiguration is easy to diagnose.
        raise ValueError(
            f'Unsupported attention_type: {self._config.attention_type!r}')
    # decoder
    decoder = Decoder(embedding, rnn_cell, attention, self._config.hidden_size)
    # model
    model = Seq2Seq(embedding, encoder, bridge, decoder)
    return model
def __init__(self, hparams, weight=None):
    """Set up the NRMS user/news encoder stack.

    Args:
        hparams (dict): Configuration; must provide 'encoder_size',
            'nhead' and 'v_size'.
        weight: Optional pretrained embedding weight forwarded to the
            document encoder.
    """
    super(NRMS, self).__init__()
    self.hparams = hparams
    # Hoist the repeatedly-used size for readability.
    enc_size = hparams['encoder_size']
    self.doc_encoder = DocEncoder(hparams, weight=weight)
    self.mha = nn.MultiheadAttention(enc_size, hparams['nhead'], dropout=0.1)
    self.proj = nn.Linear(enc_size, enc_size)
    self.additive_attn = AdditiveAttention(enc_size, hparams['v_size'])
    self.criterion = nn.CrossEntropyLoss()
def __init__(self, hparams, weight=None) -> None:
    """Build the document encoder: embedding -> self-attention -> pooling.

    Args:
        hparams (dict): Configuration; must provide 'embed_size',
            'encoder_size', 'nhead' and 'v_size'.
        weight: Optional pretrained embedding matrix; when omitted a
            fresh embedding table is created.
    """
    super(DocEncoder, self).__init__()
    self.hparams = hparams
    embed_size = hparams['embed_size']
    enc_size = hparams['encoder_size']
    if weight is None:
        # NOTE(review): 22537 appears to be the vocabulary size — confirm
        # against the tokenizer used upstream.
        self.embedding = nn.Embedding(22537, 300, padding_idx=0)
    else:
        self.embedding = nn.Embedding.from_pretrained(
            weight, freeze=False, padding_idx=0)
    self.mha = nn.MultiheadAttention(
        embed_size, num_heads=hparams['nhead'], dropout=0.1)
    self.proj = nn.Linear(embed_size, enc_size)
    self.additive_attn = AdditiveAttention(enc_size, hparams['v_size'])
def __init__(self, hparams, weight=None):
    """Construct the NRMS model components.

    Args:
        hparams (dict): Configuration; must provide 'encoder_size',
            'nhead' and 'v_size'.
        weight (tensor): Optional pretrained embedding weight passed
            through to the document encoder.
    """
    super(NRMS, self).__init__()
    self.hparams = hparams
    encoder_size = hparams['encoder_size']
    self.doc_encoder = DocEncoder(hparams, weight=weight)
    self.mha = nn.MultiheadAttention(
        encoder_size, hparams['nhead'], dropout=0.1)
    self.proj = nn.Linear(encoder_size, encoder_size)
    self.additive_attn = AdditiveAttention(encoder_size, hparams['v_size'])
    self.criterion = nn.CrossEntropyLoss()
def __init__(self, hparams, weight=None):
    """Construct the NRMS model components.

    Args:
        hparams (dict): Configuration; must provide 'encoder_size',
            'nhead' and 'v_size'.
        weight: Optional pretrained embedding weight forwarded to the
            document encoder.
    """
    super(NRMS, self).__init__()
    self.hparams = hparams
    self.doc_encoder = DocEncoder(hparams, weight=weight)
    # A prior experiment with a custom MultiheadAttentionContainer was
    # removed here in favor of the built-in nn.MultiheadAttention.
    self.mha = nn.MultiheadAttention(hparams['encoder_size'],
                                     hparams['nhead'],
                                     dropout=0.1)
    self.proj = nn.Linear(hparams['encoder_size'], hparams['encoder_size'])
    self.additive_attn = AdditiveAttention(hparams['encoder_size'],
                                           hparams['v_size'])
    self.criterion = nn.CrossEntropyLoss()
def __init__(self, hparams, weight=None) -> None:
    """Build the document encoder: embedding -> self-attention -> pooling.

    Args:
        hparams (dict): Configuration; must provide 'embed_size',
            'encoder_size', 'nhead' and 'v_size'.
        weight (tensor): Optional pretrained embedding matrix; when
            omitted a small fresh embedding table is created.
    """
    super(DocEncoder, self).__init__()
    self.hparams = hparams
    embed_size = hparams['embed_size']
    encoder_size = hparams['encoder_size']
    if weight is None:
        self.embedding = nn.Embedding(100, 300)
    else:
        self.embedding = nn.Embedding.from_pretrained(
            weight, freeze=False, padding_idx=0)
    self.mha = nn.MultiheadAttention(embed_size, hparams['nhead'],
                                     dropout=0.1)
    self.proj = nn.Linear(embed_size, encoder_size)
    self.additive_attn = AdditiveAttention(encoder_size, hparams['v_size'])
def attention():
    """Smoke-test the custom attention modules with random tensors.

    Runs a multi-head attention container and an additive attention
    module on random inputs and prints the resulting shapes.
    """
    from model.attention import MultiheadAttentionContainer, InProjContainer, ScaledDotProduct, AdditiveAttention
    query = torch.rand(10, 16, 200)
    keys = values = torch.rand(10, 16, 200)

    print('MultiHead: ')
    projections = InProjContainer(nn.Linear(200, 200),
                                  nn.Linear(200, 200),
                                  nn.Linear(200, 200))
    multihead = MultiheadAttentionContainer(
        nhead=8,
        in_proj_container=projections,
        attention_layer=ScaledDotProduct(),
        out_proj=nn.Linear(200, 200))
    out, weights = multihead(query, keys, values)
    print(out.shape)

    print('Additive: ')
    additive = AdditiveAttention(200, 300)
    # AdditiveAttention expects batch-first input, hence the permute.
    out = out.permute(1, 0, 2)
    out, weights = additive(out)
    print(out.size(), weights.size())
def _make_model(self):
    """Build the full Seq2Seq model from ``self._config``.

    Constructs a frozen pretrained embedding, an encoder, a bridge,
    a multi-layer LSTM decoder cell with additive attention, and
    assembles them into a ``Seq2Seq`` model.

    Returns:
        The assembled ``Seq2Seq`` model.
    """
    # Load pretrained vectors and freeze them (embedding is not fine-tuned).
    embedding = nn.Embedding(self._config.vocab_size, self._config.embed_size)
    embedding.weight.data.copy_(
        torch.from_numpy(np.load(self._config.embedding_file_name)))
    embedding.weight.requires_grad = False
    encoder = Encoder(self._config.embed_size, self._config.hidden_size,
                      self._config.num_layers, self._config.bidirectional,
                      self._config.dropout)
    # Bridge adapts the final encoder state to the decoder's initial state.
    bridge = Bridge(self._config.hidden_size, self._config.bidirectional)
    # Decoder cell input is [embedding ; context], hence 2 * embed_size.
    lstm_cell = MultiLayerLSTMCells(2 * self._config.embed_size,
                                    self._config.hidden_size,
                                    self._config.num_layers,
                                    dropout=self._config.dropout)
    attention = AdditiveAttention(self._config.hidden_size,
                                  self._config.hidden_size)
    decoder = Decoder(embedding, lstm_cell, attention,
                      self._config.hidden_size)
    model = Seq2Seq(embedding, encoder, bridge, decoder)
    return model