def create_model(hparams, vocab_src, vocab_tgt):
    rnnlm = RNNLM(vocab_size=vocab_src.size(),
                  emb_size=hparams.emb_size,
                  hidden_size=hparams.hidden_size,
                  pad_idx=vocab_src[PAD_TOKEN],
                  dropout=hparams.dropout,
                  num_layers=hparams.num_dec_layers,
                  cell_type=hparams.cell_type,
                  tied_embeddings=hparams.tied_embeddings)
    encoder = RNNEncoder(emb_size=hparams.emb_size,
                         hidden_size=hparams.hidden_size,
                         bidirectional=hparams.bidirectional,
                         dropout=hparams.dropout,
                         num_layers=hparams.num_enc_layers,
                         cell_type=hparams.cell_type)
    attention = create_attention(hparams)
    decoder = create_decoder(attention, hparams)
    model = AEVNMT(tgt_vocab_size=vocab_tgt.size(),
                   emb_size=hparams.emb_size,
                   latent_size=hparams.latent_size,
                   encoder=encoder,
                   decoder=decoder,
                   language_model=rnnlm,
                   pad_idx=vocab_tgt[PAD_TOKEN],
                   dropout=hparams.dropout,
                   tied_embeddings=hparams.tied_embeddings)
    return model
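# A minimal usage sketch for create_model above, not taken from the repo: the
# argparse.Namespace container and every hyperparameter value are illustrative
# assumptions, and the real hparams object, vocab_src, and vocab_tgt come from
# the training pipeline. create_attention and create_decoder may read further
# fields not listed here.
from argparse import Namespace

example_hparams = Namespace(
    emb_size=256, hidden_size=512, latent_size=64, dropout=0.3,
    num_enc_layers=1, num_dec_layers=1, cell_type="lstm",
    bidirectional=True, tied_embeddings=False)

# model = create_model(example_hparams, vocab_src, vocab_tgt)  # -> AEVNMT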
from itertools import chain

import torch.nn as nn


class InferenceNetwork(nn.Module):

    def __init__(self, src_embedder, hidden_size, latent_size, bidirectional,
                 num_enc_layers, cell_type):
        """
        :param src_embedder: uses this embedder, but detaches its output from
                             the graph so as not to compute gradients for it.
        """
        super().__init__()
        self.src_embedder = src_embedder
        emb_size = src_embedder.embedding_dim
        self.encoder = RNNEncoder(emb_size=emb_size,
                                  hidden_size=hidden_size,
                                  bidirectional=bidirectional,
                                  dropout=0.,
                                  num_layers=num_enc_layers,
                                  cell_type=cell_type)
        encoding_size = hidden_size if not bidirectional else hidden_size * 2
        self.normal_layer = NormalLayer(encoding_size, hidden_size, latent_size)

    def forward(self, x, seq_mask_x, seq_len_x):
        # Embed the source sentence, detached so that the shared embedder
        # receives no gradients from the inference network.
        x_embed = self.src_embedder(x).detach()
        encoder_outputs, _ = self.encoder(x_embed, seq_len_x)

        # Average the encoder outputs over the non-padding positions: masked
        # sum over time, normalized by the true sequence lengths.
        mask = seq_mask_x.unsqueeze(-1).type_as(encoder_outputs)
        avg_encoder_output = (encoder_outputs * mask).sum(dim=1) \
            / seq_len_x.unsqueeze(-1).type_as(encoder_outputs)

        return self.normal_layer(avg_encoder_output)

    def parameters(self, recurse=True):
        # Exclude the (frozen) source embedder from this module's parameters.
        return chain(self.encoder.parameters(recurse=recurse),
                     self.normal_layer.parameters(recurse=recurse))

    def named_parameters(self, prefix='', recurse=True):
        # Exclude the (frozen) source embedder here as well, and honor the
        # prefix and recurse arguments instead of hardcoding them.
        return chain(
            self.encoder.named_parameters(prefix=prefix, recurse=recurse),
            self.normal_layer.named_parameters(prefix=prefix, recurse=recurse),
        )
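# A hypothetical smoke test for InferenceNetwork: it maps a padded batch of
# source token ids to the approximate posterior over the latent variable.
# RNNEncoder and NormalLayer are the repo's own modules; the assumption here
# is that NormalLayer returns a torch.distributions.Normal, and the shapes
# and hyperparameter values are made up.
import torch
import torch.nn as nn

src_embedder = nn.Embedding(num_embeddings=1000, embedding_dim=256, padding_idx=0)
inf_net = InferenceNetwork(src_embedder=src_embedder, hidden_size=512,
                           latent_size=64, bidirectional=True,
                           num_enc_layers=1, cell_type="lstm")

x = torch.randint(1, 1000, (8, 12))                 # [batch, time] token ids
seq_len_x = torch.full((8,), 12, dtype=torch.long)  # true lengths (no padding here)
seq_mask_x = torch.ones(8, 12, dtype=torch.bool)    # 1 = real token, 0 = pad
q_z = inf_net(x, seq_mask_x, seq_len_x)
z = q_z.rsample()                                   # reparameterized sample, [8, 64]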
def create_encoder(hparams):
    if hparams.encoder_style == "rnn":
        return RNNEncoder(emb_size=hparams.emb_size,
                          hidden_size=hparams.hidden_size,
                          bidirectional=hparams.bidirectional,
                          dropout=hparams.dropout,
                          num_layers=hparams.num_enc_layers,
                          cell_type=hparams.cell_type)
    elif hparams.encoder_style == "transformer":
        return TransformerEncoder(input_size=hparams.emb_size,
                                  num_heads=hparams.transformer_heads,
                                  num_layers=hparams.num_enc_layers,
                                  dim_ff=hparams.transformer_hidden,
                                  dropout=hparams.dropout)
    else:
        raise Exception(f"Unknown encoder style: {hparams.encoder_style}")
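# Illustrative only: create_encoder dispatches on hparams.encoder_style, so
# switching architectures is a one-flag change. The Namespace fields mirror
# the attributes read above; the values are made up.
from argparse import Namespace

rnn_encoder = create_encoder(Namespace(
    encoder_style="rnn", emb_size=256, hidden_size=512, bidirectional=True,
    dropout=0.3, num_enc_layers=1, cell_type="lstm"))

transformer_encoder = create_encoder(Namespace(
    encoder_style="transformer", emb_size=256, num_enc_layers=6,
    transformer_heads=8, transformer_hidden=1024, dropout=0.1))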
def create_model(hparams, vocab_src, vocab_tgt):
    encoder = RNNEncoder(emb_size=hparams.emb_size,
                         hidden_size=hparams.hidden_size,
                         bidirectional=hparams.bidirectional,
                         dropout=hparams.dropout,
                         num_layers=hparams.num_enc_layers,
                         cell_type=hparams.cell_type)
    attention = create_attention(hparams)
    decoder = create_decoder(attention, hparams)
    model = ConditionalNMT(src_vocab_size=vocab_src.size(),
                           tgt_vocab_size=vocab_tgt.size(),
                           emb_size=hparams.emb_size,
                           encoder=encoder,
                           decoder=decoder,
                           pad_idx=vocab_tgt[PAD_TOKEN],
                           dropout=hparams.dropout,
                           tied_embeddings=hparams.tied_embeddings)
    return model
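# Compared with the AEVNMT factory above, this conditional baseline needs no
# language model and no latent_size; otherwise a (hypothetical) call looks
# the same:
# model = create_model(example_hparams, vocab_src, vocab_tgt)  # -> ConditionalNMT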
def __init__(self, embedder, hidden_size, num_layers, cell_type,
             bidirectional=True, composition="avg", dropout=0.):
    super().__init__()
    # Validate the composition mode before building any submodules.
    if composition not in ["avg", "maxpool"]:
        raise NotImplementedError(
            "I only support average and maxpool, but I welcome contributions!")
    self.embedder = embedder
    self.rnn = RNNEncoder(emb_size=embedder.embedding_dim,
                          hidden_size=hidden_size,
                          bidirectional=bidirectional,
                          dropout=dropout,
                          num_layers=num_layers,
                          cell_type=cell_type)
    self.output_size = hidden_size if not bidirectional else hidden_size * 2
    self.hidden_size = hidden_size
    self.composition = composition
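# The forward pass for this encoder is not shown in this excerpt; a sketch of
# what the two composition modes would compute (masked averaging vs. masked
# max-pooling over time) might look like the following. The method name,
# signature, and mask conventions are assumptions.
def forward(self, x, seq_mask_x, seq_len_x):
    x_embed = self.embedder(x)
    outputs, _ = self.rnn(x_embed, seq_len_x)          # [B, T, output_size]
    mask = seq_mask_x.unsqueeze(-1).type_as(outputs)   # [B, T, 1], 1 = real token
    if self.composition == "avg":
        # Sum over non-padding positions, then divide by the true lengths.
        return (outputs * mask).sum(dim=1) / seq_len_x.unsqueeze(-1).type_as(outputs)
    else:  # "maxpool"
        # Fill padding positions with -inf so they never win the max.
        return outputs.masked_fill(mask == 0, float("-inf")).max(dim=1)[0]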