def __init__(self, vocab_size, pad_id, config=None):
    """Build the pointer model: embedding, encoders, bridge layers, decoder.

    Args:
        vocab_size: size of the (shared) vocabulary.
        pad_id: embedding index reserved for padding.
        config: dict with 'data' and 'model' sections; 'model' supplies
            emb_dim / enc_hidden_dim / dec_hidden_dim / enc_layers /
            bidirectional / dropout / encoder.
    """
    super(PointerModel, self).__init__()
    self.vocab_size = vocab_size
    self.pad_id = pad_id
    self.config = config
    self.batch_size = config['data']['batch_size']
    self.options = config['model']
    opts = self.options

    self.embedding = nn.Embedding(self.vocab_size, opts['emb_dim'], self.pad_id)

    # Only the LSTM encoder is implemented; fail fast otherwise.
    # TODO: GRU encoder
    if opts['encoder'] != 'lstm':
        raise NotImplementedError('unknown encoder type')
    self.encoder = encoders.LSTMEncoder(
        opts['emb_dim'], opts['enc_hidden_dim'], opts['enc_layers'],
        opts['bidirectional'], opts['dropout'])
    self.ctx_bridge = nn.Linear(opts['enc_hidden_dim'], opts['dec_hidden_dim'])

    # Separate (unpacked) encoder for the attribute sequence.
    self.attribute_encoder = encoders.LSTMEncoder(
        opts['emb_dim'], opts['enc_hidden_dim'], opts['enc_layers'],
        opts['bidirectional'], opts['dropout'], pack=False)
    self.attr_size = opts['enc_hidden_dim']

    # Scalar generation-probability gate (input dim 4x the encoder hidden size).
    self.p_gen_linear = nn.Linear(opts['enc_hidden_dim'] * 4, 1)

    # Both bridges map [attribute state; encoder state] into decoder space.
    bridge_in = self.attr_size + opts['enc_hidden_dim']
    self.c_bridge = nn.Linear(bridge_in, opts['dec_hidden_dim'])
    self.h_bridge = nn.Linear(bridge_in, opts['dec_hidden_dim'])

    self.decoder = decoders.StackedAttentionLSTM(config=config)
    self.output_projection = nn.Linear(opts['dec_hidden_dim'], self.vocab_size)
    self.softmax = nn.Softmax(dim=-1)
    self.init_weights()
def __init__(
    self,
    src_vocab_size,
    tgt_vocab_size,
    pad_id_src,
    pad_id_tgt,
    config=None,
):
    """Initialize model.

    Builds source/target embeddings, the source encoder, the
    model-type-specific attribute component ('delete' /
    'delete_retrieve' / 'seq2seq'), the state bridges, and the
    attentional decoder.
    """
    super(SeqModel, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.pad_id_src = pad_id_src
    self.pad_id_tgt = pad_id_tgt
    self.batch_size = config['data']['batch_size']
    self.config = config
    opts = config['model']
    self.options = opts
    self.model_type = opts['model_type']

    emb_dim = opts['emb_dim']
    src_hidden = opts['src_hidden_dim']
    tgt_hidden = opts['tgt_hidden_dim']

    self.src_embedding = nn.Embedding(self.src_vocab_size, emb_dim, self.pad_id_src)
    if self.config['data']['share_vocab']:
        # One embedding table shared by source and target.
        self.tgt_embedding = self.src_embedding
    else:
        self.tgt_embedding = nn.Embedding(self.tgt_vocab_size, emb_dim, self.pad_id_tgt)

    # Only the LSTM encoder is implemented; fail fast otherwise.
    if opts['encoder'] != 'lstm':
        raise NotImplementedError('unknown encoder type')
    self.encoder = encoders.LSTMEncoder(
        emb_dim, src_hidden, opts['src_layers'],
        opts['bidirectional'], opts['dropout'])
    self.ctx_bridge = nn.Linear(src_hidden, tgt_hidden)

    # Attribute component depends on the model type; attr_size is the
    # width of the attribute representation concatenated onto the
    # encoder's final states before bridging.
    if self.model_type == 'delete':
        self.attribute_embedding = nn.Embedding(
            num_embeddings=2, embedding_dim=emb_dim)
        attr_size = emb_dim
    elif self.model_type == 'delete_retrieve':
        self.attribute_encoder = encoders.LSTMEncoder(
            emb_dim, src_hidden, opts['src_layers'],
            opts['bidirectional'], opts['dropout'], pack=False)
        attr_size = src_hidden
    elif self.model_type == 'seq2seq':
        attr_size = 0
    else:
        raise NotImplementedError('unknown model type')

    self.c_bridge = nn.Linear(attr_size + src_hidden, tgt_hidden)
    self.h_bridge = nn.Linear(attr_size + src_hidden, tgt_hidden)

    self.decoder = decoders.StackedAttentionLSTM(config=config)
    self.output_projection = nn.Linear(tgt_hidden, tgt_vocab_size)
    self.softmax = nn.Softmax(dim=-1)
    self.init_weights()
def __init__(
    self,
    src_vocab_size,
    tgt_vocab_size,
    pad_id_src,
    pad_id_tgt,
    config=None,
):
    """Initialize model.

    Builds source/target embeddings, the source encoder, the
    model-type-specific attribute component ('delete' /
    'delete_retrieve' / 'seq2seq'), optional experimental
    side-prediction heads, the state bridges, and the attentional
    decoder.
    """
    super(SeqModel, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.pad_id_src = pad_id_src
    self.pad_id_tgt = pad_id_tgt
    self.batch_size = config['data']['batch_size']
    self.config = config
    self.options = config['model']
    self.model_type = config['model']['model_type']

    self.src_embedding = nn.Embedding(
        self.src_vocab_size, self.options['emb_dim'], self.pad_id_src)
    if self.config['data']['share_vocab']:
        # One embedding table shared by source and target.
        self.tgt_embedding = self.src_embedding
    else:
        self.tgt_embedding = nn.Embedding(
            self.tgt_vocab_size, self.options['emb_dim'], self.pad_id_tgt)

    if self.options['encoder'] == 'lstm':
        self.encoder = encoders.LSTMEncoder(
            self.options['emb_dim'], self.options['src_hidden_dim'],
            self.options['src_layers'], self.options['bidirectional'],
            self.options['dropout'])
        self.ctx_bridge = nn.Linear(self.options['src_hidden_dim'],
                                    self.options['tgt_hidden_dim'])
    else:
        raise NotImplementedError('unknown encoder type')

    # # # # # # # # # # # #
    # NEW STUFF FROM STD SEQ2SEQ
    if self.model_type == 'delete':
        self.attribute_embedding = nn.Embedding(
            num_embeddings=2, embedding_dim=self.options['emb_dim'])
        attr_size = self.options['emb_dim']
    # BUG FIX: was `self.model_type in 'delete_retrieve'`, a substring
    # test (e.g. 'ete_ret' would match); use exact equality instead.
    elif self.model_type == 'delete_retrieve':
        self.attribute_encoder = encoders.LSTMEncoder(
            self.options['emb_dim'], self.options['src_hidden_dim'],
            self.options['src_layers'], self.options['bidirectional'],
            self.options['dropout'], pack=False)
        attr_size = self.options['src_hidden_dim']
    elif self.model_type == 'seq2seq':
        attr_size = 0
    else:
        raise NotImplementedError('unknown model type')

    # BUG FIX: c_bridge previously omitted attr_size, making its input
    # width inconsistent with h_bridge (and with the non-experimental
    # variant of this constructor) whenever attr_size > 0.
    self.c_bridge = nn.Linear(attr_size + self.options['src_hidden_dim'],
                              self.options['tgt_hidden_dim'])
    self.h_bridge = nn.Linear(attr_size + self.options['src_hidden_dim'],
                              self.options['tgt_hidden_dim'])

    if self.config['experimental']['predict_sides']:
        side_attn_type = self.config['experimental']['side_attn_type']
        if side_attn_type == 'feedforward':
            self.side_attn = ops.FeedForwardAttention(
                input_dim=self.options['src_hidden_dim'],
                hidden_dim=self.options['src_hidden_dim'],
                layers=2,
                dropout=self.options['dropout'])
        elif side_attn_type == 'dot':
            self.side_attn = ops.BilinearAttention(
                hidden=self.options['src_hidden_dim'])
        elif side_attn_type == 'bahdanau':
            self.side_attn = ops.BilinearAttention(
                hidden=self.options['src_hidden_dim'], score_fn='bahdanau')
        else:
            # Fail fast instead of leaving self.side_attn undefined.
            raise NotImplementedError('unknown side_attn_type')
        self.side_predictor = ops.FFNN(
            input_dim=self.options['src_hidden_dim'],
            hidden_dim=self.options['src_hidden_dim'],
            output_dim=self.config['experimental']['n_side_outputs'],  # TODO -- SET SOMEWHERE
            nlayers=2,
            dropout=self.options['dropout'])
        if self.config['experimental']['add_side_embeddings']:
            # Learned per-side embeddings, concatenated onto the hidden
            # state and compressed back to decoder width.
            self.side_embeddings = nn.Parameter(
                torch.zeros(self.config['experimental']['n_side_outputs'],
                            self.options['emb_dim']),
                requires_grad=True)
            self.h_compression = nn.Linear(
                self.options['emb_dim'] + self.options['src_hidden_dim'],
                self.options['tgt_hidden_dim'])
        self.side_softmax = nn.Softmax(dim=-1)
    # # # # # # # # # # # #
    # END NEW STUFF

    self.decoder = decoders.StackedAttentionLSTM(config=config)
    self.output_projection = nn.Linear(self.options['tgt_hidden_dim'],
                                       tgt_vocab_size)
    self.softmax = nn.Softmax(dim=-1)
    self.init_weights()