예제 #1
0
    def __init__(self, vocab_size, pad_id, config=None):
        """Assemble the sub-modules of the pointer model.

        Args:
            vocab_size: number of rows in the token embedding table and the
                output width of the final projection.
            pad_id: index handed to ``nn.Embedding`` as its padding index.
            config: nested mapping. ``config['data']['batch_size']`` and
                ``config['model']`` are read here, and the whole object is
                forwarded to the decoder. The ``None`` default is not usable
                in practice because the mapping is indexed immediately.

        Raises:
            NotImplementedError: if ``config['model']['encoder']`` is
                anything other than ``'lstm'``.
        """
        super(PointerModel, self).__init__()
        self.vocab_size = vocab_size
        self.pad_id = pad_id
        self.config = config
        self.batch_size = config['data']['batch_size']
        self.options = config['model']
        opts = self.options

        # Token embedding shared by everything that embeds ids in this model.
        emb_dim = opts['emb_dim']
        self.embedding = nn.Embedding(
            self.vocab_size, emb_dim, padding_idx=self.pad_id)

        # Only the LSTM encoder exists so far; bail out early otherwise.
        if opts['encoder'] != 'lstm':
            # TODO: GRU encoder
            raise NotImplementedError('unknown encoder type')

        # Both encoders below are built from the same positional settings.
        enc_hidden = opts['enc_hidden_dim']
        lstm_args = (
            emb_dim,
            enc_hidden,
            opts['enc_layers'],
            opts['bidirectional'],
            opts['dropout'],
        )

        self.encoder = encoders.LSTMEncoder(*lstm_args)
        dec_hidden = opts['dec_hidden_dim']
        # Maps encoder-width vectors to decoder width.
        self.ctx_bridge = nn.Linear(
            in_features=enc_hidden, out_features=dec_hidden)

        # Second encoder with identical dimensions, built with pack=False.
        self.attribute_encoder = encoders.LSTMEncoder(*lstm_args, pack=False)
        self.attr_size = enc_hidden

        # Single-output linear layer over a 4 * enc_hidden_dim input.
        # NOTE(review): presumably the pointer/generate gate; which tensors
        # are concatenated to reach that width is not visible here -- confirm
        # against forward().
        self.p_gen_linear = nn.Linear(
            in_features=enc_hidden * 4, out_features=1)

        # The c/h bridges take an (attr_size + enc_hidden_dim)-wide input and
        # emit decoder-width vectors.
        bridge_in = self.attr_size + enc_hidden
        self.c_bridge = nn.Linear(
            in_features=bridge_in, out_features=dec_hidden)
        self.h_bridge = nn.Linear(
            in_features=bridge_in, out_features=dec_hidden)

        self.decoder = decoders.StackedAttentionLSTM(config=config)

        # Decoder width -> one score per vocabulary entry.
        self.output_projection = nn.Linear(
            in_features=dec_hidden, out_features=self.vocab_size)
        self.softmax = nn.Softmax(dim=-1)
        self.init_weights()
예제 #2
0
    def __init__(
        self,
        src_vocab_size,
        tgt_vocab_size,
        pad_id_src,
        pad_id_tgt,
        config=None,
    ):
        """Build the embeddings, encoder(s), bridges, decoder and projection.

        Args:
            src_vocab_size: number of rows in the source embedding table.
            tgt_vocab_size: number of rows in the target embedding table
                (when vocabularies are not shared) and output width of the
                final projection.
            pad_id_src: padding index for the source embedding.
            pad_id_tgt: padding index for the target embedding.
            config: nested mapping. ``config['data']`` supplies
                ``batch_size`` and ``share_vocab``; ``config['model']``
                supplies the layer sizes, ``encoder`` and ``model_type``.
                The ``None`` default is not usable in practice because the
                mapping is indexed immediately.

        Raises:
            NotImplementedError: if the configured encoder is not ``'lstm'``
                or the model type is not one of ``'delete'``,
                ``'delete_retrieve'`` or ``'seq2seq'``.
        """
        super(SeqModel, self).__init__()
        self.src_vocab_size = src_vocab_size
        self.tgt_vocab_size = tgt_vocab_size
        self.pad_id_src = pad_id_src
        self.pad_id_tgt = pad_id_tgt
        self.batch_size = config['data']['batch_size']
        self.config = config
        self.options = config['model']
        self.model_type = self.options['model_type']
        opts = self.options

        emb_dim = opts['emb_dim']
        self.src_embedding = nn.Embedding(
            self.src_vocab_size, emb_dim, padding_idx=self.pad_id_src)

        # With a shared vocabulary the target side reuses the very same
        # embedding module; otherwise it gets its own table.
        if not self.config['data']['share_vocab']:
            self.tgt_embedding = nn.Embedding(
                self.tgt_vocab_size, emb_dim, padding_idx=self.pad_id_tgt)
        else:
            self.tgt_embedding = self.src_embedding

        # Only the LSTM encoder is implemented.
        if opts['encoder'] != 'lstm':
            raise NotImplementedError('unknown encoder type')

        src_hidden = opts['src_hidden_dim']
        lstm_args = (
            emb_dim,
            src_hidden,
            opts['src_layers'],
            opts['bidirectional'],
            opts['dropout'],
        )
        self.encoder = encoders.LSTMEncoder(*lstm_args)
        tgt_hidden = opts['tgt_hidden_dim']
        # Maps encoder-width vectors to decoder width.
        self.ctx_bridge = nn.Linear(
            in_features=src_hidden, out_features=tgt_hidden)

        # Attribute handling depends on the model variant; attr_size is the
        # extra input width the c/h bridges must accept for it.
        variant = self.model_type
        if variant == 'seq2seq':
            # Plain seq2seq: no attribute information at all.
            attr_size = 0
        elif variant == 'delete':
            # Two-row lookup table of attribute embeddings.
            self.attribute_embedding = nn.Embedding(2, emb_dim)
            attr_size = emb_dim
        elif variant == 'delete_retrieve':
            # Second LSTM encoder, same sizes as the main one, pack=False.
            self.attribute_encoder = encoders.LSTMEncoder(
                *lstm_args, pack=False)
            attr_size = src_hidden
        else:
            raise NotImplementedError('unknown model type')

        bridge_in = attr_size + src_hidden
        self.c_bridge = nn.Linear(
            in_features=bridge_in, out_features=tgt_hidden)
        self.h_bridge = nn.Linear(
            in_features=bridge_in, out_features=tgt_hidden)

        self.decoder = decoders.StackedAttentionLSTM(config=config)

        # Decoder width -> one score per target-vocabulary entry.
        self.output_projection = nn.Linear(
            in_features=tgt_hidden, out_features=tgt_vocab_size)

        self.softmax = nn.Softmax(dim=-1)

        self.init_weights()
예제 #3
0
    def __init__(
        self,
        src_vocab_size,
        tgt_vocab_size,
        pad_id_src,
        pad_id_tgt,
        config=None,
    ):
        """Initialize model.

        Builds the embeddings, the source encoder, the optional attribute
        module, the state bridges, the optional side-prediction modules, the
        decoder and the output projection.

        Args:
            src_vocab_size: number of rows in the source embedding table.
            tgt_vocab_size: number of rows in the target embedding table
                (when vocabularies are not shared) and output width of the
                final projection.
            pad_id_src: padding index for the source embedding.
            pad_id_tgt: padding index for the target embedding.
            config: nested mapping. ``config['data']`` supplies
                ``batch_size`` and ``share_vocab``; ``config['model']``
                supplies layer sizes, ``encoder`` and ``model_type``;
                ``config['experimental']`` supplies ``predict_sides`` and,
                when that is truthy, ``side_attn_type``, ``n_side_outputs``
                and ``add_side_embeddings``. The ``None`` default is not
                usable in practice because the mapping is indexed
                immediately.

        Raises:
            NotImplementedError: if the configured encoder is not ``'lstm'``
                or the model type is not exactly one of ``'delete'``,
                ``'delete_retrieve'`` or ``'seq2seq'``.
        """
        super(SeqModel, self).__init__()
        self.src_vocab_size = src_vocab_size
        self.tgt_vocab_size = tgt_vocab_size
        self.pad_id_src = pad_id_src
        self.pad_id_tgt = pad_id_tgt
        self.batch_size = config['data']['batch_size']
        self.config = config
        self.options = config['model']
        self.model_type = config['model']['model_type']

        self.src_embedding = nn.Embedding(self.src_vocab_size,
                                          self.options['emb_dim'],
                                          self.pad_id_src)

        # With a shared vocabulary the target side reuses the very same
        # embedding module; otherwise it gets its own table.
        if self.config['data']['share_vocab']:
            self.tgt_embedding = self.src_embedding
        else:
            self.tgt_embedding = nn.Embedding(self.tgt_vocab_size,
                                              self.options['emb_dim'],
                                              self.pad_id_tgt)

        if self.options['encoder'] == 'lstm':
            self.encoder = encoders.LSTMEncoder(self.options['emb_dim'],
                                                self.options['src_hidden_dim'],
                                                self.options['src_layers'],
                                                self.options['bidirectional'],
                                                self.options['dropout'])
            # Maps encoder-width vectors to decoder width.
            self.ctx_bridge = nn.Linear(self.options['src_hidden_dim'],
                                        self.options['tgt_hidden_dim'])

        else:
            raise NotImplementedError('unknown encoder type')

        # # # # # #  # # # # # #  # # # # #  NEW STUFF FROM STD SEQ2SEQ

        # attr_size is the extra input width contributed by the attribute
        # representation for the chosen model variant.
        if self.model_type == 'delete':
            # Two-row lookup table of attribute embeddings.
            self.attribute_embedding = nn.Embedding(
                num_embeddings=2, embedding_dim=self.options['emb_dim'])
            attr_size = self.options['emb_dim']

        # Exact comparison on purpose: `in` against a string is a substring
        # test, so values such as '' or 'retrieve' would wrongly be accepted
        # here instead of reaching the "unknown model type" error below.
        elif self.model_type == 'delete_retrieve':
            self.attribute_encoder = encoders.LSTMEncoder(
                self.options['emb_dim'],
                self.options['src_hidden_dim'],
                self.options['src_layers'],
                self.options['bidirectional'],
                self.options['dropout'],
                pack=False)
            attr_size = self.options['src_hidden_dim']

        elif self.model_type == 'seq2seq':
            attr_size = 0

        else:
            raise NotImplementedError('unknown model type')

        # NOTE(review): c_bridge takes only src_hidden_dim inputs while
        # h_bridge takes attr_size + src_hidden_dim. Left as-is because the
        # forward pass is not visible here -- confirm the asymmetry is
        # intentional.
        self.c_bridge = nn.Linear(self.options['src_hidden_dim'],
                                  self.options['tgt_hidden_dim'])
        self.h_bridge = nn.Linear(attr_size + self.options['src_hidden_dim'],
                                  self.options['tgt_hidden_dim'])

        experimental = self.config['experimental']
        if experimental['predict_sides']:
            # Attention module used for side prediction, selected by name.
            # NOTE(review): any other side_attn_type leaves self.side_attn
            # unset; confirm whether that should be an error.
            side_attn_type = experimental['side_attn_type']
            if side_attn_type == 'feedforward':
                self.side_attn = ops.FeedForwardAttention(
                    input_dim=self.options['src_hidden_dim'],
                    hidden_dim=self.options['src_hidden_dim'],
                    layers=2,
                    dropout=self.options['dropout'])
            elif side_attn_type == 'dot':
                # 'dot' uses BilinearAttention with its default score_fn.
                self.side_attn = ops.BilinearAttention(
                    hidden=self.options['src_hidden_dim'])
            elif side_attn_type == 'bahdanau':
                self.side_attn = ops.BilinearAttention(
                    hidden=self.options['src_hidden_dim'], score_fn='bahdanau')

            # Two-layer feed-forward net from encoder width to
            # n_side_outputs values.
            self.side_predictor = ops.FFNN(
                input_dim=self.options['src_hidden_dim'],
                hidden_dim=self.options['src_hidden_dim'],
                output_dim=experimental['n_side_outputs'],  # TODO -- SET SOMEWHERE
                nlayers=2,
                dropout=self.options['dropout'])

            if experimental['add_side_embeddings']:
                # One trainable emb_dim-wide vector per side output,
                # created as zeros here.
                self.side_embeddings = nn.Parameter(
                    torch.zeros(experimental['n_side_outputs'],
                                self.options['emb_dim']),
                    requires_grad=True)
                # Compresses (emb_dim + src_hidden_dim)-wide vectors down to
                # decoder width.
                self.h_compression = nn.Linear(
                    self.options['emb_dim'] + self.options['src_hidden_dim'],
                    self.options['tgt_hidden_dim'])
                self.side_softmax = nn.Softmax(dim=-1)

        # # # # # #  # # # # # #  # # # # # END NEW STUFF

        self.decoder = decoders.StackedAttentionLSTM(config=config)

        # Decoder width -> one score per target-vocabulary entry.
        self.output_projection = nn.Linear(self.options['tgt_hidden_dim'],
                                           tgt_vocab_size)

        self.softmax = nn.Softmax(dim=-1)

        self.init_weights()