Code Example #1
File: paraphrase.py  Project: yzhen-li/tranX
    def _score(self, src_codes, tgt_nls):
        """score examples sorted by code length"""
        args = self.args

        if args.tie_embed:
            src_code_var = self.to_input_variable_with_unk_handling(
                src_codes, cuda=args.cuda).t()
            tgt_nl_var = self.to_input_variable_with_unk_handling(
                tgt_nls, cuda=args.cuda).t()
        else:
            src_code_var = nn_utils.to_input_variable(src_codes,
                                                      self.vocab.code,
                                                      cuda=args.cuda).t()
            tgt_nl_var = nn_utils.to_input_variable(tgt_nls,
                                                    self.vocab.source,
                                                    cuda=args.cuda).t()

        src_code_mask = Variable(nn_utils.length_array_to_mask_tensor(
            [len(x) for x in src_codes],
            cuda=args.cuda,
            valid_entry_has_mask_one=True).float(),
                                 requires_grad=False)
        tgt_nl_mask = Variable(nn_utils.length_array_to_mask_tensor(
            [len(x) for x in tgt_nls],
            cuda=args.cuda,
            valid_entry_has_mask_one=True).float(),
                               requires_grad=False)

        scores = self.pi_model(src_code_var, tgt_nl_var, src_code_mask,
                               tgt_nl_mask)

        return scores
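
Every snippet on this page builds padding masks through nn_utils.length_array_to_mask_tensor. The helper below is only a minimal sketch of what such a function could look like, inferred from how it is called in these examples (including the valid_entry_has_mask_one flag used above); it is an assumption, not the actual nn_utils source.

import torch

def length_array_to_mask_tensor(length_array, cuda=False,
                                valid_entry_has_mask_one=False):
    """Sketch: build a (batch_size, max_len) uint8 mask from sequence lengths.

    Default convention: padded positions are 1 (handy for masked_fill).
    With valid_entry_has_mask_one=True the convention is flipped, so valid
    tokens are 1 and padding is 0, as used in _score above.
    """
    max_len = max(length_array)
    mask = torch.zeros(len(length_array), max_len, dtype=torch.uint8)
    for i, seq_len in enumerate(length_array):
        mask[i, seq_len:] = 1              # mark padded positions
    if valid_entry_has_mask_one:
        mask = 1 - mask                    # valid tokens -> 1, padding -> 0
    return mask.cuda() if cuda else mask
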
Code Example #2
    def forward(self, src_sents_var, src_sents_len, tgt_sents_var,
                tgt_token_copy_pos, tgt_token_copy_mask, tgt_token_gen_mask):
        """
        compute log p(y|x)

        :param tgt_token_copy_pos: Variable(tgt_action_len, batch_size)
        :param tgt_token_copy_mask: Variable(tgt_action_len, batch_size)
        :return: Variable(batch_size)
        """

        src_encodings, (last_state,
                        last_cell) = self.encode(src_sents_var, src_sents_len)
        dec_init_vec = self.init_decoder_state(last_state, last_cell)

        # (batch_size, src_sent_len)
        src_sent_masks = nn_utils.length_array_to_mask_tensor(src_sents_len,
                                                              cuda=self.cuda)

        # (tgt_sent_len - 1, batch_size, hidden_size)
        att_vecs = self.decode(src_encodings, src_sent_masks, dec_init_vec,
                               tgt_sents_var)

        # (tgt_sent_len - 1, batch_size, 2)
        tgt_token_predictor = F.softmax(self.tgt_token_predictor(att_vecs),
                                        dim=-1)

        # (tgt_sent_len - 1, batch_size, tgt_vocab_size)
        token_gen_prob = F.softmax(self.readout(att_vecs), dim=-1)
        # (tgt_sent_len - 1, batch_size, src_sent_len)
        token_copy_prob = self.src_pointer_net(src_encodings, src_sent_masks,
                                               att_vecs)

        tgt_token_idx = tgt_sents_var[1:]  # remove leading <s>
        tgt_token_copy_pos = tgt_token_copy_pos[1:]
        tgt_token_gen_mask = tgt_token_gen_mask[1:]
        tgt_token_copy_mask = tgt_token_copy_mask[1:]

        # (tgt_sent_len - 1, batch_size)
        tgt_token_gen_prob = torch.gather(
            token_gen_prob, dim=2,
            index=tgt_token_idx.unsqueeze(2)).squeeze(2) * tgt_token_gen_mask

        # (tgt_sent_len - 1, batch_size)
        tgt_token_copy_prob = torch.gather(
            token_copy_prob, dim=2, index=tgt_token_copy_pos.unsqueeze(
                2)).squeeze(2) * tgt_token_copy_mask

        tgt_token_mask = torch.gt(tgt_token_gen_mask + tgt_token_copy_mask,
                                  0.).float()
        tgt_token_prob = torch.log(
            tgt_token_predictor[:, :, 0] * tgt_token_gen_prob +
            tgt_token_predictor[:, :, 1] * tgt_token_copy_prob + 1.e-7 *
            (1. - tgt_token_mask))
        tgt_token_prob = tgt_token_prob * tgt_token_mask

        # (batch_size)
        scores = tgt_token_prob.sum(dim=0)

        return scores
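
The core of the loss above is the torch.gather call: for each target position it selects the probability of the gold token (or the gold copy position) from a full distribution. The toy snippet below, with made-up small shapes, just illustrates that gather/squeeze pattern.

import torch

tgt_len, batch_size, vocab_size = 3, 2, 5   # toy shapes, not the model's real sizes
token_gen_prob = torch.softmax(torch.randn(tgt_len, batch_size, vocab_size), dim=-1)
tgt_token_idx = torch.randint(0, vocab_size, (tgt_len, batch_size))

# Pick the probability of each gold token along the vocabulary dimension,
# then drop the singleton dimension that gather keeps.
tgt_token_gen_prob = torch.gather(
    token_gen_prob, dim=2, index=tgt_token_idx.unsqueeze(2)).squeeze(2)

print(tgt_token_gen_prob.shape)  # torch.Size([3, 2]) == (tgt_len, batch_size)
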
Code Example #3
    def sample_from_src_variable(self, src_sents_var, src_sents_len, sample_size):
        # (batch_size * sample_size, src_sent_len)
        src_sent_masks = nn_utils.length_array_to_mask_tensor(
            list(chain.from_iterable([l] * sample_size for l in src_sents_len)),
            cuda=self.cuda)

        src_encodings, (last_state, last_cell) = self.encode(src_sents_var, src_sents_len)
        dec_init_vec = self.init_decoder_state(last_state, last_cell)

        return self.sample_from_src_encoding(src_encodings, dec_init_vec, sample_size, src_sent_masks)
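
Because every source sentence is expanded into sample_size hypotheses, the length list has to be expanded the same way before the mask is built. A standalone illustration of that chain.from_iterable expansion, with made-up numbers:

from itertools import chain

src_sents_len = [5, 3]   # hypothetical lengths of two source sentences
sample_size = 3

# Repeat each length sample_size times so the mask has
# batch_size * sample_size rows, matching the replicated decoder inputs.
expanded_len = list(chain.from_iterable([l] * sample_size for l in src_sents_len))
print(expanded_len)  # [5, 5, 5, 3, 3, 3]
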
Code Example #4
File: seq2seq.py  Project: chubbymaggie/tranX
    def sample_from_src_variable(self, src_sents_var, src_sents_len, sample_size):
        # (batch_size * sample_size, src_sent_len)
        src_sent_masks = nn_utils.length_array_to_mask_tensor(
            list(chain.from_iterable([l] * sample_size for l in src_sents_len)),
            cuda=self.cuda)

        src_encodings, (last_state, last_cell) = self.encode(src_sents_var, src_sents_len)
        dec_init_vec = self.init_decoder_state(last_state, last_cell)

        return self.sample_from_src_encoding(src_encodings, dec_init_vec, sample_size, src_sent_masks)
Code Example #5
File: transformer_enc.py  Project: kzCassie/ucl_nlp
    def encode(self, src_sents_var, src_sents_len):
        """Encode the input natural language utterance

        Args:
            src_sents_var: a variable of shape (src_sent_len, batch_size), representing word ids of the input
            src_sents_len: a list of lengths of input source sentences, sorted by descending order

        Returns:
            src_encodings: source encodings of shape (batch_size, src_sent_len, hidden_size)
            last_state, last_cell: the last hidden state and cell state of the encoder,
                                   of shape (batch_size, hidden_size)
        """
        args = self.args

        # (src_sent_len, batch_size, embed_size)
        # apply word dropout
        if self.training and args.word_dropout:
            mask = Variable(
                self.new_tensor(src_sents_var.size()).fill_(
                    1. - args.word_dropout).bernoulli().long())
            src_sents_var = src_sents_var * mask + (
                1 - mask) * self.vocab.source.unk_id

        # (src_sent_len, batch_size, hidden_size)
        src_enc_vec = torch.tanh(
            self.src_enc_linear(self.src_embed(src_sents_var)))
        # (src_sent_len, batch_size, hidden_size)
        src = self.src_pos_encoder(src_enc_vec * math.sqrt(args.embed_size))
        # (src_sent_len, src_sent_len)
        src_mask = generate_square_subsequent_mask(src.shape[0], args.cuda)
        # (batch_size, src_sent_len)
        src_key_padding_mask = length_array_to_mask_tensor(
            src_sents_len, args.cuda)

        # (src_sent_len, batch_size, hidden_state)
        src_encodings = self.transformer_encoder(src, src_mask,
                                                 src_key_padding_mask)

        # TODO: shape assertion
        src_sent_len, batch_size = src_sents_var.shape
        assert (src_enc_vec.shape == (src_sent_len, batch_size,
                                      args.hidden_size))
        assert (src.shape == (src_sent_len, batch_size, args.hidden_size))
        assert (src_mask.shape == (src_sent_len, src_sent_len))
        assert (src_key_padding_mask.shape == (batch_size, src_sent_len))
        assert (src_encodings.shape == (src_sent_len, batch_size,
                                        args.hidden_size))

        src_encodings = src_encodings.permute(1, 0, 2)
        # last_state = src_encodings[:, 0, :]
        last_state = src_encodings.mean(1)
        last_cell = last_state
        return src_encodings, (last_state, last_cell)
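
This encoder also relies on generate_square_subsequent_mask. The sketch below follows the standard PyTorch causal-mask recipe; the optional cuda argument is an assumption based on how the helper is called here and in Code Example #8, not the project's actual implementation.

import torch

def generate_square_subsequent_mask(sz, cuda=False):
    """Sketch of a causal mask: 0.0 where attention is allowed, -inf at
    future positions so they vanish in the attention softmax."""
    mask = torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)
    return mask.cuda() if cuda else mask
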
Code Example #6
    def decode(self, src_encodings, src_sents_len, dec_init_vec, tgt_sents_var):
        """
        compute the final softmax layer at each decoding step
        :param src_encodings: Variable(src_sent_len, batch_size, hidden_size * 2)
        :param src_sents_len: list[int]
        :param dec_init_vec: tuple((batch_size, hidden_size))
        :param tgt_sents_var: Variable(tgt_sent_len, batch_size)
        :return:
            scores: Variable(tgt_sent_len - 1, batch_size, tgt_vocab_size)
        """
        new_tensor = src_encodings.data.new
        batch_size = src_encodings.size(1)

        h_tm1 = dec_init_vec
        # (batch_size, query_len, hidden_size * 2)
        src_encodings = src_encodings.permute(1, 0, 2)
        # (batch_size, query_len, hidden_size)
        src_encodings_att_linear = self.att_src_linear(src_encodings)
        # initialize the attentional vector
        att_tm1 = Variable(new_tensor(batch_size, self.hidden_size).zero_(), requires_grad=False)
        # (batch_size, src_sent_len)
        src_sent_masks = nn_utils.length_array_to_mask_tensor(src_sents_len, cuda=self.cuda)

        # (tgt_sent_len, batch_size, embed_size)
        tgt_token_embed = self.tgt_embed(tgt_sents_var)

        scores = []
        # start from `<s>`, until y_{T-1}
        for t, y_tm1_embed in list(enumerate(tgt_token_embed.split(split_size=1)))[:-1]:
            # input feeding: concatenate y_tm1 and the previous attentional vector
            # split() keeps the first dim
            y_tm1_embed = y_tm1_embed.squeeze(0)
            if t > 0 and self.decoder_word_dropout:
                # (batch_size)
                y_tm1_mask = Variable(torch.bernoulli(new_tensor(batch_size).fill_(1 - self.decoder_word_dropout)))
                y_tm1_embed = y_tm1_embed * y_tm1_mask.unsqueeze(1)

            x = torch.cat([y_tm1_embed, att_tm1], 1)

            (h_t, cell_t), att_t, score_t = self.step(x, h_tm1,
                                                      src_encodings, src_encodings_att_linear,
                                                      src_sent_masks=src_sent_masks)

            scores.append(score_t)

            att_tm1 = att_t
            h_tm1 = (h_t, cell_t)

        # (tgt_sent_len - 1, batch_size, tgt_vocab_size)
        scores = torch.stack(scores)

        return scores
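
Two details of this loop are worth isolating: input feeding (the previous attentional vector is concatenated with the current input embedding) and decoder word dropout (entire previous-token embeddings are zeroed with some probability). A toy illustration of the dropout mask, mirroring the code above with made-up sizes:

import torch

batch_size, embed_size = 4, 6          # hypothetical sizes
decoder_word_dropout = 0.3

y_tm1_embed = torch.randn(batch_size, embed_size)

# One Bernoulli draw per example: 1 keeps the embedding, 0 zeroes it out.
y_tm1_mask = torch.bernoulli(
    torch.full((batch_size,), 1 - decoder_word_dropout))
y_tm1_embed = y_tm1_embed * y_tm1_mask.unsqueeze(1)
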
Code Example #7
File: seq2seq.py  Project: chubbymaggie/tranX
    def decode(self, src_encodings, src_sents_len, dec_init_vec, tgt_sents_var):
        """
        compute the final softmax layer at each decoding step
        :param src_encodings: Variable(src_sent_len, batch_size, hidden_size * 2)
        :param src_sents_len: list[int]
        :param dec_init_vec: tuple((batch_size, hidden_size))
        :param tgt_sents_var: Variable(tgt_sent_len, batch_size)
        :return:
            scores: Variable(tgt_sent_len - 1, batch_size, tgt_vocab_size)
        """
        new_tensor = src_encodings.data.new
        batch_size = src_encodings.size(1)

        h_tm1 = dec_init_vec
        # (batch_size, query_len, hidden_size * 2)
        src_encodings = src_encodings.permute(1, 0, 2)
        # (batch_size, query_len, hidden_size)
        src_encodings_att_linear = self.att_src_linear(src_encodings)
        # initialize the attentional vector
        att_tm1 = Variable(new_tensor(batch_size, self.hidden_size).zero_(), requires_grad=False)
        # (batch_size, src_sent_len)
        src_sent_masks = nn_utils.length_array_to_mask_tensor(src_sents_len, cuda=self.cuda)

        # (tgt_sent_len, batch_size, embed_size)
        tgt_token_embed = self.tgt_embed(tgt_sents_var)

        scores = []
        # start from `<s>`, until y_{T-1}
        for t, y_tm1_embed in list(enumerate(tgt_token_embed.split(split_size=1)))[:-1]:
            # input feeding: concatenate y_tm1 and the previous attentional vector
            # split() keeps the first dim
            y_tm1_embed = y_tm1_embed.squeeze(0)
            if t > 0 and self.decoder_word_dropout:
                # (batch_size)
                y_tm1_mask = Variable(torch.bernoulli(new_tensor(batch_size).fill_(1 - self.decoder_word_dropout)))
                y_tm1_embed = y_tm1_embed * y_tm1_mask.unsqueeze(1)

            x = torch.cat([y_tm1_embed, att_tm1], 1)

            (h_t, cell_t), att_t, score_t = self.step(x, h_tm1,
                                                      src_encodings, src_encodings_att_linear,
                                                      src_sent_masks=src_sent_masks)

            scores.append(score_t)

            att_tm1 = att_t
            h_tm1 = (h_t, cell_t)

        # (tgt_sent_len - 1, batch_size, tgt_vocab_size)
        scores = torch.stack(scores)

        return scores
Code Example #8
File: transformer.py  Project: kzCassie/ucl_nlp
    def step(self, x, src_encodings, src_key_padding_mask, hyp_len):
        """
        At each step during inference time, x contains embeddings of tentative hypothesis. We need to mask
        appropriately and pass the entire x into the transformer decoder to get the updated att_vec for each
        hypothesis.

        Args:
            x: tgt inputs of shape (t, hyp_num, input_dim), t is the max hypothesis length at step t during inference.
            src_encodings: variable of shape (src_sent_len, batch_size, hidden_size), encodings of source utterances.
            src_key_padding_mask: to be used as the memory_key_padding_mask for the attention decoder.
            hyp_len: in-progress hypothesis length np.array of shape (hyp_num,). All values = t.

        Returns:
            att_t: output of the transformer decoder for the t-th step of the shape (hyp_num, hidden_size).
        """
        tgt_action_len = x.shape[0]
        batch_size = x.shape[1]
        args = self.args

        # Transformer decoder
        # (tgt_action_len, batch_size, hidden_size)
        tgt_dec_vec = torch.tanh(self.tgt_dec_linear(x))
        # (tgt_action_len, batch_size, hidden_size)
        tgt = self.tgt_pos_encoder(tgt_dec_vec * math.sqrt(self.input_dim))
        # (tgt_action_len, tgt_action_len)
        tgt_mask = generate_square_subsequent_mask(tgt_action_len)
        memory_mask = None
        # (batch_size, tgt_action_len)
        tgt_key_padding_mask = length_array_to_mask_tensor(hyp_len, args.cuda)
        # (batch_size, src_sent_len)
        memory_key_padding_mask = src_key_padding_mask.clone()

        # TODO: shape assertion
        assert (tgt_dec_vec.shape == (tgt_action_len, batch_size,
                                      args.hidden_size))
        assert (tgt.shape == (tgt_action_len, batch_size, args.hidden_size))
        assert (tgt_mask.shape == (tgt_action_len, tgt_action_len))
        assert (tgt_key_padding_mask.shape == (batch_size, tgt_action_len))
        # assert(memory_key_padding_mask.shape[0]==batch_size) TODO:incompatible batch_size during inference time

        att_vecs = self.transformer_decoder(tgt,
                                            src_encodings,
                                            tgt_mask,
                                            memory_mask,
                                            tgt_key_padding_mask,
                                            memory_key_padding_mask=None)
        assert (att_vecs.shape == (tgt_action_len, batch_size,
                                   args.hidden_size))

        return att_vecs[-1, :, :]
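
The pattern here is the usual transformer inference loop: at step t the embeddings of the whole prefix are re-fed through the decoder under a causal mask, and only the output at the newest position is kept. The toy below shows the same pattern with a plain nn.TransformerDecoder and hypothetical small sizes; it is not the module used in this project.

import torch
import torch.nn as nn

hidden_size, nhead = 16, 4
decoder_layer = nn.TransformerDecoderLayer(d_model=hidden_size, nhead=nhead)
transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=1)

t, hyp_num, src_sent_len = 3, 2, 5
tgt = torch.randn(t, hyp_num, hidden_size)                # prefix embeddings so far
memory = torch.randn(src_sent_len, hyp_num, hidden_size)  # source encodings
tgt_mask = torch.triu(torch.full((t, t), float('-inf')), diagonal=1)

out = transformer_decoder(tgt, memory, tgt_mask=tgt_mask)
att_t = out[-1, :, :]   # (hyp_num, hidden_size): output for the newest position
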
Code Example #9
File: dataset.py  Project: liuhuigmail/CoDas4CG
    def src_token_mask(self):
        return nn_utils.length_array_to_mask_tensor(self.src_sents_len,
                                                    cuda=self.cuda)
Code Example #10
    def tgt_token_mask_usual(self):
        return nn_utils.length_array_to_mask_tensor(self.tgt_actions_len,
                                                    cuda=self.cuda,
                                                    valid_entry_has_mask_one=True)
Code Example #11
    def src_token_mask_usual(self):
        return nn_utils.length_array_to_mask_tensor(self.src_sents_len,
                                                    cuda=self.cuda,
                                                    valid_entry_has_mask_one=True)
Code Example #12
File: dataset.py  Project: chubbymaggie/tranX
    def src_token_mask(self):
        return nn_utils.length_array_to_mask_tensor(self.src_sents_len,
                                                    cuda=self.cuda)
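
With the default convention (padded positions set to 1), masks like src_token_mask are typically consumed by pushing those positions to -inf before an attention softmax. The snippet below is a sketch of that common usage, not code from the listed projects.

import torch
import torch.nn.functional as F

att_weight = torch.randn(2, 4)                   # (batch_size, src_sent_len)
src_token_mask = torch.tensor([[0, 0, 1, 1],     # 1 marks padded positions
                               [0, 0, 0, 1]], dtype=torch.bool)

att_weight = att_weight.masked_fill(src_token_mask, -float('inf'))
att_prob = F.softmax(att_weight, dim=-1)         # padding gets probability 0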