Example #1
    def __init__(self, params, params_2, path):
        super(RVAE, self).__init__()

        self.params = params
        self.params_2 = params_2  # Encoder-2 parameters

        self.embedding = Embedding(self.params, path)
        self.embedding_2 = Embedding(self.params_2, path, True)

        self.encoder_original = Encoder(self.params)
        if self.params.hrvae:
            self.encoder_paraphrase = EncoderHR(self.params_2)
        else:
            self.encoder_paraphrase = Encoder(self.params_2)

        # self.embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")

        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2,
                                       self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2,
                                           self.params.latent_variable_size)

        if self.params.res_model:
            self.decoder = DecoderResidual(self.params_2)
        else:
            self.decoder = Decoder(self.params_2)
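The two nn.Linear layers above project the bidirectional encoder context (width encoder_rnn_size * 2) to the mean and log-variance of the latent Gaussian. A minimal sketch of the reparameterization step they feed, assuming plain PyTorch tensors; the helper name is illustrative, not part of the original class:

import torch

def reparameterize(context, context_to_mu, context_to_logvar):
    # context: [batch, encoder_rnn_size * 2]
    mu = context_to_mu(context)          # [batch, latent_variable_size]
    logvar = context_to_logvar(context)  # [batch, latent_variable_size]
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)          # eps ~ N(0, I)
    return eps * std + mu                # z ~ N(mu, std^2)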
Example #2
    def __init__(self, params, prefix=''):
        super(RVAE_dilated, self).__init__()

        self.params = params

        self.embedding = Embedding(self.params, '', prefix)

        self.encoder = Encoder(self.params)

        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2,
                                       self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2,
                                           self.params.latent_variable_size)

        if self.params.decoder_type == 'gru' or self.params.decoder_type == 'gru_emb':
            self.decoder = DecoderGRU(self.params)
        elif self.params.decoder_type == 'lstm':
            self.decoder = DecoderLSTM(self.params)
        elif self.params.decoder_type == 'dilation':
            self.decoder = Decoder(self.params)

        params_size = 0
        params_num = 0
        for p in self.parameters():
            param_size = 1
            for s in p.size():
                param_size = param_size * s
            if p.requires_grad: params_size = params_size + param_size
            if p.requires_grad: params_num = params_num + 1
            #if p.requires_grad: print('Grad Param', type(p.data), p.size())
        print('RVAE parameters num[%s] size[%s]' % (params_num, params_size))
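The loop above multiplies out each parameter's shape by hand to report the count and total size of trainable parameters. An equivalent, shorter sketch using PyTorch's built-in numel(); the helper name is illustrative:

def count_trainable(module):
    # returns (number of trainable tensors, total number of trainable scalars)
    trainable = [p for p in module.parameters() if p.requires_grad]
    return len(trainable), sum(p.numel() for p in trainable)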
Example #3
    def __init__(self, params, regularised):
        super(RVAE_dilated, self).__init__()

        self.params = params

        self.embedding = Embedding(self.params, '')

        self.regularised = regularised

        if self.regularised:
            print("Highly regularised Encoder")
            self.encoder = HREncoder(self.params)
            self.layer_dim = self.params.encoder_num_layers * 2 * self.params.encoder_rnn_size
            self.context_to_mu = nn.Linear(self.layer_dim * 2,
                                           self.params.latent_variable_size)
            self.context_to_logvar = nn.Linear(
                self.layer_dim * 2, self.params.latent_variable_size)
        elif not self.regularised:
            print('Classic encoder')
            self.encoder = Encoder(self.params)
            self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2,
                                           self.params.latent_variable_size)
            self.context_to_logvar = nn.Linear(
                self.params.encoder_rnn_size * 2,
                self.params.latent_variable_size)

        self.decoder = Decoder(self.params)
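A quick dimension check for the regularised branch above, sketched under the assumption that HREncoder concatenates the final h and c states of a bidirectional, multi-layer LSTM into one context vector (the actual layout inside HREncoder may differ):

import torch

num_layers, rnn_size, batch = 2, 256, 8
h_n = torch.zeros(num_layers * 2, batch, rnn_size)  # [layers * directions, batch, rnn_size]
c_n = torch.zeros(num_layers * 2, batch, rnn_size)
context = torch.cat([h_n, c_n], dim=0).transpose(0, 1).reshape(batch, -1)
layer_dim = num_layers * 2 * rnn_size
assert context.size(1) == layer_dim * 2  # matches the nn.Linear input size above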
Example #4
    def __init__(self, params):
        super(RVAE, self).__init__()

        self.params = params

        self.embedding = Embedding(self.params, '')

        self.encoder = Encoder(self.params)

        self.context_to_mu = nn.Linear(self.params.latent_variable_size, self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.latent_variable_size, self.params.latent_variable_size)

        self.decoder = Decoder(self.params)
Example #5
    def __init__(self, params):
        super(RVAE, self).__init__()

        self.params = params

        self.embedding = Embedding(self.params, '')

        self.original_encoder = OriginalEncoder(self.params)
        self.paraphrase_encoder = ParaEncoder(self.params)

        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2,
                                       self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2,
                                           self.params.latent_variable_size)

        self.decoder = Decoder(self.params)
Example #6
    def __init__(self, params, params_2):
        super(RVAE, self).__init__()

        self.params = params
        self.params_2 = params_2  # Encoder-2 parameters

        self.embedding = Embedding(self.params, '')
        self.embedding_2 = Embedding(self.params_2, '')

        self.encoder = Encoder(self.params)
        self.encoder_2 = Encoder(self.params_2)


        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)

        self.encoder_3 = Encoder(self.params)
        self.decoder = Decoder(self.params_2)  # changed to params_2 (decodes into the paraphrase vocabulary)
Example #7
    def __init__(self, params: object, params_2: object, path: str) -> None:
        """
        Initializes the RVAE with the given parameters and data files.

        Args:
            params (object): parameters for the original-sentence encoder
            params_2 (object): parameters for the paraphrase encoder
            path (str): path to the data files
        """
        super(RVAE, self).__init__()

        self.params = params
        self.params_2 = params_2

        self.embedding = Embedding(self.params, path)
        self.embedding_2 = Embedding(self.params_2, path, True)

        self.encoder_original = Encoder(self.params)
        if self.params.hrvae:
            self.encoder_paraphrase = EncoderHR(self.params_2)
        else:
            self.encoder_paraphrase = Encoder(self.params_2)

        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2,
                                       self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2,
                                           self.params.latent_variable_size)

        if self.params.attn_model and self.params.res_model:
            self.decoder = DecoderResidualAttention(self.params_2)
        elif self.params.attn_model:
            self.decoder = DecoderAttention(self.params_2)
        elif self.params.res_model:
            self.decoder = DecoderResidual(self.params_2)
        else:
            self.decoder = Decoder(self.params_2)
Example #8
class RVAE(nn.Module):
    def __init__(self, params):
        super(RVAE, self).__init__()

        self.params = params

        self.embedding = Embedding(self.params, '')

        self.encoder = Encoder(self.params)

        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2,
                                       self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2,
                                           self.params.latent_variable_size)

        self.decoder = Decoder(self.params)

    def forward(self,
                drop_prob,
                encoder_word_input=None,
                encoder_character_input=None,
                decoder_word_input=None,
                decoder_character_input=None,
                z=None,
                initial_state=None):
        """
        :param encoder_word_input: An tensor with shape of [batch_size, seq_len] of Long type
        :param encoder_character_input: An tensor with shape of [batch_size, seq_len, max_word_len] of Long type
        :param decoder_word_input: An tensor with shape of [batch_size, max_seq_len + 1] of Long type
        :param initial_state: initial state of decoder rnn in order to perform sampling

        :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout

        :param z: context if sampling is performing

        :return: unnormalized logits of sentence words distribution probabilities
                    with shape of [batch_size, seq_len, word_vocab_size]
                 final rnn state with shape of [num_layers, batch_size, decoder_rnn_size]
        """

        #         assert parameters_allocation_check(self), \
        #             'Invalid CUDA options. Parameters should be allocated in the same memory'
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input],
                                  True) \
            or (z is not None and decoder_word_input is not None), \
            "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            ''' Get context from encoder and sample z ~ N(mu, std)
            '''
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input)

            context = self.encoder(encoder_input)

            mu = self.context_to_mu(context)
            logvar = self.context_to_logvar(context)
            std = t.exp(0.5 * logvar)

            z = Variable(
                t.randn([batch_size, self.params.latent_variable_size]))
            if use_cuda:
                z = z.cuda()

            z = z * std + mu

            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
                                1)).mean().squeeze()
        else:
            kld = None

        decoder_input = self.embedding.word_embed(decoder_word_input)
        out, final_state = self.decoder(decoder_input, z, drop_prob,
                                        initial_state)

        return out, final_state, kld

    def learnable_parameters(self):

        # word_embedding is a constant parameter, so it must be dropped from the list of parameters passed to the optimizer
        return [p for p in self.parameters() if p.requires_grad]

    def trainer(self, optimizer, batch_loader):
        def train(i, batch_size, use_cuda, dropout):
            input = batch_loader.next_batch(batch_size, 'train')
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, decoder_character_input, target
            ] = input

            logits, _, kld = self(dropout,
                                  encoder_word_input,
                                  encoder_character_input,
                                  decoder_word_input,
                                  decoder_character_input,
                                  z=None)

            logits = logits.view(-1, self.params.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)

            loss = 79 * cross_entropy + kld_coef(i) * kld

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            return cross_entropy, kld, kld_coef(i)

        return train

    def validater(self, batch_loader):
        def validate(batch_size, use_cuda):
            input = batch_loader.next_batch(batch_size, 'valid')
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, decoder_character_input, target
            ] = input

            logits, _, kld = self(0.,
                                  encoder_word_input,
                                  encoder_character_input,
                                  decoder_word_input,
                                  decoder_character_input,
                                  z=None)

            logits = logits.view(-1, self.params.word_vocab_size)
            target = target.view(-1)

            cross_entropy = F.cross_entropy(logits, target)

            return cross_entropy, kld

        return validate

    def sample(self, batch_loader, seq_len, seed, use_cuda):
        seed = Variable(t.from_numpy(seed).float())
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, decoder_character_input_np = batch_loader.go_input(
            1)

        decoder_word_input = Variable(
            t.from_numpy(decoder_word_input_np).long())
        decoder_character_input = Variable(
            t.from_numpy(decoder_character_input_np).long())

        if use_cuda:
            decoder_word_input, decoder_character_input = decoder_word_input.cuda(
            ), decoder_character_input.cuda()

        result = ''

        initial_state = None

        for i in range(seq_len):
            logits, initial_state, _ = self(0., None, None, decoder_word_input,
                                            decoder_character_input, seed,
                                            initial_state)

            logits = logits.view(-1, self.params.word_vocab_size)
            prediction = F.softmax(logits, dim=-1)

            word = batch_loader.sample_word_from_distribution(
                prediction.data.cpu().numpy()[-1])

            if word == batch_loader.end_token:
                break

            result += ' ' + word

            decoder_word_input_np = np.array([[batch_loader.word_to_idx[word]]
                                              ])
            decoder_character_input_np = np.array(
                [[batch_loader.encode_characters(word)]])

            decoder_word_input = Variable(
                t.from_numpy(decoder_word_input_np).long())
            decoder_character_input = Variable(
                t.from_numpy(decoder_character_input_np).long())

            if use_cuda:
                decoder_word_input, decoder_character_input = decoder_word_input.cuda(
                ), decoder_character_input.cuda()

        return result
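A minimal sketch of driving the closures returned by trainer and validater above, assuming an already-constructed RVAE instance rvae, a batch_loader with the interface used in the example, and illustrative hyperparameters:

from torch.optim import Adam

optimizer = Adam(rvae.learnable_parameters(), lr=5e-5)
train_step = rvae.trainer(optimizer, batch_loader)
validate = rvae.validater(batch_loader)

for iteration in range(10000):
    # kld_coef(iteration) anneals the KL term inside train_step
    ce, kld, coef = train_step(iteration, batch_size=32, use_cuda=True, dropout=0.3)
    if iteration % 500 == 0:
        valid_ce, valid_kld = validate(batch_size=32, use_cuda=True)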
Example #9
class RVAE_dilated(nn.Module):
    def __init__(self, params, prefix=''):
        super(RVAE_dilated, self).__init__()

        self.params = params

        self.embedding = Embedding(self.params, '', prefix)

        self.encoder = Encoder(self.params)

        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2,
                                       self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2,
                                           self.params.latent_variable_size)

        if self.params.decoder_type == 'gru' or self.params.decoder_type == 'gru_emb':
            self.decoder = DecoderGRU(self.params)
        elif self.params.decoder_type == 'lstm':
            self.decoder = DecoderLSTM(self.params)
        elif self.params.decoder_type == 'dilation':
            self.decoder = Decoder(self.params)

        params_size = 0
        params_num = 0
        for p in self.parameters():
            param_size = 1
            for s in p.size():
                param_size = param_size * s
            if p.requires_grad: params_size = params_size + param_size
            if p.requires_grad: params_num = params_num + 1
            #if p.requires_grad: print('Grad Param', type(p.data), p.size())
        print('RVAE parameters num[%s] size[%s]' % (params_num, params_size))

    def forward(self,
                drop_prob,
                encoder_word_input=None,
                encoder_character_input=None,
                decoder_word_input=None,
                z=None,
                initial_state=None):
        """
        :param encoder_word_input: An tensor with shape of [batch_size, seq_len] of Long type
        :param encoder_character_input: An tensor with shape of [batch_size, seq_len, max_word_len] of Long type
        :param decoder_word_input: An tensor with shape of [batch_size, max_seq_len + 1] of Long type

        :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout

        :param z: context if sampling is performing

        :return: unnormalized logits of sentence words distribution probabilities
                    with shape of [batch_size, seq_len, word_vocab_size]
                 kld loss estimation
        """

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'
        use_cuda = self.embedding.word_embed.weight.is_cuda

        if not self.params.word_is_char:
            assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                      [encoder_word_input, encoder_character_input, decoder_word_input],
                                      True) \
                   or (z is not None and decoder_word_input is not None), \
                "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            ''' Get context from encoder and sample z ~ N(mu, std)
            '''
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input)

            context = self.encoder(encoder_input)

            mu = self.context_to_mu(context)
            logvar = self.context_to_logvar(context)
            std = t.exp(0.5 * logvar)

            z = Variable(
                t.randn([batch_size, self.params.latent_variable_size]))
            if use_cuda:
                z = z.cuda()

            z = z * std + mu

            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
                                1)).mean().squeeze()
        else:
            kld = None

        decoder_input = self.embedding.word_embed(decoder_word_input)
        logits_out, final_state = self.decoder(decoder_input, z, drop_prob,
                                               initial_state)

        return logits_out, kld, z, final_state

    def learnable_parameters(self):

        # word_embedding is a constant parameter, so it must be dropped from the list of parameters passed to the optimizer
        return [p for p in self.parameters() if p.requires_grad]

    def trainer(self, optimizer, batch_loader):
        perplexity = Perplexity()

        def train(i, batch_size, use_cuda, dropout):
            input = batch_loader.next_batch(batch_size, 'train')
            input = [(Variable(t.from_numpy(var)) if var is not None else None)
                     for var in input]
            input = [(var.long() if var is not None else None)
                     for var in input]
            input = [(var.cuda() if var is not None and use_cuda else var)
                     for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, _, target
            ] = input

            logits_out, kld, _, _ = self(dropout,
                                         encoder_word_input,
                                         encoder_character_input,
                                         decoder_word_input,
                                         z=None,
                                         initial_state=None)
            if self.params.decoder_type == 'dilation' or self.params.decoder_type == 'gru' or self.params.decoder_type == 'lstm':
                logits = logits_out.view(-1, self.params.word_vocab_size)
                target = target.view(-1)
                cross_entropy = F.cross_entropy(logits, target)

                # since cross entropy is averaged over seq_len, it is scaled back up so its magnitude balances the kld term
                loss = 79 * cross_entropy + kld_coef(i) * kld

                logits = logits.view(batch_size, -1,
                                     self.params.word_vocab_size)
                target = target.view(batch_size, -1)
                ppl = perplexity(logits, target).mean()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                return ppl, kld, None
            elif self.params.decoder_type == 'gru_emb':
                decoder_target = self.embedding(target, None)
                error = t.pow(logits_out - decoder_target, 2).mean()
                '''
                loss is constructed from the batch-averaged squared error
                between output and target plus the KL divergence
                between p(z) and q(z|x)
                '''
                loss = 400 * error + kld_coef(i) * kld

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                return error, kld, kld_coef(i)

        return train

    def validater(self, batch_loader):
        perplexity = Perplexity()

        def validate(batch_size, use_cuda):
            input = batch_loader.next_batch(batch_size, 'valid')
            input = [
                Variable(t.from_numpy(var)) if var is not None else None
                for var in input
            ]
            input = [var.long() if var is not None else None for var in input]
            input = [
                var.cuda() if use_cuda and var is not None else var
                for var in input
            ]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, _, target
            ] = input

            logits_out, kld, _, _ = self(0.,
                                         encoder_word_input,
                                         encoder_character_input,
                                         decoder_word_input,
                                         z=None,
                                         initial_state=None)
            if self.params.decoder_type == 'dilation' or self.params.decoder_type == 'gru' or self.params.decoder_type == 'lstm':
                ppl = perplexity(logits_out, target).mean()

                return ppl, kld
            elif self.params.decoder_type == 'gru_emb':
                decoder_target = self.embedding(target, None)
                error = t.pow(logits_out - decoder_target, 2).mean()

                return error, kld

        return validate

    def style(self, batch_loader, seq, use_cuda, sample_size=30):
        decoder_word_input_np, _ = batch_loader.go_input(1)
        encoder_wids = []
        for i in range(len(seq)):
            word = seq[i]
            wid = batch_loader.word_to_idx[word]
            word = np.array([[wid]])
            decoder_word_input_np = np.append(decoder_word_input_np, word, 1)
            encoder_wids.append(wid)
        encoder_wids = encoder_wids[::-1]
        encoder_word_input_np = np.array([encoder_wids])
        decoder_word_input = Variable(
            t.from_numpy(decoder_word_input_np)).long()
        encoder_word_input = Variable(
            t.from_numpy(encoder_word_input_np)).long()
        decoder_word_input = t.cat([decoder_word_input] * sample_size, 0)
        encoder_word_input = t.cat([encoder_word_input] * sample_size, 0)
        if use_cuda:
            decoder_word_input = decoder_word_input.cuda()
            encoder_word_input = encoder_word_input.cuda()
        if self.params.word_is_char:  # TODO: only for Chinese (character-level) input right now
            logits_out, kld, z, final_state = self(0.,
                                                   encoder_word_input,
                                                   None,
                                                   decoder_word_input,
                                                   z=None,
                                                   initial_state=None)
            return z.data.cpu().numpy()
        return None

    def sample(self,
               batch_loader,
               seq_len,
               seeds,
               use_cuda,
               template=None,
               beam_size=50):
        (z_num, _) = seeds.shape
        print("z sample size", z_num, "beam size", beam_size)
        beam_sent_wids, _ = batch_loader.go_input(1)
        beam_sent_last_wid = beam_sent_wids[:, -1:]
        results = []
        end_token_id = batch_loader.word_to_idx[batch_loader.end_token]
        initial_state = None
        sentence = []

        for i in range(seq_len):
            beam_sent_num = len(beam_sent_wids)
            if beam_sent_num == 0:
                break
            if len(results) >= beam_size:
                break
            if self.params.decoder_type == 'dilation' or not self.params.decoder_stateful:
                beam_z_sent_wids = np.repeat(
                    beam_sent_wids, [z_num],
                    axis=0) if z_num > 1 else beam_sent_wids
            elif self.params.decoder_type == 'gru' or self.params.decoder_type == 'lstm' or self.params.decoder_type == 'gru_emb':
                beam_z_sent_wids = np.repeat(
                    beam_sent_last_wid, [z_num],
                    axis=0) if z_num > 1 else beam_sent_last_wid
            decoder_word_input = Variable(
                t.from_numpy(beam_z_sent_wids).long())
            decoder_word_input = decoder_word_input.cuda(
            ) if use_cuda else decoder_word_input
            beam_seeds = Variable(t.from_numpy(seeds).float())
            beam_seeds = t.cat([beam_seeds] * beam_sent_num,
                               0) if beam_sent_num > 1 else beam_seeds
            beam_seeds = beam_seeds.cuda() if use_cuda else beam_seeds
            if not self.params.decoder_stateful:
                initial_state = None
            elif initial_state is not None and z_num > 1:
                initial_state = initial_state.view(
                    -1, 1, self.params.decoder_rnn_size)
                initial_state = initial_state.repeat(1, z_num, 1)
                initial_state = initial_state.view(
                    self.params.decoder_num_layers, -1,
                    self.params.decoder_rnn_size)
            beam_sent_logps = None
            if template and len(template) > i and template[i] != '#':
                beam_sent_wids = np.column_stack(
                    (beam_sent_wids,
                     [batch_loader.word_to_idx[template[i]]] * beam_sent_num))
                beam_sent_last_wid = beam_sent_wids[:, -1:]
            else:
                logits_out, _, _, initial_state = self(0., None, None,
                                                       decoder_word_input,
                                                       beam_seeds,
                                                       initial_state)
                if self.params.decoder_type == 'dilation' or self.params.decoder_type == 'gru' or self.params.decoder_type == 'lstm':
                    [b_z_n, sl, _] = logits_out.size()
                    logits = logits_out.view(-1, self.params.word_vocab_size)
                    prediction = F.softmax(logits, dim=-1)
                    prediction = prediction.view(beam_sent_num, z_num, sl, -1)
                    # take mean of sentence vocab probs for each beam group
                    beam_sent_vps = np.mean(prediction.data.cpu().numpy(), 1)
                    # get vocab probs of the sentence last word for each beam group
                    beam_last_vps = beam_sent_vps[:, -1]
                    beam_last_word_size = min(batch_loader.words_vocab_size,
                                              beam_size)
                    # choose last word candidate ids for each beam group
                    beam_choosed_wids = np.array([
                        np.random.choice(range(batch_loader.words_vocab_size),
                                         beam_last_word_size,
                                         replace=False,
                                         p=last_vps.ravel()).tolist()
                        for last_vps in beam_last_vps
                    ])
                    # print("candidate shape =", beam_choosed_wids.shape)
                    # duplicate beam sentence word ids for the chosen last-word candidates
                    beam_sent_wids = np.repeat(beam_sent_wids,
                                               [beam_last_word_size],
                                               axis=0)
                    beam_sent_wids = np.column_stack(
                        (beam_sent_wids, beam_choosed_wids.reshape(-1)))
                    if not self.params.decoder_stateful:
                        initial_state = None
                    elif initial_state is not None:
                        initial_state = initial_state.view(
                            -1, 1, self.params.decoder_rnn_size)
                        initial_state = initial_state.repeat(
                            1, beam_last_word_size, 1)
                        initial_state = initial_state.view(
                            self.params.decoder_num_layers, -1,
                            self.params.decoder_rnn_size)
                    # get sentence word probs
                    beam_sent_wps = []
                    whole_or_last = 1 if self.params.decoder_type == 'dilation' or not self.params.decoder_stateful else (
                        -1 if self.params.decoder_type == 'gru'
                        or self.params.decoder_type == 'lstm' else 0)
                    for i, sent in enumerate(beam_sent_wids):
                        beam_sent_wps.append([])
                        for j, wid in enumerate(sent[whole_or_last:]):
                            beam_sent_wps[i].append(
                                beam_sent_vps[i //
                                              beam_last_word_size][j][wid])
                    # desc sort sum of the beam sentence log probs
                    beam_sent_logps = np.sum(np.log(beam_sent_wps), axis=1)
                    beam_sent_ids = np.argsort(
                        beam_sent_logps)[-(beam_size - len(results)):][::-1]
                    # get the top beam size sentences
                    beam_sent_wids = beam_sent_wids[beam_sent_ids]
                    beam_sent_logps = np.exp(beam_sent_logps[beam_sent_ids])
                    #print("candidate", "".join([batch_loader.idx_to_word[wid] for wid in beam_sent_wids[:,-1].reshape(-1)]))
                    if initial_state is not None and len(beam_sent_ids) > 0:
                        idx = Variable(t.from_numpy(
                            beam_sent_ids.copy())).long()
                        initial_state = initial_state.index_select(1, idx)
                elif self.params.decoder_type == 'gru_emb':
                    [b_z_n, sl, _] = logits_out.size()
                    #TODO

                    out = logits_out.view(-1, self.params.word_embed_size)
                    similarity = self.embedding.similarity(out)
                    similarity = similarity.data.cpu().numpy()
                    similarity = np.mean(similarity, 0)
                    similarity = similarity.reshape(beam_sent_num, z_num, sl, -1)  # numpy arrays use reshape, not view(shape)
                    beam_last_word_size = min(batch_loader.words_vocab_size,
                                              beam_size)
                    # choose last word candidate ids for each beam group
                    beam_choosed_wids = np.array([
                        np.random.choice(range(batch_loader.words_vocab_size),
                                         beam_last_word_size,
                                         replace=False,
                                         p=last_vps.ravel()).tolist()
                        for last_vps in similarity
                    ])
                    idx = np.random.choice(range(
                        batch_loader.words_vocab_size),
                                           replace=False,
                                           p=similarity.ravel())
                    if idx == end_token_id:
                        break
                    beam_sent_wids = np.array([[idx]])
                    word = batch_loader.idx_to_word[idx]
                    sentence.append(word)
            if self.params.decoder_type == 'dilation' or self.params.decoder_type == 'gru' or self.params.decoder_type == 'lstm':
                # check whether any sentence has ended
                keep = []
                for i, sent in enumerate(beam_sent_wids):
                    if sent[-1] == end_token_id:
                        results.append(sent)
                        self.show(
                            batch_loader, sent,
                            beam_sent_logps[i] if beam_sent_logps is not None
                            and len(beam_sent_logps) > i else None)
                    else:
                        keep.append(i)
                beam_sent_wids = beam_sent_wids[keep]
                beam_sent_last_wid = beam_sent_wids[:, -1:]
                #print("last word", "".join([batch_loader.idx_to_word[wid] for wid in beam_sent_last_wid[:,-1].reshape(-1)]))
                if initial_state is not None and len(keep) > 0:
                    idx = Variable(t.from_numpy(np.array(keep))).long()
                    initial_state = initial_state.index_select(1, idx)
        if self.params.decoder_type == 'gru_emb':
            print(u'%s' %
                  ("" if self.params.word_is_char else " ").join(sentence))
            return ""
        else:
            results_len = len(results)
            lack_num = beam_size - results_len
            if lack_num > 0:
                results = results + beam_sent_wids[:lack_num].tolist()
                for i, sent in enumerate(results[-lack_num:]):
                    self.show(
                        batch_loader, sent, beam_sent_logps[i + results_len]
                        if beam_sent_logps is not None
                        and len(beam_sent_logps) > i + results_len else None)
        return results

    def show(self, batch_loader, sent_wids, sent_logp):
        print(u'%s==%s' % (("" if self.params.word_is_char else " ").join(
            [batch_loader.idx_to_word[wid] for wid in sent_wids]), sent_logp))
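A hypothetical call into the beam-style sampler above, assuming a trained RVAE_dilated instance model and its batch_loader; the latent seeds are drawn from the prior as float32 numpy arrays, matching the seeds.shape access inside sample:

import numpy as np

z_num = 5
seeds = np.random.normal(size=(z_num, model.params.latent_variable_size)).astype(np.float32)
# template=None means no positions are fixed in advance
results = model.sample(batch_loader, seq_len=30, seeds=seeds, use_cuda=True,
                       template=None, beam_size=10)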
Example #10
class RVAE(nn.Module):
    def __init__(self, params, params_2):
        super(RVAE, self).__init__()

        self.params = params
        self.params_2 = params_2  #Encoder-2 parameters

        self.embedding = Embedding(self.params, '')
        self.embedding_2 = Embedding(self.params_2, '', True)

        self.encoder = Encoder(self.params)
        self.encoder_2 = Encoder(self.params_2)

        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2,
                                       self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2,
                                           self.params.latent_variable_size)

        # self.encoder_3 = Encoder(self.params)
        self.decoder = Decoder(self.params_2)  # changed to params_2 (decodes into the paraphrase vocabulary)

    def forward(self,
                drop_prob,
                encoder_word_input=None,
                encoder_character_input=None,
                encoder_word_input_2=None,
                encoder_character_input_2=None,
                decoder_word_input_2=None,
                decoder_character_input_2=None,
                z=None,
                initial_state=None):

        # Parameters of forward modified to accommodate Encoder-2
        """
        :param encoder_word_input: A tensor with shape [batch_size, seq_len] of Long type
        :param encoder_character_input: A tensor with shape [batch_size, seq_len, max_word_len] of Long type
        :param decoder_word_input: A tensor with shape [batch_size, max_seq_len + 1] of Long type
        :param initial_state: initial state of the decoder rnn, used to perform sampling

        :param drop_prob: probability that an element of the decoder input is zeroed (dropout)

        :param z: latent context if sampling is being performed

        :return: unnormalized logits of the word distribution
                    with shape [batch_size, seq_len, word_vocab_size]
                 final rnn state with shape [num_layers, batch_size, decoder_rnn_size]
        """

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input_2],
                                  True) \
            or (z is not None and decoder_word_input_2 is not None), \
            "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            ''' Get context from encoder and sample z ~ N(mu, std)
            '''
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input)
            ''' ===================================================Doing the same for encoder-2===================================================
            '''
            [batch_size_2, _] = encoder_word_input_2.size()

            encoder_input_2 = self.embedding_2(encoder_word_input_2,
                                               encoder_character_input_2)
            ''' ==================================================================================================================================
            '''

            context, h_0, c_0 = self.encoder(encoder_input, None)

            State = (h_0, c_0)  #Final state of Encoder-1
            context_2, _, _ = self.encoder_2(encoder_input_2,
                                             State)  #Encoder_2 for Ques_2

            mu = self.context_to_mu(context_2)
            logvar = self.context_to_logvar(context_2)
            std = t.exp(0.5 * logvar)

            z = Variable(
                t.randn([batch_size, self.params.latent_variable_size]))
            if use_cuda:
                z = z.cuda()

            z = z * std + mu

            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
                                1)).mean().squeeze()

            # encoder_input = self.embedding(encoder_word_input, encoder_character_input)
            # _ , h_0 , c_0 = self.encoder_3(encoder_input, None)
            initial_state = State  #Final state of Encoder-1

        else:
            kld = None
            mu = None
            std = None

        decoder_input_2 = self.embedding_2.word_embed(
            decoder_word_input_2
        )  # What to do with this decoder input ? --> Slightly resolved
        out, final_state = self.decoder(
            decoder_input_2, z, drop_prob,
            initial_state)  # Take a look at the decoder

        return out, final_state, kld, mu, std

    def learnable_parameters(self):

        # word_embedding is a constant parameter, so it must be dropped from the list of parameters passed to the optimizer
        return [p for p in self.parameters() if p.requires_grad]

    def trainer(self, optimizer, batch_loader, batch_loader_2):
        def train(i, batch_size, use_cuda, dropout, start_index):
            input = batch_loader.next_batch(batch_size, 'train', start_index)
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, decoder_character_input, target
            ] = input
            ''' =================================================== Input for Encoder-2 ========================================================
            '''

            input_2 = batch_loader_2.next_batch(batch_size, 'train',
                                                start_index)
            input_2 = [Variable(t.from_numpy(var)) for var in input_2]
            input_2 = [var.long() for var in input_2]
            input_2 = [var.cuda() if use_cuda else var for var in input_2]

            [
                encoder_word_input_2, encoder_character_input_2,
                decoder_word_input_2, decoder_character_input_2, target
            ] = input_2
            ''' ================================================================================================================================
            '''
            # exit()

            logits, _, kld, _, _ = self(dropout,
                                        encoder_word_input,
                                        encoder_character_input,
                                        encoder_word_input_2,
                                        encoder_character_input_2,
                                        decoder_word_input_2,
                                        decoder_character_input_2,
                                        z=None)

            # logits = logits.view(-1, self.params.word_vocab_size)
            logits = logits.view(-1, self.params_2.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)

            loss = 79 * cross_entropy + kld_coef(i) * kld

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            return cross_entropy, kld, kld_coef(i)

        return train

    def validater(self, batch_loader, batch_loader_2):
        def validate(batch_size, use_cuda, start_index):
            input = batch_loader.next_batch(batch_size, 'valid', start_index)
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, decoder_character_input, target
            ] = input
            ''' ==================================================== Input for Encoder-2 ========================================================
            '''

            input_2 = batch_loader_2.next_batch(batch_size, 'valid',
                                                start_index)
            input_2 = [Variable(t.from_numpy(var)) for var in input_2]
            input_2 = [var.long() for var in input_2]
            input_2 = [var.cuda() if use_cuda else var for var in input_2]
            [
                encoder_word_input_2, encoder_character_input_2,
                decoder_word_input_2, decoder_character_input_2, target
            ] = input_2
            ''' ==================================================================================================================================
            '''

            logits, _, kld, _, _ = self(0.,
                                        encoder_word_input,
                                        encoder_character_input,
                                        encoder_word_input_2,
                                        encoder_character_input_2,
                                        decoder_word_input_2,
                                        decoder_character_input_2,
                                        z=None)

            # logits = logits.view(-1, self.params.word_vocab_size)
            logits = logits.view(-1, self.params_2.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)

            return cross_entropy, kld

        return validate

    def sample(self, batch_loader, seq_len, seed, use_cuda, State):
        # seed = Variable(t.from_numpy(seed).float())
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, decoder_character_input_np = batch_loader.go_input(
            1)

        decoder_word_input = Variable(
            t.from_numpy(decoder_word_input_np).long())
        decoder_character_input = Variable(
            t.from_numpy(decoder_character_input_np).long())

        if use_cuda:
            decoder_word_input, decoder_character_input = decoder_word_input.cuda(
            ), decoder_character_input.cuda()

        result = ''

        initial_state = State

        for i in range(seq_len):
            logits, initial_state, _, _, _ = self(0., None, None, None, None,
                                                  decoder_word_input,
                                                  decoder_character_input,
                                                  seed, initial_state)

            # forward(self, drop_prob,
            #           encoder_word_input=None, encoder_character_input=None,
            #           encoder_word_input_2=None, encoder_character_input_2=None,
            #           decoder_word_input_2=None, decoder_character_input_2=None,
            #           z=None, initial_state=None):

            # logits = logits.view(-1, self.params.word_vocab_size)
            # logits = logits.view(-1, self.params.word_vocab_size)
            logits = logits.view(-1, self.params_2.word_vocab_size)
            # print '---------------------------------------'
            # print 'Printing logits'
            # print logits
            # print '------------------------------------------'

            prediction = F.softmax(logits, dim=-1)

            word = batch_loader.sample_word_from_distribution(
                prediction.data.cpu().numpy()[-1])

            if word == batch_loader.end_token:
                break

            result += ' ' + word

            decoder_word_input_np = np.array([[batch_loader.word_to_idx[word]]
                                              ])
            decoder_character_input_np = np.array(
                [[batch_loader.encode_characters(word)]])

            decoder_word_input = Variable(
                t.from_numpy(decoder_word_input_np).long())
            decoder_character_input = Variable(
                t.from_numpy(decoder_character_input_np).long())

            if use_cuda:
                decoder_word_input, decoder_character_input = decoder_word_input.cuda(
                ), decoder_character_input.cuda()

        return result

    def sampler(self, batch_loader, batch_loader_2, seq_len, seed, use_cuda, i,
                beam_size, n_best):
        input = batch_loader.next_batch(1, 'valid', i)
        input = [Variable(t.from_numpy(var)) for var in input]
        input = [var.long() for var in input]
        input = [var.cuda() if use_cuda else var for var in input]
        [
            encoder_word_input, encoder_character_input, decoder_word_input,
            decoder_character_input, target
        ] = input

        encoder_input = self.embedding(encoder_word_input,
                                       encoder_character_input)

        _, h0, c0 = self.encoder(encoder_input, None)
        State = (h0, c0)

        # print '----------------------'
        # print 'Printing h0 ---------->'
        # print h0
        # print '----------------------'

        # State = None
        results, scores = self.sample_beam(batch_loader_2, seq_len, seed,
                                           use_cuda, State, beam_size, n_best)

        return results, scores

    def sample_beam(self, batch_loader, seq_len, seed, use_cuda, State,
                    beam_size, n_best):
        # seed = Variable(t.from_numpy(seed).float())
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, decoder_character_input_np = batch_loader.go_input(
            1)

        decoder_word_input = Variable(
            t.from_numpy(decoder_word_input_np).long())
        decoder_character_input = Variable(
            t.from_numpy(decoder_character_input_np).long())

        if use_cuda:
            decoder_word_input, decoder_character_input = decoder_word_input.cuda(
            ), decoder_character_input.cuda()

        dec_states = State

        # print '========= Before ================'
        # print "dec_states:", dec_states[0].size()
        # print "dec_states:", dec_states[1].size()
        # print '=================================='

        # dec_states = [
        #     Variable(dec_states[0].repeat(1, beam_size, 1)),
        #     Variable(dec_states[1].repeat(1, beam_size, 1))
        # ]
        dec_states = [
            dec_states[0].repeat(1, beam_size, 1),
            dec_states[1].repeat(1, beam_size, 1)
        ]

        # print'========== After =================='
        # print "dec_states:", dec_states[0].size()
        # print "dec_states:", dec_states[1].size()
        # print '=================================='
        # exit()

        drop_prob = 0.0
        beam_size = beam_size
        batch_size = 1

        beam = [
            Beam(beam_size, batch_loader, cuda=True) for k in range(batch_size)
        ]

        batch_idx = list(range(batch_size))
        remaining_sents = batch_size

        for i in range(seq_len):

            input = t.stack([
                b.get_current_state() for b in beam if not b.done
            ]).t().contiguous().view(1, -1)

            trg_emb = self.embedding_2.word_embed(
                Variable(input).transpose(1, 0))

            # print trg_emb.size()
            # print seed.size()

            trg_h, dec_states = self.decoder.only_decoder_beam(
                trg_emb, seed, drop_prob, dec_states)

            # trg_h, (trg_h_t, trg_c_t) = self.model.decoder(trg_emb, (dec_states[0].squeeze(0), dec_states[1].squeeze(0)), context )

            # print trg_h.size()
            # print trg_h_t.size()
            # print trg_c_t.size()

            # dec_states = (trg_h_t, trg_c_t)

            # print 'State dimension ----------->'
            # print State[0].size()
            # print State[1].size()
            # print '======================================='
            # print "dec_states:", dec_states[0].size()
            # print "dec_states:", dec_states[1].size()
            # print '========== Things successful ==========='

            # exit()

            dec_out = trg_h.squeeze(1)

            # print "dec_out:", dec_out.size()

            out = F.softmax(self.decoder.fc(dec_out), dim=-1).unsqueeze(0)

            word_lk = out.view(beam_size, remaining_sents,
                               -1).transpose(0, 1).contiguous()

            active = []
            for b in range(batch_size):
                if beam[b].done:
                    continue

                idx = batch_idx[b]
                if not beam[b].advance(word_lk.data[idx]):
                    active += [b]

                for dec_state in dec_states:  # iterate over h, c
                    # layers x beam*sent x dim
                    sent_states = dec_state.view(-1, beam_size,
                                                 remaining_sents,
                                                 dec_state.size(2))[:, :, idx]
                    sent_states.data.copy_(
                        sent_states.data.index_select(
                            1, beam[b].get_current_origin()))

            if not active:
                break

            # in this section, the sentences that are still active are
            # compacted so that the decoder is not run on completed sentences
            active_idx = t.cuda.LongTensor([batch_idx[k] for k in active])
            batch_idx = {beam: idx for idx, beam in enumerate(active)}

            def update_active(t):
                # select only the remaining active sentences
                view = t.data.view(-1, remaining_sents,
                                   self.params.decoder_rnn_size)
                new_size = list(t.size())
                new_size[-2] = new_size[-2] * len(active_idx) \
                    // remaining_sents
                return Variable(
                    view.index_select(1, active_idx).view(*new_size))

            dec_states = (update_active(dec_states[0]),
                          update_active(dec_states[1]))
            dec_out = update_active(dec_out)
            # context = update_active(context)

            remaining_sents = len(active)

        # (4) package everything up

        allHyp, allScores = [], []

        for b in range(batch_size):
            scores, ks = beam[b].sort_best()
            # print scores
            # print ks
            allScores += [scores[:n_best]]
            hyps = zip(*[beam[b].get_hyp(k) for k in ks[:n_best]])
            # print hyps
            # print "------------------"
            allHyp += [hyps]

        # print '==== Complete ========='

        return allHyp, allScores
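A hypothetical usage of sampler above, assuming a trained two-encoder rvae, batch_loader for the original questions and batch_loader_2 for the paraphrases; the latent seed is drawn from the prior:

import torch as t

seed = t.randn(1, rvae.params.latent_variable_size)
# i indexes the validation example whose encoder state seeds the decoder
hyps, scores = rvae.sampler(batch_loader, batch_loader_2, seq_len=30,
                            seed=seed, use_cuda=True, i=0,
                            beam_size=10, n_best=3)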
Example #11
class RVAE(nn.Module):
    def __init__(self, params, params_2, path):
        super(RVAE, self).__init__()

        self.params = params
        self.params_2 = params_2  # Encoder-2 parameters

        self.embedding = Embedding(self.params, path)
        self.embedding_2 = Embedding(self.params_2, path, True)

        self.encoder_original = Encoder(self.params)
        if self.params.hrvae:
            self.encoder_paraphrase = EncoderHR(self.params_2)
        else:
            self.encoder_paraphrase = Encoder(self.params_2)

        # self.embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")

        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2,
                                       self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2,
                                           self.params.latent_variable_size)

        if self.params.res_model:
            self.decoder = DecoderResidual(self.params_2)
        else:
            self.decoder = Decoder(self.params_2)

    def forward(
        self,
        unk_idx,
        drop_prob,
        encoder_word_input=None,
        encoder_character_input=None,
        encoder_word_input_2=None,
        encoder_character_input_2=None,
        decoder_word_input_2=None,
        decoder_character_input_2=None,
        z=None,
        initial_state=None,
    ):

        # Parameters of forward modified to accommodate Encoder-2
        """
        :param encoder_word_input: A tensor with shape [batch_size, seq_len] of Long type
        :param encoder_character_input: A tensor with shape [batch_size, seq_len, max_word_len] of Long type
        :param decoder_word_input: A tensor with shape [batch_size, max_seq_len + 1] of Long type
        :param initial_state: initial state of the decoder rnn, used to perform sampling

        :param drop_prob: probability that an element of the decoder input is zeroed (dropout)

        :param z: latent context if sampling is being performed

        :return: unnormalized logits of the word distribution
                    with shape [batch_size, seq_len, word_vocab_size]
                 final rnn state with shape [num_layers, batch_size, decoder_rnn_size]
        """

        assert parameters_allocation_check(
            self
        ), "Invalid CUDA options. Parameters should be allocated in the same memory"
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert (
            z is None and fold(
                lambda acc, parameter: acc and parameter is not None,
                [
                    encoder_word_input, encoder_character_input,
                    decoder_word_input_2
                ],
                True,
            ) or (z is not None and decoder_word_input_2 is not None)
        ), "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            """Get context from encoder and sample z ~ N(mu, std)"""  # 把word和character拼接成一个向量
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input, unk_idx,
                                           drop_prob)
            """ ===================================================Doing the same for encoder-2===================================================
            """
            [batch_size_2, _] = encoder_word_input_2.size()

            encoder_input_2 = self.embedding_2(encoder_word_input_2,
                                               encoder_character_input_2,
                                               unk_idx, drop_prob)
            """ ==================================================================================================================================
            """

            enc_out_original, context, h_0, c_0, _ = self.encoder_original(
                encoder_input, None)
            state_original = (h_0, c_0)  # Final state of Encoder-1 (original-sentence encoding)
            # state_original = context
            enc_out_paraphrase, context_2, h_0, c_0, context_ = self.encoder_paraphrase(
                encoder_input_2,
                state_original)  # Encoder_2 for Ques_2, which then encodes the paraphrase sentence
            state_paraphrase = (h_0, c_0)  # Final state of Encoder-2 (paraphrase-sentence encoding)
            # state_paraphrase = context_2

            if context_ is not None:

                mu_ = []
                logvar_ = []
                for entry in context_:
                    mu_.append(self.context_to_mu(entry))
                    logvar_.append(self.context_to_logvar(entry))

                z_sampled = self.sample_gaussian(batch_size)
                if use_cuda:
                    z_sampled = z_sampled.cuda()

                mu = t.stack(mu_)
                logvar = t.stack(logvar_)

                if self.params.wae:
                    z_tilda = self.sample_z_tilda_from_posterior(
                        z_sampled, logvar_[-1], mu_[-1], 1).cuda()
                    # NOTE: this stacked KLD is computed but immediately discarded
                    # (kld is reset to 0) and recomputed per layer below
                    p = t.distributions.Normal(mu, t.exp(logvar))
                    q = t.distributions.Normal(mu,
                                               t.ones(logvar.size()).cuda())
                    kld = t.sum(t.distributions.kl_divergence(p, q))
                    kld = kld / mu.shape[0]
                    kld = 0
                    for i in range(len(mu_)):
                        p = t.distributions.Normal(mu_[i], t.exp(logvar_[i]))
                        q = t.distributions.Normal(
                            mu_[i],
                            t.ones(logvar.size()).cuda())
                        kld += t.sum(t.distributions.kl_divergence(p, q))
                    kld = kld / len(mu_)
                    wasserstein_loss = self.imq_kernel(
                        z_sampled, z_tilda, self.params.latent_variable_size)
                    kld = 0.01 * kld + 10 * wasserstein_loss
                else:
                    z_tilda = self.sample_z_tilda_from_posterior(
                        z_sampled, logvar_[-1], mu_[-1], 0.5).cuda()
                    kld = 0
                    for i in range(len(mu_)):
                        kld += (-0.5 * t.sum(
                            logvar_[i] - t.pow(mu_[i], 2) - t.exp(logvar_[i]) +
                            1, 1)).mean().squeeze()
                    kld = kld / len(mu_)

            else:

                mu = self.context_to_mu(context_2)
                logvar = self.context_to_logvar(context_2)

                z_sampled = self.sample_gaussian(batch_size)
                if use_cuda:
                    z_sampled = z_sampled.cuda()

                if self.params.wae:
                    z_tilda = self.sample_z_tilda_from_posterior(
                        z_sampled, logvar, mu, 1).cuda()
                    p = t.distributions.Normal(mu, t.exp(logvar))
                    q = t.distributions.Normal(mu,
                                               t.ones(logvar.size()).cuda())
                    kld = t.sum(t.distributions.kl_divergence(p, q))
                    wasserstein_loss = self.imq_kernel(
                        z_sampled, z_tilda, self.params.latent_variable_size)
                    kld = 0.01 * kld + 10 * wasserstein_loss
                else:
                    z_tilda = self.sample_z_tilda_from_posterior(
                        z_sampled, logvar, mu, 0.5).cuda()
                    kld = (-0.5 *
                           t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
                                 1)).mean().squeeze()
        else:
            kld = None
            mu = None
            std = None
            z_tilda = z  # use the provided latent code directly when sampling
            # NOTE: enc_out_paraphrase / state_original are only produced on the
            # encoding path above, so generation is done via sampler()/sample_beam().

        # What to do with this decoder input ? --> Slightly resolved
        decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2)
        # if context_ is not None:
        #     decoder_input_2 = t.ones(decoder_input_2.size()).cuda()
        out, final_state = self.decoder(decoder_input_2, z_tilda, drop_prob,
                                        enc_out_paraphrase, state_original)

        return out, final_state, kld, mu, None
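
    # For reference: the non-WAE branches above use the closed-form KL between the
    # diagonal Gaussian posterior and the standard-normal prior,
    #   KL( N(mu, diag(sigma^2)) || N(0, I) )
    #       = -1/2 * sum_j ( 1 + logvar_j - mu_j^2 - exp(logvar_j) ),
    # which is exactly the `-0.5 * t.sum(logvar - mu^2 - exp(logvar) + 1, 1)` term,
    # averaged over the batch (and over the per-layer contexts in the HR case).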

    def sample_z_tilda_from_posterior(self,
                                      z_sampled,
                                      z_log_sigma,
                                      z_mean,
                                      z_temperature=0.5):
        """(Differentiably!) draw sample from Gaussian with given shape, subject to random noise epsilon"""
        return z_sampled * t.exp(
            z_log_sigma * z_temperature) + z_mean  # N(mu, I * sigma**2)
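
    # Note: with z_temperature = 0.5 this is the standard reparameterisation
    # z = mu + exp(0.5 * logvar) * eps = mu + sigma * eps, with eps ~ N(0, I) drawn
    # by sample_gaussian below; the WAE branch calls it with z_temperature = 1,
    # which scales the noise by exp(logvar) rather than by the standard deviation.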

    def sample_gaussian(self, batch_size):
        """(Differentiably!) draw sample from Gaussian with given shape, subject to random noise epsilon"""
        return Variable(t.randn([batch_size, self.params.latent_variable_size
                                 ]))  # Dimension [batch_size x latent_dim]

    def imq_kernel(self, sample_qz: t.Tensor, sample_pz: t.Tensor, h_dim: int):
        batch_size = sample_pz.size(0)

        norms_pz = sample_pz.pow(2).sum(1, keepdim=True)  # batch_size x 1
        prods_pz = t.mm(sample_pz, sample_pz.t())  # batch_size x batch_size
        dists_pz = norms_pz + norms_pz.t() - 2 * prods_pz

        norms_qz = sample_qz.pow(2).sum(1, keepdim=True)  # batch_size x 1
        prods_qz = t.mm(sample_qz, sample_qz.t())  # batch_size x batch_size
        dists_qz = norms_qz + norms_qz.t() - 2 * prods_qz

        dotprods = t.mm(sample_qz, sample_pz.t())
        distances = norms_qz + norms_pz.t() - 2 * dotprods

        stats = 0
        Cbase = 2.0 * h_dim * 2.0 * 1.0
        for scale in [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0]:
            C = Cbase * scale
            res1 = C / (C + dists_qz)
            res1 += C / (C + dists_pz)

            if t.cuda.is_available():
                res1 = (1 - t.eye(batch_size).cuda()) * res1
            else:
                res1 = (1 - t.eye(batch_size)) * res1

            res1 = res1.sum() / (batch_size * batch_size - batch_size)
            res2 = C / (C + distances)
            res2 = res2.sum() * 2.0 / (batch_size * batch_size)
            stats += res1 - res2

        return stats
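
    # imq_kernel() above is an inverse-multiquadratic (IMQ) kernel MMD estimator of
    # the kind commonly used for the WAE-MMD penalty:
    #   k(x, y) = C / (C + ||x - y||^2),
    # with `stats` accumulating, over the scale list, the estimate
    #   mean_offdiag[ k(qz_i, qz_j) + k(pz_i, pz_j) ] - 2 * mean[ k(qz_i, pz_j) ].
    # Cbase = 2 * h_dim * 2.0 looks like the usual 2 * latent_dim * sigma^2 style
    # bandwidth heuristic (an observation about this code, not a guarantee).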

    def learnable_parameters(self):
        # word_embedding is a constant parameter, so it must be excluded from the optimizer's parameter list
        return [p for p in self.parameters() if p.requires_grad]
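
    # A minimal usage sketch (illustrative only; `model` and `lr` are assumptions):
    #   optimizer = t.optim.Adam(model.learnable_parameters(), lr=5e-5)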

    def trainer(self, optimizer, batch_loader, batch_loader_2):
        def train(coef, batch_size, use_cuda, dropout, start_index):
            input = batch_loader.next_batch(batch_size, "train", start_index)
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]
            # This is data/train.txt converted to embeddings and padded:
            # encoder_word_input / encoder_character_input are the original sentence x_o, reversed, with padding tokens prepended;
            # decoder_word_input / decoder_character_input are x_o with a start token prepended and padding at the end;
            # target is x_o followed by an end token and then padding tokens.
            [
                encoder_word_input,
                encoder_character_input,
                decoder_word_input,
                decoder_character_input,
                target,
                _,
            ] = input
            """ =================================================== Input for Encoder-2 ========================================================
            """

            input_2 = batch_loader_2.next_batch(batch_size, "train",
                                                start_index)
            input_2 = [Variable(t.from_numpy(var)) for var in input_2]
            input_2 = [var.long() for var in input_2]
            input_2 = [var.cuda() if use_cuda else var for var in input_2]
            # This is data/super/train.txt converted to embeddings and padded:
            # encoder_word_input / encoder_character_input are the paraphrase sentence x_p, reversed, with padding tokens prepended;
            # decoder_word_input / decoder_character_input are x_p with a start token prepended and padding at the end;
            # target is x_p followed by an end token and then padding tokens.
            [
                encoder_word_input_2,
                encoder_character_input_2,
                decoder_word_input_2,
                decoder_character_input_2,
                target,
                _,
            ] = input_2
            unk_idx = None
            """ ================================================================================================================================
            """
            # encoder_input is the (reversed) original sentence x_o, encoder_input_2 is the (reversed) paraphrase x_p,
            # and the decoder input is the paraphrase prefixed with the start token
            logits, _, kld, _, _ = self(
                unk_idx,
                dropout,
                encoder_word_input,
                encoder_character_input,
                encoder_word_input_2,
                encoder_character_input_2,
                decoder_word_input_2,
                decoder_character_input_2,
                z=None,
            )

            logits = logits.view(-1, self.params_2.word_vocab_size)
            target = target.view(-1)

            # logits hold, at each step, unnormalized scores over the whole vocabulary; target holds the word index
            # at each step (no one-hot conversion needed, F.cross_entropy handles that internally)
            cross_entropy = F.cross_entropy(logits, target)

            if self.params.wae:
                loss = 1 * cross_entropy + coef * kld  # WAE variant: reconstruction weighted by 1
            elif self.params.hrvae:
                loss = 79 * cross_entropy + coef * kld  # the factor 79 appears to be an ad-hoc choice by the original author
            else:
                loss = 79 * cross_entropy + coef * kld  # the factor 79 appears to be an ad-hoc choice by the original author

            optimizer.zero_grad()  # standard pattern: compute the loss, zero the gradients,
            loss.backward()  # backpropagate,
            optimizer.step()  # then apply the gradient update

            return cross_entropy, kld, coef  # cross-entropy, KL divergence, and the KL weighting coefficient

        return train
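
    # A minimal sketch of driving the closure returned above (illustrative only;
    # `rvae`, `optimizer`, `loader_src`, `loader_par`, `kld_coef` and the
    # hyper-parameters are assumptions, not names defined in this file):
    #   train_step = rvae.trainer(optimizer, loader_src, loader_par)
    #   for step in range(num_steps):
    #       ce, kld, coef = train_step(kld_coef(step), batch_size=32,
    #                                  use_cuda=True, dropout=0.3,
    #                                  start_index=step * 32)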

    def validater(self, batch_loader, batch_loader_2):
        def validate(batch_size, use_cuda, start_index):
            input = batch_loader.next_batch(batch_size, "valid", start_index)
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, decoder_character_input, target
            ] = input
            """ ==================================================== Input for Encoder-2 ========================================================
            """

            input_2 = batch_loader_2.next_batch(batch_size, "valid",
                                                start_index)
            input_2 = [Variable(t.from_numpy(var)) for var in input_2]
            input_2 = [var.long() for var in input_2]
            input_2 = [var.cuda() if use_cuda else var for var in input_2]
            [
                encoder_word_input_2,
                encoder_character_input_2,
                decoder_word_input_2,
                decoder_character_input_2,
                target,
            ] = input_2
            """ ==================================================================================================================================
            """
            unk_idx = batch_loader_2.word_to_idx[batch_loader_2.unk_token]
            logits, _, kld, _, _ = self(
                unk_idx,
                0.0,
                encoder_word_input,
                encoder_character_input,
                encoder_word_input_2,
                encoder_character_input_2,
                decoder_word_input_2,
                decoder_character_input_2,
                z=None,
            )

            # logits = logits.view(-1, self.params.word_vocab_size)
            logits = logits.view(-1, self.params_2.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)

            return cross_entropy, kld

        return validate

    def sample(self, batch_loader, seq_len, seed, use_cuda, State):
        # seed = Variable(t.from_numpy(seed).float())
        # seed = Variable(t.randn([1, parameters.latent_variable_size]))
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, decoder_character_input_np = batch_loader.go_input(
            1)

        decoder_word_input = Variable(
            t.from_numpy(decoder_word_input_np).long())
        decoder_character_input = Variable(
            t.from_numpy(decoder_character_input_np).long())

        if use_cuda:
            decoder_word_input, decoder_character_input = decoder_word_input.cuda(
            ), decoder_character_input.cuda()

        result = ""

        initial_state = State

        for i in range(seq_len):
            # pass unk_idx=None and drop_prob=0.0; encoder inputs are skipped because z (seed) is given
            logits, initial_state, _, _, _ = self(None, 0.0, None, None, None,
                                                  None, decoder_word_input,
                                                  decoder_character_input,
                                                  seed, initial_state)

            # forward(self, drop_prob,
            #           encoder_word_input=None, encoder_character_input=None,
            #           encoder_word_input_2=None, encoder_character_input_2=None,
            #           decoder_word_input_2=None, decoder_character_input_2=None,
            #           z=None, initial_state=None):

            # logits = logits.view(-1, self.params.word_vocab_size)
            # logits = logits.view(-1, self.params.word_vocab_size)
            logits = logits.view(-1, self.params_2.word_vocab_size)
            # print '---------------------------------------'
            # print 'Printing logits'
            # print logits
            # print '------------------------------------------'

            prediction = F.softmax(logits)

            word = batch_loader.sample_word_from_distribution(
                prediction.data.cpu().numpy()[-1])

            if word == batch_loader.end_token:
                break

            result += " " + word

            decoder_word_input_np = np.array([[batch_loader.word_to_idx[word]]
                                              ])
            decoder_character_input_np = np.array(
                [[batch_loader.encode_characters(word)]])

            decoder_word_input = Variable(
                t.from_numpy(decoder_word_input_np).long())
            decoder_character_input = Variable(
                t.from_numpy(decoder_character_input_np).long())

            if use_cuda:
                decoder_word_input, decoder_character_input = decoder_word_input.cuda(
                ), decoder_character_input.cuda()

        return result

    def sampler(self, batch_loader, batch_loader_2, seq_len, seed, use_cuda, i,
                beam_size, n_best):
        input = batch_loader.next_batch(1, "valid", i)
        input = [Variable(t.from_numpy(var)) for var in input]
        input = [var.long() for var in input]
        input = [var.cuda() if use_cuda else var for var in input]
        [
            encoder_word_input, encoder_character_input, decoder_word_input,
            decoder_character_input, target, _
        ] = input

        encoder_input = self.embedding(encoder_word_input,
                                       encoder_character_input)

        encoder_output, _, h0, c0, _ = self.encoder_original(
            encoder_input, None)
        State = (h0, c0)

        # print '----------------------'
        # print 'Printing h0 ---------->'
        # print h0
        # print '----------------------'

        # State = None
        results, scores = self.sample_beam(batch_loader_2, seq_len, seed,
                                           use_cuda, State, beam_size, n_best,
                                           encoder_output)

        return results, scores
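
    # A minimal usage sketch of sampler() (illustrative only; `rvae`, the loaders
    # and the hyper-parameters are assumptions): `seed` is a latent code of shape
    # [1, latent_variable_size], e.g.
    #   seed = t.randn([1, rvae.params.latent_variable_size])
    #   hyps, scores = rvae.sampler(loader_src, loader_par, seq_len=30, seed=seed,
    #                               use_cuda=True, i=0, beam_size=10, n_best=5)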

    def sample_beam(self, batch_loader, seq_len, seed, use_cuda, State,
                    beam_size, n_best, encoder_output):
        # seed = Variable(t.from_numpy(seed).float())
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, decoder_character_input_np = batch_loader.go_input(
            1)

        decoder_word_input = Variable(
            t.from_numpy(decoder_word_input_np).long())
        decoder_character_input = Variable(
            t.from_numpy(decoder_character_input_np).long())

        if use_cuda:
            decoder_word_input, decoder_character_input = decoder_word_input.cuda(
            ), decoder_character_input.cuda()

        dec_states = State

        # print '========= Before ================'
        # print "dec_states:", dec_states[0].size()
        # print "dec_states:", dec_states[1].size()
        # print '=================================='

        # dec_states = [
        #     Variable(dec_states[0].repeat(1, beam_size, 1)),
        #     Variable(dec_states[1].repeat(1, beam_size, 1))
        # ]

        dec_states = [
            dec_states[0].repeat(1, beam_size, 1),
            dec_states[1].repeat(1, beam_size, 1)
        ]

        # print'========== After =================='
        # print "dec_states:", dec_states[0].size()
        # print "dec_states:", dec_states[1].size()
        # print '=================================='
        # exit()

        drop_prob = 0.0
        batch_size = 1

        beam = [
            Beam(beam_size, batch_loader, cuda=True) for k in range(batch_size)
        ]

        batch_idx = list(range(batch_size))
        remaining_sents = batch_size

        for i in range(seq_len):
            input = t.stack([
                b.get_current_state() for b in beam if not b.done
            ]).t().contiguous().view(1, -1)

            trg_emb = self.embedding_2.word_embed(
                Variable(input).transpose(1, 0))

            trg_h, dec_states = self.decoder.only_decoder_beam(
                trg_emb, seed, drop_prob, encoder_output, dec_states)

            # trg_h, (trg_h_t, trg_c_t) = self.model.decoder(trg_emb, (dec_states[0].squeeze(0), dec_states[1].squeeze(0)), context )

            # print trg_h.size()
            # print trg_h_t.size()
            # print trg_c_t.size()

            # dec_states = (trg_h_t, trg_c_t)

            # print 'State dimension ----------->'
            # print State[0].size()
            # print State[1].size()
            # print '======================================='
            # print "dec_states:", dec_states[0].size()
            # print "dec_states:", dec_states[1].size()
            # print '========== Things successful ==========='

            # exit()

            dec_out = trg_h.squeeze(1)

            # print "dec_out:", dec_out.size()

            out = F.softmax(self.decoder.fc(dec_out)).unsqueeze(0)

            word_lk = out.view(beam_size, remaining_sents,
                               -1).transpose(0, 1).contiguous()

            active = []
            for b in range(batch_size):
                if beam[b].done:
                    continue

                idx = batch_idx[b]
                if not beam[b].advance(word_lk.data[idx]):
                    active += [b]

                for dec_state in dec_states:  # iterate over h, c
                    # layers x beam*sent x dim
                    sent_states = dec_state.view(-1, beam_size,
                                                 remaining_sents,
                                                 dec_state.size(2))[:, :, idx]
                    sent_states.data.copy_(
                        sent_states.data.index_select(
                            1, beam[b].get_current_origin()))

            if not active:
                break

            # in this section, the sentences that are still active are
            # compacted so that the decoder is not run on completed sentences
            active_idx = t.cuda.LongTensor([batch_idx[k] for k in active])
            batch_idx = {beam: idx for idx, beam in enumerate(active)}

            def update_active(t):
                # select only the remaining active sentences
                view = t.data.view(-1, remaining_sents,
                                   self.params.decoder_rnn_size)
                new_size = list(t.size())
                new_size[-2] = new_size[-2] * len(
                    active_idx) // remaining_sents
                return Variable(
                    view.index_select(1, active_idx).view(*new_size))

            dec_states = (update_active(dec_states[0]),
                          update_active(dec_states[1]))
            dec_out = update_active(dec_out)
            # context = update_active(context)

            remaining_sents = len(active)

        # (4) package everything up

        allHyp, allScores = [], []

        for b in range(batch_size):
            scores, ks = beam[b].sort_best()
            # print scores
            # print ks
            allScores += [scores[:n_best]]
            hyps = zip(*[beam[b].get_hyp(k) for k in ks[:n_best]])
            # print hyps
            # print "------------------"
            allHyp += [hyps]

        # print '==== Complete ========='

        return allHyp, allScores
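
# The beam-search code above assumes an OpenNMT-style Beam helper; the interface
# it relies on is sketched below from the calls in this file (bodies are
# illustrative placeholders, not the project's actual implementation):
#
#   class Beam(object):
#       def __init__(self, size, batch_loader, cuda=False): ...
#       done                       # True once EOS has reached the top of the beam
#       def get_current_state(self): ...   # LongTensor [beam_size] of last words
#       def get_current_origin(self): ...  # backpointers into the previous step
#       def advance(self, word_lk): ...    # extend hypotheses; returns done flag
#       def sort_best(self): ...           # (scores, indices) sorted by score
#       def get_hyp(self, k): ...          # token sequence of the k-th hypothesis
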
Beispiel #12
0
class RVAE(nn.Module):
    def __init__(self, params,params_2):
        super(RVAE, self).__init__()

        self.params = params
        self.params_2 = params_2        #Encoder-2 parameters

        self.embedding = Embedding(self.params, '')
        self.embedding_2 = Embedding(self.params_2, '')

        self.encoder = Encoder(self.params)
        self.encoder_2 = Encoder(self.params_2)


        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)

        self.encoder_3 = Encoder(self.params)
        self.decoder = Decoder(self.params_2)         #change this to params_2

    def forward(self, drop_prob,
                encoder_word_input=None, encoder_character_input=None,
                encoder_word_input_2=None, encoder_character_input_2=None,
                decoder_word_input_2=None, decoder_character_input_2=None,
                z=None, initial_state=None):

        # The forward parameters were modified to accommodate Encoder-2
        """
        :param encoder_word_input: A Long tensor with shape [batch_size, seq_len]
        :param encoder_character_input: A Long tensor with shape [batch_size, seq_len, max_word_len]
        :param decoder_word_input_2: A Long tensor with shape [batch_size, max_seq_len + 1]
        :param initial_state: initial state of the decoder rnn, used when sampling

        :param drop_prob: probability that an element of the decoder input is zeroed (dropout)

        :param z: latent context, passed only when sampling

        :return: unnormalized logits of the word distribution
                    with shape [batch_size, seq_len, word_vocab_size],
                 and the final rnn state with shape [num_layers, batch_size, decoder_rnn_size]
        """

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input_2],
                                  True) \
            or (z is not None and decoder_word_input_2 is not None), \
            "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            ''' Get context from encoder and sample z ~ N(mu, std)
            '''
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input, encoder_character_input)

            ''' ===================================================Doing the same for encoder-2===================================================
            '''
            [batch_size_2, _] = encoder_word_input_2.size()

            encoder_input_2 = self.embedding_2(encoder_word_input_2, encoder_character_input_2)

            ''' ==================================================================================================================================
            '''
            
            context, h_0, c_0 = self.encoder(encoder_input, None)

            State = (h_0, c_0)  # Final state of Encoder-1
            context_2, _, _ = self.encoder_2(encoder_input_2, State)  # Encoder_2 for Ques_2

            mu = self.context_to_mu(context_2)
            logvar = self.context_to_logvar(context_2)
            std = t.exp(0.5 * logvar)

            z = Variable(t.randn([batch_size, self.params.latent_variable_size]))
            if use_cuda:
                z = z.cuda()

            z = z * std + mu

            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)).mean().squeeze()

            encoder_input = self.embedding(encoder_word_input, encoder_character_input)
            _ , h_0 , c_0 = self.encoder_3(encoder_input, None)
            initial_state = (h_0,c_0) #Final state of Encoder-1

        else:
            kld = None


        

        decoder_input_2 = self.embedding.word_embed(decoder_word_input_2)   # What to do with this decoder input ? --> Slightly resolved
        out, final_state = self.decoder(decoder_input_2, z, drop_prob, initial_state)           # Take a look at the decoder

        return out, final_state, kld

    def learnable_parameters(self):

        # word_embedding is constant parameter thus it must be dropped from list of parameters for optimizer
        return [p for p in self.parameters() if p.requires_grad]

    def trainer(self, optimizer, batch_loader, batch_loader_2):
        def train(i, batch_size, use_cuda, dropout, start_index):
            input = batch_loader.next_batch(batch_size, 'train', start_index)
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target] = input


            ''' =================================================== Input for Encoder-2 ========================================================
            '''

            input_2 = batch_loader_2.next_batch(batch_size, 'train', start_index)
            input_2 = [Variable(t.from_numpy(var)) for var in input_2]
            input_2 = [var.long() for var in input_2]
            input_2 = [var.cuda() if use_cuda else var for var in input_2]

            [encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, target] = input_2

            ''' ================================================================================================================================
            '''
            # exit()

            logits, _, kld = self(dropout,
                                  encoder_word_input, encoder_character_input,
                                  encoder_word_input_2,encoder_character_input_2,
                                  decoder_word_input_2, decoder_character_input_2,
                                  z=None)

            # logits = logits.view(-1, self.params.word_vocab_size)
            logits = logits.view(-1, self.params_2.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)

            loss = 79 * cross_entropy + kld_coef(i) * kld

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            return cross_entropy, kld, kld_coef(i)

        return train
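
    # kld_coef(i) is the KL-annealing weight, defined elsewhere in the repository.
    # Purely as an illustration (an assumption, not this project's definition), a
    # common sigmoid annealing schedule looks like:
    #   def kld_coef(i):
    #       return float(1 / (1 + np.exp(-0.0025 * (i - 2500))))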

    def validater(self, batch_loader,batch_loader_2):
        def validate(batch_size, use_cuda, start_index):
            input = batch_loader.next_batch(batch_size, 'valid', start_index)
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target] = input

            ''' ==================================================== Input for Encoder-2 ========================================================
            '''

            input_2 = batch_loader_2.next_batch(batch_size, 'valid', start_index)
            input_2 = [Variable(t.from_numpy(var)) for var in input_2]
            input_2 = [var.long() for var in input_2]
            input_2 = [var.cuda() if use_cuda else var for var in input_2]
            [encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, target] = input_2

            ''' ==================================================================================================================================
            '''

            logits, _, kld = self(0.,
                                  encoder_word_input, encoder_character_input,
                                  encoder_word_input_2,encoder_character_input_2,
                                  decoder_word_input_2, decoder_character_input_2,
                                  z=None)

            # logits = logits.view(-1, self.params.word_vocab_size)
            logits = logits.view(-1, self.params_2.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)

            return cross_entropy, kld

        return validate

    def sample(self, batch_loader, seq_len, seed, use_cuda, State):
        seed = Variable(t.from_numpy(seed).float())
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, decoder_character_input_np = batch_loader.go_input(1)

        decoder_word_input = Variable(t.from_numpy(decoder_word_input_np).long())
        decoder_character_input = Variable(t.from_numpy(decoder_character_input_np).long())

        if use_cuda:
            decoder_word_input, decoder_character_input = decoder_word_input.cuda(), decoder_character_input.cuda()

        result = ''

        initial_state = State

        for i in range(seq_len):
            logits, initial_state, _ = self(0., None, None, None, None,
                                            decoder_word_input, decoder_character_input,
                                            seed, initial_state)


            # forward(self, drop_prob,
            #           encoder_word_input=None, encoder_character_input=None,
            #           encoder_word_input_2=None, encoder_character_input_2=None,
            #           decoder_word_input_2=None, decoder_character_input_2=None,
            #           z=None, initial_state=None):

            # logits = logits.view(-1, self.params.word_vocab_size)
            # logits = logits.view(-1, self.params.word_vocab_size)
            logits = logits.view(-1, self.params_2.word_vocab_size)
            # print '---------------------------------------'
            # print 'Printing logits'
            # print logits
            # print '------------------------------------------'

            prediction = F.softmax(logits)

            word = batch_loader.sample_word_from_distribution(prediction.data.cpu().numpy()[-1])

            if word == batch_loader.end_token:
                break

            result += ' ' + word

            decoder_word_input_np = np.array([[batch_loader.word_to_idx[word]]])
            decoder_character_input_np = np.array([[batch_loader.encode_characters(word)]])

            decoder_word_input = Variable(t.from_numpy(decoder_word_input_np).long())
            decoder_character_input = Variable(t.from_numpy(decoder_character_input_np).long())

            if use_cuda:
                decoder_word_input, decoder_character_input = decoder_word_input.cuda(), decoder_character_input.cuda()

        return result

    def sampler(self, batch_loader, seq_len, seed, use_cuda):
        input = batch_loader.next_batch(1, 'valid', 1)
        input = [Variable(t.from_numpy(var)) for var in input]
        input = [var.long() for var in input]
        input = [var.cuda() if use_cuda else var for var in input]
        [encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target] = input

        encoder_input = self.embedding(encoder_word_input, encoder_character_input)

        _ , h0 , c0 = self.encoder_3(encoder_input, None)
        State = (h0,c0)

        # print '----------------------'
        # print 'Printing h0 ---------->'
        # print h0
        # print '----------------------'

        # State = None
        result = self.sample(batch_loader, seq_len, seed, use_cuda, State)

        return result
Beispiel #13
0
class RVAE(nn.Module):
    def __init__(self, params, params_2):
        super(RVAE, self).__init__()

        self.params = params
        self.params_2 = params_2  # Encoder-2 parameters

        self.embedding = Embedding(self.params, '')
        self.embedding_2 = Embedding(self.params_2, '', True)

        self.encoder = Encoder(self.params)
        self.encoder_2 = Encoder(self.params_2)

        #
        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2,
                                       self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2,
                                           self.params.latent_variable_size)

        # self.encoder_3 = Encoder(self.params)
        self.decoder = Decoder(self.params_2)  # change this to params_2

    def forward(self,
                drop_prob,
                encoder_word_input=None,
                encoder_character_input=None,
                encoder_word_input_2=None,
                encoder_character_input_2=None,
                decoder_word_input_2=None,
                decoder_character_input_2=None,
                z=None,
                initial_state=None):

        assert parameters_allocation_check(self)
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input_2],
                                  True) \
               or (z is not None and decoder_word_input_2 is not None)

        if z is None:
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input)

            [batch_size_2, _] = encoder_word_input_2.size()

            encoder_input_2 = self.embedding_2(encoder_word_input_2,
                                               encoder_character_input_2)

            context, h_0, c_0 = self.encoder(encoder_input, None)

            State = (h_0, c_0)
            context_2, _, _ = self.encoder_2(encoder_input_2, State)

            mu = self.context_to_mu(context_2)
            logvar = self.context_to_logvar(context_2)
            std = t.exp(0.5 * logvar)

            z = Variable(
                t.randn([batch_size, self.params.latent_variable_size]))
            if use_cuda:
                z = z.cuda()

            z = z * std + mu

            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
                                1)).mean().squeeze()

            # encoder_input = self.embedding(encoder_word_input, encoder_character_input)
            # _ , h_0 , c_0 = self.encoder_3(encoder_input, None)
            initial_state = State

        else:
            kld = None
            mu = None
            std = None

        decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2)
        out, final_state = self.decoder(decoder_input_2, z, drop_prob,
                                        initial_state)

        return out, final_state, kld, mu, std

    def learnable_parameters(self):
        return [p for p in self.parameters() if p.requires_grad]

    def trainer(self, optimizer, batch_loader, batch_loader_2):
        def train(i, batch_size, use_cuda, dropout, start_index):
            input = batch_loader.next_batch(batch_size, 'train', start_index)
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]
            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, decoder_character_input, target
            ] = input

            input_2 = batch_loader_2.next_batch(batch_size, 'train',
                                                start_index)
            input_2 = [Variable(t.from_numpy(var)) for var in input_2]
            input_2 = [var.long() for var in input_2]
            input_2 = [var.cuda() if use_cuda else var for var in input_2]
            [
                encoder_word_input_2, encoder_character_input_2,
                decoder_word_input_2, decoder_character_input_2, target
            ] = input_2

            logits, _, kld, _, _ = self(dropout,
                                        encoder_word_input,
                                        encoder_character_input,
                                        encoder_word_input_2,
                                        encoder_character_input_2,
                                        decoder_word_input_2,
                                        decoder_character_input_2,
                                        z=None)

            # logits = logits.view(-1, self.params.word_vocab_size)
            logits = logits.view(-1, self.params_2.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)
            loss = 79 * cross_entropy + kld_coef(i) * kld
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            return cross_entropy, kld, kld_coef(i)

        return train

    def validater(self, batch_loader, batch_loader_2):
        def validate(batch_size, use_cuda, start_index):
            input = batch_loader.next_batch(batch_size, 'valid', start_index)
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, decoder_character_input, target
            ] = input

            input_2 = batch_loader_2.next_batch(batch_size, 'valid',
                                                start_index)
            input_2 = [Variable(t.from_numpy(var)) for var in input_2]
            input_2 = [var.long() for var in input_2]
            input_2 = [var.cuda() if use_cuda else var for var in input_2]
            [
                encoder_word_input_2, encoder_character_input_2,
                decoder_word_input_2, decoder_character_input_2, target
            ] = input_2

            logits, _, kld, _, _ = self(0.,
                                        encoder_word_input,
                                        encoder_character_input,
                                        encoder_word_input_2,
                                        encoder_character_input_2,
                                        decoder_word_input_2,
                                        decoder_character_input_2,
                                        z=None)
            logits = logits.view(-1, self.params_2.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)

            return cross_entropy, kld

        return validate

    def sample(self, batch_loader, seq_len, seed, use_cuda, State):
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, decoder_character_input_np = batch_loader.go_input(
            1)

        decoder_word_input = Variable(
            t.from_numpy(decoder_word_input_np).long())
        decoder_character_input = Variable(
            t.from_numpy(decoder_character_input_np).long())

        if use_cuda:
            decoder_word_input, decoder_character_input = decoder_word_input.cuda(
            ), decoder_character_input.cuda()

        result = ''

        initial_state = State

        for i in range(seq_len):
            logits, initial_state, _, _, _ = self(0., None, None, None, None,
                                                  decoder_word_input,
                                                  decoder_character_input,
                                                  seed, initial_state)

            # forward(self, drop_prob,
            #           encoder_word_input=None, encoder_character_input=None,
            #           encoder_word_input_2=None, encoder_character_input_2=None,
            #           decoder_word_input_2=None, decoder_character_input_2=None,
            #           z=None, initial_state=None):

            # logits = logits.view(-1, self.params.word_vocab_size)
            # logits = logits.view(-1, self.params.word_vocab_size)
            logits = logits.view(-1, self.params_2.word_vocab_size)
            # print '---------------------------------------'
            # print 'Printing logits'
            # print logits
            # print '------------------------------------------'

            prediction = F.softmax(logits)

            word = batch_loader.sample_word_from_distribution(
                prediction.data.cpu().numpy()[-1])

            if word == batch_loader.end_token:
                break

            result += ' ' + word

            decoder_word_input_np = np.array([[batch_loader.word_to_idx[word]]
                                              ])
            decoder_character_input_np = np.array(
                [[batch_loader.encode_characters(word)]])

            decoder_word_input = Variable(
                t.from_numpy(decoder_word_input_np).long())
            decoder_character_input = Variable(
                t.from_numpy(decoder_character_input_np).long())

            if use_cuda:
                decoder_word_input, decoder_character_input = decoder_word_input.cuda(
                ), decoder_character_input.cuda()

        return result

    def sampler(self, batch_loader, batch_loader_2, seq_len, seed, use_cuda, i,
                beam_size, n_best):
        input = batch_loader.next_batch(1, 'valid', i)
        input = [Variable(t.from_numpy(var)) for var in input]
        input = [var.long() for var in input]
        input = [var.cuda() if use_cuda else var for var in input]
        [
            encoder_word_input, encoder_character_input, decoder_word_input,
            decoder_character_input, target
        ] = input

        encoder_input = self.embedding(encoder_word_input,
                                       encoder_character_input)

        _, h0, c0 = self.encoder(encoder_input, None)
        State = (h0, c0)

        results, scores = self.sample_beam(batch_loader_2, seq_len, seed,
                                           use_cuda, State, beam_size, n_best)

        return results, scores

    def sample_beam(self, batch_loader, seq_len, seed, use_cuda, State,
                    beam_size, n_best):
        # seed = Variable(t.from_numpy(seed).float())
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, decoder_character_input_np = batch_loader.go_input(
            1)

        decoder_word_input = Variable(
            t.from_numpy(decoder_word_input_np).long())
        decoder_character_input = Variable(
            t.from_numpy(decoder_character_input_np).long())

        if use_cuda:
            decoder_word_input, decoder_character_input = decoder_word_input.cuda(
            ), decoder_character_input.cuda()

        dec_states = State

        dec_states = [
            dec_states[0].repeat(1, beam_size, 1),
            dec_states[1].repeat(1, beam_size, 1)
        ]

        drop_prob = 0.0
        batch_size = 1

        beam = [
            Beam(beam_size, batch_loader, cuda=True) for k in range(batch_size)
        ]

        batch_idx = list(range(batch_size))
        remaining_sents = batch_size

        for i in range(seq_len):

            input = t.stack([
                b.get_current_state() for b in beam if not b.done
            ]).t().contiguous().view(1, -1)

            trg_emb = self.embedding_2.word_embed(
                Variable(input).transpose(1, 0))

            # print trg_emb.size()
            # print seed.size()

            trg_h, dec_states = self.decoder.only_decoder_beam(
                trg_emb, seed, drop_prob, dec_states)

            dec_out = trg_h.squeeze(1)

            # print "dec_out:", dec_out.size()

            out = F.softmax(self.decoder.fc(dec_out)).unsqueeze(0)

            word_lk = out.view(beam_size, remaining_sents,
                               -1).transpose(0, 1).contiguous()

            active = []
            for b in range(batch_size):
                if beam[b].done:
                    continue

                idx = batch_idx[b]
                if not beam[b].advance(word_lk.data[idx]):
                    active += [b]

                for dec_state in dec_states:  # iterate over h, c
                    # layers x beam*sent x dim
                    sent_states = dec_state.view(-1, beam_size,
                                                 remaining_sents,
                                                 dec_state.size(2))[:, :, idx]
                    sent_states.data.copy_(
                        sent_states.data.index_select(
                            1, beam[b].get_current_origin()))

            if not active:
                break

            active_idx = t.cuda.LongTensor([batch_idx[k] for k in active])
            batch_idx = {beam: idx for idx, beam in enumerate(active)}

            def update_active(t):
                view = t.data.view(-1, remaining_sents,
                                   self.params.decoder_rnn_size)
                new_size = list(t.size())
                new_size[-2] = new_size[-2] * len(active_idx) \
                               // remaining_sents
                return Variable(
                    view.index_select(1, active_idx).view(*new_size))

            dec_states = (update_active(dec_states[0]),
                          update_active(dec_states[1]))
            dec_out = update_active(dec_out)

            remaining_sents = len(active)

        allHyp, allScores = [], []

        for b in range(batch_size):
            scores, ks = beam[b].sort_best()
            allScores += [scores[:n_best]]
            hyps = zip(*[beam[b].get_hyp(k) for k in ks[:n_best]])
            allHyp += [hyps]

        return allHyp, allScores
Beispiel #14
0
class RVAE(nn.Module):
    def __init__(self, params, params_2):
        super(RVAE, self).__init__()

        self.params = params
        self.params_2 = params_2

        self.embedding = Embedding(self.params, '')
        self.embedding_2 = Embedding(self.params_2, '')

        self.encoder = Encoder(self.params)
        self.encoder_2 = Encoder(self.params_2)

        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2,
                                       self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2,
                                           self.params.latent_variable_size)

        self.encoder_3 = Encoder(self.params)
        self.decoder = Decoder(self.params_2)

    def forward(self,
                drop_prob,
                encoder_word_input=None,
                encoder_character_input=None,
                encoder_word_input_2=None,
                encoder_character_input_2=None,
                decoder_word_input_2=None,
                decoder_character_input_2=None,
                z=None,
                initial_state=None):

        assert parameters_allocation_check(self)
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input_2],
                                  True) \
            or (z is not None and decoder_word_input_2 is not None)

        if z is None:
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input)

            [batch_size_2, _] = encoder_word_input_2.size()

            encoder_input_2 = self.embedding_2(encoder_word_input_2,
                                               encoder_character_input_2)

            context, h_0, c_0 = self.encoder(encoder_input, None)

            State = (h_0, c_0)
            context_2, _, _ = self.encoder_2(encoder_input_2, State)

            mu = self.context_to_mu(context_2)
            logvar = self.context_to_logvar(context_2)
            std = t.exp(0.5 * logvar)

            z = Variable(
                t.randn([batch_size, self.params.latent_variable_size]))
            if use_cuda:
                z = z.cuda()

            z = z * std + mu

            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
                                1)).mean().squeeze()

            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input)
            _, h_0, c_0 = self.encoder_3(encoder_input, None)
            initial_state = (h_0, c_0)

        else:
            kld = None
        decoder_input_2 = self.embedding.word_embed(decoder_word_input_2)
        out, final_state = self.decoder(decoder_input_2, z, drop_prob,
                                        initial_state)

        return out, final_state, kld

    def learnable_parameters(self):

        return [p for p in self.parameters() if p.requires_grad]

    def trainer(self, optimizer, batch_loader, batch_loader_2):
        def train(i, batch_size, use_cuda, dropout, start_index):
            input = batch_loader.next_batch(batch_size, 'train', start_index)
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, decoder_character_input, target
            ] = input

            input_2 = batch_loader_2.next_batch(batch_size, 'train',
                                                start_index)
            input_2 = [Variable(t.from_numpy(var)) for var in input_2]
            input_2 = [var.long() for var in input_2]
            input_2 = [var.cuda() if use_cuda else var for var in input_2]

            [
                encoder_word_input_2, encoder_character_input_2,
                decoder_word_input_2, decoder_character_input_2, target
            ] = input_2

            logits, _, kld = self(dropout,
                                  encoder_word_input,
                                  encoder_character_input,
                                  encoder_word_input_2,
                                  encoder_character_input_2,
                                  decoder_word_input_2,
                                  decoder_character_input_2,
                                  z=None)

            logits = logits.view(-1, self.params_2.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)

            loss = 79 * cross_entropy + kld_coef(i) * kld

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            return cross_entropy, kld, kld_coef(i)

        return train

    def validater(self, batch_loader, batch_loader_2):
        def validate(batch_size, use_cuda, start_index):
            input = batch_loader.next_batch(batch_size, 'valid', start_index)
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, decoder_character_input, target
            ] = input
            input_2 = batch_loader_2.next_batch(batch_size, 'valid',
                                                start_index)
            input_2 = [Variable(t.from_numpy(var)) for var in input_2]
            input_2 = [var.long() for var in input_2]
            input_2 = [var.cuda() if use_cuda else var for var in input_2]
            [
                encoder_word_input_2, encoder_character_input_2,
                decoder_word_input_2, decoder_character_input_2, target
            ] = input_2
            logits, _, kld = self(0.,
                                  encoder_word_input,
                                  encoder_character_input,
                                  encoder_word_input_2,
                                  encoder_character_input_2,
                                  decoder_word_input_2,
                                  decoder_character_input_2,
                                  z=None)
            logits = logits.view(-1, self.params_2.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)

            return cross_entropy, kld

        return validate

    def sample(self, batch_loader, seq_len, seed, use_cuda, State):
        seed = Variable(t.from_numpy(seed).float())
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, decoder_character_input_np = batch_loader.go_input(
            1)

        decoder_word_input = Variable(
            t.from_numpy(decoder_word_input_np).long())
        decoder_character_input = Variable(
            t.from_numpy(decoder_character_input_np).long())

        if use_cuda:
            decoder_word_input, decoder_character_input = decoder_word_input.cuda(
            ), decoder_character_input.cuda()

        result = ''

        initial_state = State

        for i in range(seq_len):
            logits, initial_state, _ = self(0., None, None, None, None,
                                            decoder_word_input,
                                            decoder_character_input, seed,
                                            initial_state)

            logits = logits.view(-1, self.params_2.word_vocab_size)
            prediction = F.softmax(logits)

            word = batch_loader.sample_word_from_distribution(
                prediction.data.cpu().numpy()[-1])

            if word == batch_loader.end_token:
                break

            result += ' ' + word

            decoder_word_input_np = np.array([[batch_loader.word_to_idx[word]]
                                              ])
            decoder_character_input_np = np.array(
                [[batch_loader.encode_characters(word)]])

            decoder_word_input = Variable(
                t.from_numpy(decoder_word_input_np).long())
            decoder_character_input = Variable(
                t.from_numpy(decoder_character_input_np).long())

            if use_cuda:
                decoder_word_input, decoder_character_input = decoder_word_input.cuda(
                ), decoder_character_input.cuda()

        return result

    def sampler(self, batch_loader, seq_len, seed, use_cuda):
        input = batch_loader.next_batch(1, 'valid', 1)
        input = [Variable(t.from_numpy(var)) for var in input]
        input = [var.long() for var in input]
        input = [var.cuda() if use_cuda else var for var in input]
        [
            encoder_word_input, encoder_character_input, decoder_word_input,
            decoder_character_input, target
        ] = input

        encoder_input = self.embedding(encoder_word_input,
                                       encoder_character_input)

        _, h0, c0 = self.encoder_3(encoder_input, None)
        State = (h0, c0)
        result = self.sample(batch_loader, seq_len, seed, use_cuda, State)
        return result
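

# The sample() loop above decodes one word at a time: softmax over the last step's
# logits, draw a word index from that distribution, stop at the end token and feed
# the chosen word back into the decoder. A minimal, self-contained sketch of that
# sampling step (toy vocabulary and logits only; batch_loader.sample_word_from_distribution
# presumably wraps a similar np.random.choice call):
def _sample_word_sketch():
    import numpy as np
    import torch as t
    import torch.nn.functional as F

    vocab = ['the', 'cat', 'sat', '<end>']
    logits = t.tensor([[0.2, 1.5, 0.3, 0.1]])       # logits for the last decoded step
    probs = F.softmax(logits, dim=-1)[-1].double().numpy()
    probs = probs / probs.sum()                     # guard against float rounding error
    return vocab[int(np.random.choice(len(vocab), p=probs))]

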
class RVAE(nn.Module):
    def __init__(self, params: object, params_2: object, path: str) -> None:
        """
        [summary] initializes the RVAE  with the correct parameters and data files

        Args:
            params (object): [description] parameters for original encoder
            params_2 (object): [description] parameters for paraphrase encoder
            path (str): [description] a path to the data files
        """
        super(RVAE, self).__init__()

        self.params = params
        self.params_2 = params_2

        self.embedding = Embedding(self.params, path)
        self.embedding_2 = Embedding(self.params_2, path, True)

        self.encoder_original = Encoder(self.params)
        if self.params.hrvae:
            self.encoder_paraphrase = EncoderHR(self.params_2)
        else:
            self.encoder_paraphrase = Encoder(self.params_2)

        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2,
                                       self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2,
                                           self.params.latent_variable_size)

        if self.params.attn_model and self.params.res_model:
            self.decoder = DecoderResidualAttention(self.params_2)
        elif self.params.attn_model:
            self.decoder = DecoderAttention(self.params_2)
        elif self.params.res_model:
            self.decoder = DecoderResidual(self.params_2)
        else:
            self.decoder = Decoder(self.params_2)

    def forward(self,
                unk_idx: int,
                drop_prob: float,
                encoder_word_input: object = None,
                encoder_character_input: object = None,
                encoder_word_input_2: object = None,
                encoder_character_input_2: object = None,
                decoder_word_input_2: object = None,
                decoder_character_input_2: object = None,
                z: object = None,
                initial_state: tuple = None) -> tuple:
        """
        :param encoder_word_input: An tensor with shape of [batch_size, seq_len] of Long type
        :param encoder_character_input: An tensor with shape of [batch_size, seq_len, max_word_len] of Long type
        :param decoder_word_input: An tensor with shape of [batch_size, max_seq_len + 1] of Long type
        :param initial_state: initial state of decoder rnn in order to perform sampling

        :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout

        :param z: context if sampling is performing

        :return: unnormalized logits of sentence words distribution probabilities
                    with shape of [batch_size, seq_len, word_vocab_size]
                 final rnn state with shape of [num_layers, batch_size, decoder_rnn_size]
        """

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input_2],
                                  True) \
            or (z is not None and decoder_word_input_2 is not None), \
            "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            ''' Get context from encoder and sample z ~ N(mu, std) '''
            [batch_size, _] = encoder_word_input.size()
            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input, unk_idx,
                                           drop_prob)
            ''' ===================================================Doing the same for encoder-2=================================================== '''
            [batch_size_2, _] = encoder_word_input_2.size()
            encoder_input_2 = self.embedding_2(encoder_word_input_2,
                                               encoder_character_input_2,
                                               unk_idx, drop_prob)
            ''' ================================================================================================================================== '''
            enc_out_original, context, h_0, c_0, _ = self.encoder_original(
                encoder_input, None)
            state_original = (h_0, c_0)  # Final state of Encoder-1
            enc_out_paraphrase, context_2, h_0, c_0, context_ = self.encoder_paraphrase(
                encoder_input_2, state_original)  # Encoder_2 for Ques_2
            state_paraphrase = (h_0, c_0)  # Final state of Encoder-2

            if context_ is not None:
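                # context_ holds a sequence of context vectors from the HR encoder
                # (presumably one per layer/step): z is sampled from the posterior of
                # the last entry, while the KL term below stacks mu/logvar from every
                # entry and divides by the number of entries.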

                mu_ = []
                logvar_ = []
                for entry in context_:
                    mu_.append(self.context_to_mu(entry))
                    logvar_.append(self.context_to_logvar(entry))

                std = t.exp(0.5 * logvar_[-1])

                z = Variable(
                    t.randn([batch_size, self.params.latent_variable_size]))
                if use_cuda:
                    z = z.cuda()

                z = z * std + mu_[-1]

                mu = t.stack(mu_)
                logvar = t.stack(logvar_)

                kld = -0.5 * t.sum(1 + logvar - mu.pow(2) - logvar.exp())
                kld = kld / mu.shape[0]

            else:

                mu = self.context_to_mu(context_2)
                logvar = self.context_to_logvar(context_2)
                std = t.exp(0.5 * logvar)

                z = Variable(
                    t.randn([batch_size, self.params.latent_variable_size]))
                if use_cuda:
                    z = z.cuda()

                z = z * std + mu
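                # z = mu + std * eps above is the reparameterization trick; the KL term
                # below is the closed-form KL between N(mu, diag(std^2)) and N(0, I):
                # KL = -0.5 * sum(1 + logvar - mu^2 - exp(logvar)), averaged over the batch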

                kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
                                    1)).mean().squeeze()

        else:
            kld = None
            mu = None
            std = None
            # sampling path: no encoder pass is performed, so fall back to the
            # provided initial_state and skip the paraphrase encoder outputs
            enc_out_paraphrase = None
            state_original = initial_state

        decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2)
        out, final_state = self.decoder(decoder_input_2, z, drop_prob,
                                        enc_out_paraphrase, state_original)

        return out, final_state, kld, mu, std

    def learnable_parameters(self) -> list:
        """ returns the parameters that require gradients, i.e. those the optimizer should update """
        return [p for p in self.parameters() if p.requires_grad]

    def trainer(self, optimizer: object, batch_loader: object,
                batch_loader_2: object) -> object:
        def train(coef: float, batch_size: int, use_cuda: bool, dropout: float,
                  start_index: int) -> tuple:
            """ train the encoder/decoder step by step via train() """
            input = batch_loader.next_batch(batch_size, 'train', start_index)
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, decoder_character_input, target, _
            ] = input
            ''' =================================================== Input for Encoder-2 ========================================================'''
            input_2 = batch_loader_2.next_batch(batch_size, 'train',
                                                start_index)
            input_2 = [Variable(t.from_numpy(var)) for var in input_2]
            input_2 = [var.long() for var in input_2]
            input_2 = [var.cuda() if use_cuda else var for var in input_2]
            [
                encoder_word_input_2, encoder_character_input_2,
                decoder_word_input_2, decoder_character_input_2, target, _
            ] = input_2  # the paraphrase target overwrites the original target unpacked above
            unk_idx = None
            ''' ================================================================================================================================ '''
            logits, _, kld, _, _ = self(unk_idx,
                                        dropout,
                                        encoder_word_input,
                                        encoder_character_input,
                                        encoder_word_input_2,
                                        encoder_character_input_2,
                                        decoder_word_input_2,
                                        decoder_character_input_2,
                                        z=None)

            logits = logits.view(-1, self.params_2.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)

            loss = 79 * cross_entropy + coef * kld  # 79 as arbitrary loss weight

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            return cross_entropy, kld, coef

        return train

    def validater(self, batch_loader, batch_loader_2):
        def validate(batch_size, use_cuda, start_index):
            """ validate the encoder/decoder step by step via validate() """
            input = batch_loader.next_batch(batch_size, 'valid', start_index)
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, decoder_character_input, target
            ] = input
            ''' =================================================== Input for Encoder-2 ======================================================== '''

            input_2 = batch_loader_2.next_batch(batch_size, 'valid',
                                                start_index)
            input_2 = [Variable(t.from_numpy(var)) for var in input_2]
            input_2 = [var.long() for var in input_2]
            input_2 = [var.cuda() if use_cuda else var for var in input_2]
            [
                encoder_word_input_2, encoder_character_input_2,
                decoder_word_input_2, decoder_character_input_2, target
            ] = input_2
            ''' ================================================================================================================================ '''
            unk_idx = batch_loader_2.word_to_idx[batch_loader_2.unk_token]
            logits, _, kld, _, _ = self(unk_idx,
                                        0.,
                                        encoder_word_input,
                                        encoder_character_input,
                                        encoder_word_input_2,
                                        encoder_character_input_2,
                                        decoder_word_input_2,
                                        decoder_character_input_2,
                                        z=None)

            # logits = logits.view(-1, self.params.word_vocab_size)
            logits = logits.view(-1, self.params_2.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)

            return cross_entropy, kld

        return validate

    def sample(self, batch_loader: object, seq_len: int, seed: object,
               use_cuda: bool, State: tuple) -> str:
        """ unrolls the decoder step by step to generate a sample, conditioned on the encoded original sentence (State) and a latent seed """
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, decoder_character_input_np = batch_loader.go_input(
            1)

        decoder_word_input = Variable(
            t.from_numpy(decoder_word_input_np).long())
        decoder_character_input = Variable(
            t.from_numpy(decoder_character_input_np).long())

        if use_cuda:
            decoder_word_input, decoder_character_input = decoder_word_input.cuda(
            ), decoder_character_input.cuda()

        result = ''

        initial_state = State

        for i in range(seq_len):
            logits, initial_state, _, _, _ = self(None, 0., None, None, None,
                                                  None, decoder_word_input,
                                                  decoder_character_input,
                                                  seed, initial_state)

            logits = logits.view(-1, self.params_2.word_vocab_size)
            prediction = F.softmax(logits, dim=-1)

            word = batch_loader.sample_word_from_distribution(
                prediction.data.cpu().numpy()[-1])

            if word == batch_loader.end_token:
                break

            result += ' ' + word

            decoder_word_input_np = np.array([[batch_loader.word_to_idx[word]]
                                              ])
            decoder_character_input_np = np.array(
                [[batch_loader.encode_characters(word)]])

            decoder_word_input = Variable(
                t.from_numpy(decoder_word_input_np).long())
            decoder_character_input = Variable(
                t.from_numpy(decoder_character_input_np).long())

            if use_cuda:
                decoder_word_input, decoder_character_input = decoder_word_input.cuda(
                ), decoder_character_input.cuda()

        return result

    def sampler(self, batch_loader, batch_loader_2, seq_len, seed, use_cuda, i,
                beam_size, n_best):
        """ samples a paraphrase for one validation sentence (start index i) using beam search over the decoder states """
        input = batch_loader.next_batch(1, 'valid', i)
        input = [Variable(t.from_numpy(var)) for var in input]
        input = [var.long() for var in input]
        input = [var.cuda() if use_cuda else var for var in input]
        [
            encoder_word_input, encoder_character_input, decoder_word_input,
            decoder_character_input, target, _
        ] = input

        encoder_input = self.embedding(encoder_word_input,
                                       encoder_character_input)

        encoder_output, _, h0, c0, _ = self.encoder_original(
            encoder_input, None)
        State = (h0, c0)

        results, scores = self.sample_beam(batch_loader_2, seq_len, seed,
                                           use_cuda, State, beam_size, n_best,
                                           encoder_output)

        return results, scores

    def sample_beam(self, batch_loader, seq_len, seed, use_cuda, State,
                    beam_size, n_best, encoder_output):
        """ beam-search decoding: unrolls the decoder step by step, conditioned on the encoded original input sentence """
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, decoder_character_input_np = batch_loader.go_input(
            1)

        decoder_word_input = Variable(
            t.from_numpy(decoder_word_input_np).long())
        decoder_character_input = Variable(
            t.from_numpy(decoder_character_input_np).long())

        if use_cuda:
            decoder_word_input, decoder_character_input = decoder_word_input.cuda(
            ), decoder_character_input.cuda()

        dec_states = State

        dec_states = [
            dec_states[0].repeat(1, beam_size, 1),
            dec_states[1].repeat(1, beam_size, 1)
        ]

        drop_prob = 0.0
        batch_size = 1

        beam = [
            Beam(beam_size, batch_loader, cuda=use_cuda)
            for k in range(batch_size)
        ]

        batch_idx = list(range(batch_size))
        remaining_sents = batch_size

        for i in range(seq_len):
            input = t.stack([
                b.get_current_state() for b in beam if not b.done
            ]).t().contiguous().view(1, -1)

            trg_emb = self.embedding_2.word_embed(
                Variable(input).transpose(1, 0))
            trg_h, dec_states = self.decoder.only_decoder_beam(
                trg_emb, seed, drop_prob, encoder_output, dec_states)

            dec_out = trg_h.squeeze(1)
            out = F.softmax(self.decoder.fc(dec_out), dim=-1).unsqueeze(0)

            word_lk = out.view(beam_size, remaining_sents,
                               -1).transpose(0, 1).contiguous()

            active = []
            for b in range(batch_size):
                if beam[b].done:
                    continue

                idx = batch_idx[b]
                if not beam[b].advance(word_lk.data[idx]):
                    active += [b]

                for dec_state in dec_states:  # iterate over h, c
                    # layers x beam*sent x dim
                    sent_states = dec_state.view(-1, beam_size,
                                                 remaining_sents,
                                                 dec_state.size(2))[:, :, idx]
                    sent_states.data.copy_(
                        sent_states.data.index_select(
                            1, beam[b].get_current_origin()))

            if not active:
                break

            # in this section, the sentences that are still active are
            # compacted so that the decoder is not run on completed sentences
            active_idx = t.LongTensor([batch_idx[k] for k in active])
            if use_cuda:
                active_idx = active_idx.cuda()
            batch_idx = {b: idx for idx, b in enumerate(active)}

            def update_active(tensor):
                # select only the remaining active sentences
                view = tensor.data.view(-1, remaining_sents,
                                        self.params.decoder_rnn_size)
                new_size = list(tensor.size())
                new_size[-2] = new_size[-2] * len(active_idx) \
                    // remaining_sents
                return Variable(
                    view.index_select(1, active_idx).view(*new_size))

            dec_states = (update_active(dec_states[0]),
                          update_active(dec_states[1]))
            dec_out = update_active(dec_out)
            remaining_sents = len(active)

        allHyp, allScores = [], []

        for b in range(batch_size):
            scores, ks = beam[b].sort_best()
            allScores += [scores[:n_best]]
            hyps = list(zip(*[beam[b].get_hyp(k) for k in ks[:n_best]]))
            allHyp += [hyps]

        return allHyp, allScores
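

# sample_beam() above follows the classic OpenNMT-style beam search: at each step the
# decoder state is viewed as [layers, beam, batch, dim] and reordered with index_select
# using each beam's backpointers (get_current_origin), then finished sentences are
# compacted out. A minimal, self-contained sketch of the reordering step alone
# (toy tensors only; the Beam class itself is not reimplemented here):
def _beam_reorder_sketch():
    import torch as t

    num_layers, beam_size, hidden = 2, 3, 4
    h = t.arange(num_layers * beam_size * hidden, dtype=t.float32).view(
        num_layers, beam_size, hidden)

    # backpointers: for each new beam entry, the index of its parent hypothesis
    origins = t.tensor([2, 0, 0])

    # row k of the reordered state now holds the state of its parent beam
    h_reordered = h.index_select(1, origins)
    assert h_reordered[:, 0].equal(h[:, 2])
    return h_reordered

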
Beispiel #16
0
class RVAE_dilated(nn.Module):
    def __init__(self, params, regularised):
        super(RVAE_dilated, self).__init__()

        self.params = params

        self.embedding = Embedding(self.params, '')

        self.regularised = regularised

        if self.regularised:
            print("Highly regularised Encoder")
            self.encoder = HREncoder(self.params)
            self.layer_dim = self.params.encoder_num_layers * 2 * self.params.encoder_rnn_size
            self.context_to_mu = nn.Linear(self.layer_dim * 2,
                                           self.params.latent_variable_size)
            self.context_to_logvar = nn.Linear(
                self.layer_dim * 2, self.params.latent_variable_size)
        elif not self.regularised:
            print('Classic encoder')
            self.encoder = Encoder(self.params)
            self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2,
                                           self.params.latent_variable_size)
            self.context_to_logvar = nn.Linear(
                self.params.encoder_rnn_size * 2,
                self.params.latent_variable_size)

        self.decoder = Decoder(self.params)

    def forward(self,
                drop_prob,
                encoder_word_input=None,
                encoder_character_input=None,
                decoder_word_input=None,
                z=None):
        """
        :param encoder_word_input: A tensor with shape [batch_size, seq_len] of Long type
        :param encoder_character_input: A tensor with shape [batch_size, seq_len, max_word_len] of Long type
        :param decoder_word_input: A tensor with shape [batch_size, max_seq_len + 1] of Long type

        :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout

        :param z: context if sampling is performing

        :return: unnormalized logits of sentence words distribution probabilities
                    with shape of [batch_size, seq_len, word_vocab_size]
                 kld loss estimation
        """

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input],
                                  True) \
               or (z is not None and decoder_word_input is not None), \
            "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            ''' Get context from encoder and sample z ~ N(mu, std)
            '''
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input)

            context = self.encoder(encoder_input)

            mu = self.context_to_mu(context)
            logvar = self.context_to_logvar(context)
            std = t.exp(0.5 * logvar)

            z = Variable(
                t.randn([batch_size, self.params.latent_variable_size]))
            if use_cuda:
                z = z.cuda()

            z = z * std + mu

            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
                                1)).mean().squeeze()
        else:
            kld = None

        decoder_input = self.embedding.word_embed(decoder_word_input)
        out = self.decoder(decoder_input, z, drop_prob)

        return out, kld

    def learnable_parameters(self):

        # word_embedding is constant parameter thus it must be dropped from list of parameters for optimizer
        return [p for p in self.parameters() if p.requires_grad]

    def trainer(self, optimizer, batch_loader):
        perplexity = Perplexity()

        def train(i, batch_size, use_cuda, dropout):
            input = batch_loader.next_batch(batch_size, 'train')
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, _, target
            ] = input

            logits, kld = self(dropout,
                               encoder_word_input,
                               encoder_character_input,
                               decoder_word_input,
                               z=None)

            logits = logits.view(-1, self.params.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits,
                                            target,
                                            ignore_index=0,
                                            reduction="sum")

            # NOTE: only the reconstruction term is backpropagated below; the kld is
            # returned alongside for monitoring but not added to the loss
            # loss = 79 * cross_entropy + kld
            # loss = cross_entropy + kld

            logits = logits.view(batch_size, -1, self.params.word_vocab_size)
            target = target.view(batch_size, -1)
            ppl = perplexity(logits, target).mean()

            optimizer.zero_grad()
            cross_entropy.backward()
            optimizer.step()

            return ppl, kld, cross_entropy

        return train

    def validater(self, batch_loader):
        perplexity = Perplexity()

        def validate(batch_size, use_cuda):
            input = batch_loader.next_batch(batch_size, 'valid')
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, _, target
            ] = input

            logits, kld = self(0.,
                               encoder_word_input,
                               encoder_character_input,
                               decoder_word_input,
                               z=None)
            ppl = perplexity(logits, target).mean()
            logits = logits.view(-1, self.params.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits,
                                            target,
                                            ignore_index=0,
                                            reduction="sum")
            #loss = cross_entropy + kld

            return ppl, kld, cross_entropy

        return validate

    def tester(self, batch_loader):
        perplexity = Perplexity()

        def test(batch_size, use_cuda):
            input = batch_loader.next_batch(batch_size, 'test')
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, _, target
            ] = input

            logits, kld = self(0.,
                               encoder_word_input,
                               encoder_character_input,
                               decoder_word_input,
                               z=None)
            ppl = perplexity(logits, target).mean()
            logits = logits.view(-1, self.params.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits,
                                            target,
                                            ignore_index=0,
                                            reduction="sum")
            #loss = cross_entropy + kld

            return ppl, kld, cross_entropy

        return test

    def sample(self, batch_loader, seq_len, seed, use_cuda):
        seed = Variable(t.from_numpy(seed).float())
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, _ = batch_loader.go_input(1)
        decoder_word_input = Variable(
            t.from_numpy(decoder_word_input_np).long())

        if use_cuda:
            decoder_word_input = decoder_word_input.cuda()

        result = ''

        for i in range(seq_len):
            logits, _ = self(0., None, None, decoder_word_input, seed)

            [_, sl, _] = logits.size()

            logits = logits.view(-1, self.params.word_vocab_size)
            prediction = F.softmax(logits, dim=-1)
            prediction = prediction.view(1, sl, -1)

            # take the last word from the prediction and append it to the result
            word = batch_loader.sample_word_from_distribution(
                prediction.data.cpu().numpy()[0, -1])

            if word == batch_loader.end_token:
                break

            result += ' ' + word

            word = np.array([[batch_loader.word_to_idx[word]]])

            decoder_word_input_np = np.append(decoder_word_input_np, word, 1)
            decoder_word_input = Variable(
                t.from_numpy(decoder_word_input_np).long())

            if use_cuda:
                decoder_word_input = decoder_word_input.cuda()

        return result
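

# Both classes above expose trainer()/validater() factories that return closures over
# the model, the optimizer and the batch loaders, so the outer training loop only has
# to call the returned function repeatedly. A minimal, self-contained sketch of that
# pattern with a toy model and a hypothetical in-memory batch provider (not this
# repository's Parameters/BatchLoader classes):
def _closure_trainer_sketch(steps=3, batch_size=4):
    import torch as t
    import torch.nn as nn
    import torch.nn.functional as F

    class ToyModel(nn.Module):
        def __init__(self):
            super(ToyModel, self).__init__()
            self.fc = nn.Linear(8, 2)

        def forward(self, x):
            return self.fc(x)

        def trainer(self, optimizer, next_batch):
            def train(batch_size):
                x, y = next_batch(batch_size)
                loss = F.cross_entropy(self(x), y)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                return loss.item()

            return train

    model = ToyModel()
    optimizer = t.optim.Adam(model.parameters(), lr=1e-3)
    next_batch = lambda bs: (t.randn(bs, 8), t.randint(0, 2, (bs,)))
    train = model.trainer(optimizer, next_batch)
    return [train(batch_size) for _ in range(steps)]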