Example #1
def beamsearch(memory, model, device, beam_size=4, candidates=1, max_seq_length=128, bos_token=1, eos_token=2):
    # memory: Tx1xE
    model.eval()

    beam = Beam(beam_size=beam_size, min_length=0, n_top=candidates, ranker=None, start_token_id=bos_token,
                end_token_id=eos_token)

    with torch.no_grad():
        #        memory = memory.repeat(1, beam_size, 1) # TxNxE
        memory = model.SequenceModeling.expand_memory(memory, beam_size)

        for _ in range(max_seq_length):

            tgt_inp = beam.get_current_state().transpose(0, 1).to(device)  # TxN
            decoder_outputs, memory = model.SequenceModeling.forward_decoder(tgt_inp, memory)

            log_prob = log_softmax(decoder_outputs[:, -1, :].squeeze(0), dim=-1)
            beam.advance(log_prob.cpu())

            if beam.done():
                break

        scores, ks = beam.sort_finished(minimum=1)

        hypothesises = []
        for i, (times, k) in enumerate(ks[:candidates]):
            hypothesis = beam.get_hypothesis(times, k)
            hypothesises.append(hypothesis)

    return [bos_token] + [int(i) for i in hypothesises[0][:-1]]  # prepend BOS, drop the trailing EOS
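A quick standalone illustration of the memory expansion hinted at by the commented-out repeat call above (sizes are made up; expand_memory is assumed to do something equivalent):

import torch

# Sketch only: tile a T x 1 x E encoder memory so every beam hypothesis shares it.
T, E, beam_size = 5, 16, 4
memory = torch.randn(T, 1, E)
memory_beam = memory.repeat(1, beam_size, 1)  # T x beam_size x E
print(memory_beam.shape)                      # torch.Size([5, 4, 16])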
def gensummary_gpt2(template_vec,
                    ge,
                    vocab,
                    LMModel,
                    word_list,
                    subvocab,
                    clustermask=None,
                    mono=True,
                    renorm=True,
                    temperature=1,
                    bpe2word='last',
                    max_step=20,
                    beam_width=10,
                    beam_width_start=10,
                    alpha=0.1,
                    alpha_start=0.1,
                    begineos=True,
                    stopbyLMeos=False,
                    devid=0,
                    **kwargs):
    """
    Unsupervised sentence summary generation using beam search, by contextual matching and a summary style language model.
    The contextual matching here is on top of pretrained GPT-2 embeddings.
    
    Input:
        template_vec: forward only ELMo embeddings of the source sentence. 'torch.Tensor' of size (3, seq_len, 512).
        ge: 'gpt2_sequential_embedder.GPT2Embedder' object.
        vocab: 'torchtext.vocab.Vocab' object. Should be the same as is used for the pretrained language model.
        LMModel: a pretrained language model on the summary sentences.
        word_list: a list of words in the vocabulary to work with. 'List'.
        subvocab: 'torch.LongTensor' consisting of the indices of the words corresponding to 'word_list'.
        clustermask: a binary mask for each of the sub-vocabulary word. 'torch.ByteTensor' of size (len(sub-vocabulary), len(vocabulary)). Default:None.
        mono: whether to keep the monotonicity constraint. Default: True.
        renorm: whether to renormalize the probabilities over the sub-vocabulary. Default: True.
        temperature: temperature applied to the softmax in the language model. Default: 1.
        bpe2word: how to turn the BPE vectors into word vectors. Choose from ['last', 'avg']. Default: 'last'.
        max_step: maximum number of beam steps.
        beam_width: beam width.
        beam_width_start: beam width of the first step.
        alpha: the amount of language model part used for scoring. The score is: (1 - \alpha) * similarity_logscore + \alpha * LM_logscore.
        alpha_start: the amount of language model part used for scoring, only for the first step.
        begineos: whether to begin with the special '<eos>' token as is trained in the language model. Note that ELMo has its own special beginning token. Default: True.
        stopbyLMeos: whether to stop a sentence solely by the language model predicting '<eos>' as the top possibility. Default: False.
        devid: device id to run the algorithm and LSTM language models. 'int', default: 0. -1 for cpu.
        **kwargs: other arguments input to function <Beam.beamstep>. 
            E.g. normalized: whether to normalize the dot product when calculating the similarity, which makes it cosine similarity. Default: True.
                 ifadditive: whether to use an additive model on mixing the probability scores. Default: False.
    
    Output:
        beam: 'Beam' object, recording all the generated sequences.
        
    """
    device = 'cpu' if devid == -1 else f'cuda:{devid}'
    
    # Beam Search: initialization
    if begineos:
        beam = Beam(1, vocab, init_ids=[vocab.stoi['<eos>']], device=device,
                sim_score=0, lm_score=0, lm_state=None, gpt2_state=None, align_loc=None)
    else:
        beam = Beam(1, vocab, init_ids=[None], device=device,
                sim_score=0, lm_score=0, lm_state=None, gpt2_state=None, align_loc=None)
    
    # first step: start with 'beam_width_start' best matched words
    beam.beamstep(beam_width_start,
                  beam.combscoreK_GPT2,
                  template_vec=template_vec,
                  ge=ge,
                  LMModel=LMModel,
                  word_list=word_list,
                  subvocab=subvocab,
                  clustermask=clustermask, 
                  alpha=alpha_start,
                  renorm=renorm,
                  temperature=temperature,
                  bpe2word=bpe2word,
                  normalized=True,
                  ifadditive=False,
                  **kwargs)
    
    # run beam search, until all sentences hit <EOS> or max_step reached
    for s in range(max_step):
        print(f'beam step {s+1} ' + '-' * 50 + '\n')
        beam.beamstep(beam_width,
                      beam.combscoreK_GPT2,
                      template_vec=template_vec,
                      ge=ge,
                      LMModel=LMModel,
                      word_list=word_list,
                      subvocab=subvocab,
                      clustermask=clustermask,
                      mono=mono,
                      alpha=alpha,
                      renorm=renorm,
                      temperature=temperature,
                      stopbyLMeos=stopbyLMeos,
                      bpe2word=bpe2word,
                      normalized=True,
                      ifadditive=False,
                      **kwargs)
        # all beams reach termination
        if beam.endall:
            break
    
    return beam
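The score described in the docstring, (1 - \alpha) * similarity_logscore + \alpha * LM_logscore, can be sketched on its own. The helper below is a hypothetical illustration of that mixing rule, not the actual combscoreK_GPT2 implementation:

import torch
import torch.nn.functional as F

def combine_scores(sim_logprob, lm_logprob, alpha=0.1, ifadditive=False):
    # Hypothetical helper: mix contextual-matching and language-model scores.
    # With ifadditive=True, probabilities rather than log-probabilities are mixed.
    if ifadditive:
        return ((1 - alpha) * sim_logprob.exp() + alpha * lm_logprob.exp()).log()
    return (1 - alpha) * sim_logprob + alpha * lm_logprob

sim = F.log_softmax(torch.randn(5), dim=-1)  # similarity log-scores for 5 candidate words
lm = F.log_softmax(torch.randn(5), dim=-1)   # language-model log-scores
print(combine_scores(sim, lm, alpha=0.1))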
Example #3
    def forward(self, decoder_input, embedded_inputs, hidden, context):
        """
        Args:
            decoder_input: The initial input to the decoder
                size is [batch_size x embedding_dim]. Trainable parameter.
            embedded_inputs: [sourceL x batch_size x embedding_dim]
            hidden: the prev hidden state, size is [batch_size x hidden_dim]. 
                Initially this is set to (enc_h[-1], enc_c[-1])
            context: encoder outputs, [sourceL x batch_size x hidden_dim] 
        """
        def recurrence(x, hidden, logit_mask, prev_idxs, step):
            
            hx, cx = hidden  # batch_size x hidden_dim
            
            gates = self.input_weights(x) + self.hidden_weights(hx)
            ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)

            ingate = torch.sigmoid(ingate)
            forgetgate = torch.sigmoid(forgetgate)
            cellgate = torch.tanh(cellgate)
            outgate = torch.sigmoid(outgate)

            cy = (forgetgate * cx) + (ingate * cellgate)
            hy = outgate * torch.tanh(cy)  # batch_size x hidden_dim
            
            g_l = hy
            for i in range(self.n_glimpses):
                ref, logits = self.glimpse(g_l, context)
                logits, logit_mask = self.apply_mask_to_logits(step, logits, logit_mask, prev_idxs)
                # [batch_size x h_dim x sourceL] * [batch_size x sourceL x 1] = 
                # [batch_size x h_dim x 1]
                g_l = torch.bmm(ref, self.sm(logits).unsqueeze(2)).squeeze(2) 
            _, logits = self.pointer(g_l, context)
            
            logits, logit_mask = self.apply_mask_to_logits(step, logits, logit_mask, prev_idxs)
            probs = self.sm(logits)
            return hy, cy, probs, logit_mask
    
        batch_size = context.size(1)
        outputs = []
        selections = []
        steps = range(self.max_length)  # or until terminating symbol ?
        inps = []
        idxs = None
        mask = None
       
        if self.decode_type == "stochastic":
            for i in steps:
                hx, cx, probs, mask = recurrence(decoder_input, hidden, mask, idxs, i)
                hidden = (hx, cx)
                # select the next inputs for the decoder [batch_size x hidden_dim]
                decoder_input, idxs = self.decode_stochastic(
                    probs,
                    embedded_inputs,
                    selections)
                inps.append(decoder_input) 
                # use outs to point to next object
                outputs.append(probs)
                selections.append(idxs)
            return (outputs, selections), hidden
        
        elif self.decode_type == "beam_search":
            
            # Expand input tensors for beam search
            decoder_input = Variable(decoder_input.data.repeat(self.beam_size, 1))
            context = Variable(context.data.repeat(1, self.beam_size, 1))
            hidden = (Variable(hidden[0].data.repeat(self.beam_size, 1)),
                    Variable(hidden[1].data.repeat(self.beam_size, 1)))
            
            beam = [
                    Beam(self.beam_size, self.max_length, cuda=self.use_cuda) 
                    for k in range(batch_size)
            ]
            
            for i in steps:
                hx, cx, probs, mask = recurrence(decoder_input, hidden, mask, idxs, i)
                hidden = (hx, cx)
                
                probs = probs.view(self.beam_size, batch_size, -1
                        ).transpose(0, 1).contiguous()
                
                n_best = 1
                # select the next inputs for the decoder [batch_size x hidden_dim]
                decoder_input, idxs, active = self.decode_beam(probs,
                        embedded_inputs, beam, batch_size, n_best, i)
               
                inps.append(decoder_input) 
                # use probs to point to next object
                if self.beam_size > 1:
                    outputs.append(probs[:, 0,:])
                else:
                    outputs.append(probs.squeeze(0))
                # Check for indexing
                selections.append(idxs)
                 # Should be done decoding
                if len(active) == 0:
                    break
                decoder_input = Variable(decoder_input.data.repeat(self.beam_size, 1))

            return (outputs, selections), hidden
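The recurrence above relies on self.apply_mask_to_logits to keep the pointer from selecting the same input twice. A minimal sketch of one plausible implementation (an assumption, not the original method):

import torch

def apply_mask_to_logits_sketch(step, logits, mask, prev_idxs):
    # Accumulate a boolean mask of previously pointed-to positions and set their
    # logits to -inf so the softmax assigns them zero probability.
    if mask is None:
        mask = torch.zeros_like(logits, dtype=torch.bool)
    if step > 0 and prev_idxs is not None:
        mask = mask.scatter(1, prev_idxs.unsqueeze(1), True)
        logits = logits.masked_fill(mask, float('-inf'))
    return logits, mask

logits = torch.randn(2, 5)                  # batch_size x sourceL
prev = torch.tensor([3, 1])                 # index chosen by each batch element last step
masked, mask = apply_mask_to_logits_sketch(1, logits, None, prev)
print(masked)                               # positions 3 and 1 are now -inf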
Example #4
    def translate(self, src, trg, beam_size, Lang2):
        '''Beam search decoding.

        :param src:   [src_max_len, batch]    ## batch = 1
        :param trg:   [trg_max_len, batch]    ## batch = 1
        :param beam_size: beam width
        :param Lang2: target-language field whose vocabulary (Lang2.vocab.stoi) is used by the beam
        :return: best translate candidate
        '''
        max_len = trg.size(0)
        encoder_output, hidden = self.encoder(src)
        '''
            ## src: [src_max_len, batch]
            ## encoder_output: [src_max_len, batch, hidden_size]
            ## hidden: (num_layers * num_directions, batch, hidden_size) -> [2, batch, hidden_size]
        '''
        hidden = hidden[:self.decoder.
                        n_layers]  # [n_layers, batch, hidden_size]
        # trg: [trg_max_len, batch]
        output = Variable(trg.data[0, :])  # sos  [batch]

        beam = Beam(beam_size, Lang2.vocab.stoi, True)
        input_feeding = None
        for t in range(1, max_len):
            # output:  [batch] -> [batch, output_size]
            output, hidden, attn_weights = self.decoder(
                output, hidden, encoder_output, input_feeding)

            input_feeding = output
            output = self.decoder.out(output)
            output = F.log_softmax(output, dim=1)

            word_lk = output
            if output.size(0) == 1:

                output_prob = output.squeeze(0)  ## [output_size]
                word_lk = output_prob.expand(
                    beam_size,
                    output_prob.size(0))  ## [beam_size, output_size]

                # [n_layers, batch, hidden_size]
                hidden = hidden.squeeze(1)  # [n_layers, hidden_size]
                hidden = hidden.expand(
                    beam_size, hidden.size(0),
                    hidden.size(1))  # [beam_size, n_layers, hidden_size]
                hidden = hidden.transpose(
                    0, 1)  # [n_layers, beam_size, hidden_size]

                # [src_max_len, batch, hidden_size]
                encoder_output = encoder_output.squeeze(
                    1)  ## [src_max_len, hidden_size]
                encoder_output = encoder_output.expand(
                    beam_size, encoder_output.size(0), encoder_output.size(
                        1))  ## [beam_size, src_max_len, hidden_size]
                encoder_output = encoder_output.transpose(
                    0, 1)  ## [src_max_len, beam_size, hidden_size]
                input_feeding = input_feeding.squeeze(0)
                input_feeding = input_feeding.expand(beam_size,
                                                     input_feeding.size(0))

            flag = beam.advance(word_lk)
            if flag:
                break

            nextInputs = beam.get_current_state()
            # print("[nextInputs]:", nextInputs)
            output = nextInputs
            # output = Variable(nextInputs).cuda()

            originState = beam.get_current_origin()
            ## print("[origin_state]:", originState)
            hidden = hidden[:, originState]
            input_feeding = input_feeding[originState]

        xx, yy = beam.get_best()
        zz = beam.get_final()
        return xx, yy, zz
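The batch-1 to beam-size expansion done inside the loop above (squeeze, expand, transpose) can be seen in isolation with made-up sizes:

import torch

# Sketch only: tile an [n_layers, 1, hidden] decoder state so each beam gets a copy.
n_layers, hidden_size, beam_size = 2, 8, 4
hidden = torch.randn(n_layers, 1, hidden_size)
expanded = hidden.squeeze(1)                                  # [n_layers, hidden_size]
expanded = expanded.expand(beam_size, n_layers, hidden_size)  # [beam_size, n_layers, hidden]
expanded = expanded.transpose(0, 1)                           # [n_layers, beam_size, hidden]
print(expanded.shape)                                         # torch.Size([2, 4, 8])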
Example #5
    def decode_batch(self, idx):
        """Decode a minibatch."""
        # Get source minibatch
        input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
            self.src['data'],
            self.src_dict,
            idx,
            self.config['data']['batch_size'],
            self.config['data']['max_src_length'],
            add_start=True,
            add_end=True)

        beam_size = self.beam_size

        #  (1) run the encoder on the src

        context_h, (
            context_h_t,
            context_c_t) = self.get_hidden_representation(input_lines_src)

        context_h = context_h.transpose(0, 1)  # Make things sequence first.

        #  (3) run the decoder to generate sentences, using beam search

        batch_size = context_h.size(1)

        # Expand tensors for each beam.
        context = Variable(context_h.data.repeat(1, beam_size, 1))
        dec_states = [
            Variable(context_h_t.data.repeat(1, beam_size, 1)),
            Variable(context_c_t.data.repeat(1, beam_size, 1))
        ]

        beam = [
            Beam(beam_size, self.tgt_dict, cuda=True)
            for k in range(batch_size)
        ]

        dec_out = self.get_init_state_decoder(dec_states[0].squeeze(0))
        dec_states[0] = dec_out

        batch_idx = list(range(batch_size))
        remaining_sents = batch_size

        for i in range(self.config['data']['max_trg_length']):

            input = torch.stack([
                b.get_current_state() for b in beam if not b.done
            ]).t().contiguous().view(1, -1)

            trg_emb = self.model.trg_embedding(Variable(input).transpose(1, 0))
            trg_h, (trg_h_t, trg_c_t) = self.model.decoder(
                trg_emb, (dec_states[0].squeeze(0), dec_states[1].squeeze(0)),
                context)

            dec_states = (trg_h_t.unsqueeze(0), trg_c_t.unsqueeze(0))

            dec_out = trg_h_t.squeeze(1)
            out = F.softmax(self.model.decoder2vocab(dec_out), dim=-1).unsqueeze(0)

            word_lk = out.view(beam_size, remaining_sents,
                               -1).transpose(0, 1).contiguous()

            active = []
            for b in range(batch_size):
                if beam[b].done:
                    continue

                idx = batch_idx[b]
                if not beam[b].advance(word_lk.data[idx]):
                    active += [b]

                for dec_state in dec_states:  # iterate over h, c
                    # layers x beam*sent x dim
                    sent_states = dec_state.view(-1, beam_size,
                                                 remaining_sents,
                                                 dec_state.size(2))[:, :, idx]
                    sent_states.data.copy_(
                        sent_states.data.index_select(
                            1, beam[b].get_current_origin()))

            if not active:
                break

            # in this section, the sentences that are still active are
            # compacted so that the decoder is not run on completed sentences
            active_idx = torch.cuda.LongTensor([batch_idx[k] for k in active])
            batch_idx = {beam: idx for idx, beam in enumerate(active)}

            def update_active(t):
                # select only the remaining active sentences
                view = t.data.view(-1, remaining_sents,
                                   self.model.decoder.hidden_size)
                new_size = list(t.size())
                new_size[-2] = new_size[-2] * len(active_idx) \
                    // remaining_sents
                return Variable(
                    view.index_select(1, active_idx).view(*new_size))

            dec_states = (update_active(dec_states[0]),
                          update_active(dec_states[1]))
            dec_out = update_active(dec_out)
            context = update_active(context)

            remaining_sents = len(active)

        #  (4) package everything up

        allHyp, allScores = [], []
        n_best = 1

        for b in range(batch_size):
            scores, ks = beam[b].sort_best()

            allScores += [scores[:n_best]]
            hyps = list(zip(*[beam[b].get_hyp(k) for k in ks[:n_best]]))
            allHyp += [hyps]

        return allHyp, allScores
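Several of the decoders here rely on the same OpenNMT-style Beam interface: advance() consumes per-beam word likelihoods, get_current_state()/get_current_origin() expose the chosen tokens and their parent beams, and sort_best()/get_hyp() recover finished hypotheses. The class below is a minimal self-contained sketch of that interface, not any of these repositories' actual Beam implementations:

import torch

class BeamSketch:
    """Minimal sketch of the beam bookkeeping assumed above (not the repos' Beam class)."""

    def __init__(self, size, bos=1, eos=2):
        self.size = size
        self.eos = eos
        self.done = False
        self.scores = torch.zeros(size)
        self.prev_ks = []                                        # backpointers per step
        self.next_ys = [torch.full((size,), bos, dtype=torch.long)]

    def get_current_state(self):
        return self.next_ys[-1]                                  # tokens chosen last step

    def get_current_origin(self):
        return self.prev_ks[-1]                                  # parent beam of each token

    def advance(self, word_lk):
        # word_lk: beam_size x vocab_size log-likelihoods of the next word
        num_words = word_lk.size(1)
        if self.prev_ks:
            beam_lk = word_lk + self.scores.unsqueeze(1)         # accumulate path scores
        else:
            beam_lk = word_lk[0]                                 # all beams identical at step 0
        best_scores, best_ids = beam_lk.view(-1).topk(self.size)
        self.scores = best_scores
        self.prev_ks.append(torch.div(best_ids, num_words, rounding_mode='floor'))
        self.next_ys.append(best_ids % num_words)
        if self.next_ys[-1][0].item() == self.eos:               # best hypothesis ended
            self.done = True
        return self.done

    def sort_best(self):
        return torch.sort(self.scores, descending=True)

    def get_hyp(self, k):
        # Follow backpointers from the last step to recover hypothesis k.
        hyp = []
        for j in range(len(self.prev_ks) - 1, -1, -1):
            hyp.append(self.next_ys[j + 1][k].item())
            k = self.prev_ks[j][k].item()
        return hyp[::-1]

b = BeamSketch(size=3)
b.advance(torch.log_softmax(torch.randn(3, 10), dim=-1))
print(b.get_current_state(), b.get_current_origin())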
Example #6
    def sample_beam(self, batch_loader, seq_len, seed, use_cuda, State,
                    beam_size, n_best):
        # seed = Variable(t.from_numpy(seed).float())
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, decoder_character_input_np = batch_loader.go_input(
            1)

        decoder_word_input = Variable(
            t.from_numpy(decoder_word_input_np).long())
        decoder_character_input = Variable(
            t.from_numpy(decoder_character_input_np).long())

        if use_cuda:
            decoder_word_input, decoder_character_input = decoder_word_input.cuda(
            ), decoder_character_input.cuda()

        dec_states = State

        dec_states = [
            dec_states[0].repeat(1, beam_size, 1),
            dec_states[1].repeat(1, beam_size, 1)
        ]

        drop_prob = 0.0
        beam_size = beam_size
        batch_size = 1

        beam = [
            Beam(beam_size, batch_loader, cuda=True) for k in range(batch_size)
        ]

        batch_idx = list(range(batch_size))
        remaining_sents = batch_size

        for i in range(seq_len):

            input = t.stack([
                b.get_current_state() for b in beam if not b.done
            ]).t().contiguous().view(1, -1)

            trg_emb = self.embedding_2.word_embed(
                Variable(input).transpose(1, 0))

            trg_h, dec_states = self.decoder.only_decoder_beam(
                trg_emb, seed, drop_prob, dec_states)

            dec_out = trg_h.squeeze(1)

            # print "dec_out:", dec_out.size()

            out = F.softmax(self.decoder.fc(dec_out), dim=-1).unsqueeze(0)

            word_lk = out.view(beam_size, remaining_sents,
                               -1).transpose(0, 1).contiguous()

            active = []
            for b in range(batch_size):
                if beam[b].done:
                    continue

                idx = batch_idx[b]
                if not beam[b].advance(word_lk.data[idx]):
                    active += [b]

                for dec_state in dec_states:  # iterate over h, c
                    # layers x beam*sent x dim
                    sent_states = dec_state.view(-1, beam_size,
                                                 remaining_sents,
                                                 dec_state.size(2))[:, :, idx]
                    sent_states.data.copy_(
                        sent_states.data.index_select(
                            1, beam[b].get_current_origin()))

            if not active:
                break

            # in this section, the sentences that are still active are
            # compacted so that the decoder is not run on completed sentences
            active_idx = t.cuda.LongTensor([batch_idx[k] for k in active])
            batch_idx = {beam: idx for idx, beam in enumerate(active)}

            def update_active(t):
                # select only the remaining active sentences
                view = t.data.view(-1, remaining_sents,
                                   self.params.decoder_rnn_size)
                new_size = list(t.size())
                new_size[-2] = new_size[-2] * len(active_idx) \
                    // remaining_sents
                return Variable(
                    view.index_select(1, active_idx).view(*new_size))

            dec_states = (update_active(dec_states[0]),
                          update_active(dec_states[1]))
            dec_out = update_active(dec_out)
            # context = update_active(context)

            remaining_sents = len(active)

        # (4) package everything up

        allHyp, allScores = [], []

        for b in range(batch_size):
            scores, ks = beam[b].sort_best()
            allScores += [scores[:n_best]]
            hyps = list(zip(*[beam[b].get_hyp(k) for k in ks[:n_best]]))
            allHyp += [hyps]

        return allHyp, allScores
    def beam_sample(self, images, image_names, processor, max_seq_length,
                    beam_size):
        """

        :param images:
        :param image_names:
        :param processor:
        :param max_seq_length:
        :param beam_size:
        :return:
        """
        predicted_sentences = dict()

        # get the batch size
        b_size = images.shape[0]

        # Encode
        img_emb = self.encoder(images)

        # compute sentence mixing coefficient
        pi0 = self.softmax_alpha_0(img_emb)

        # compute global topic embedding
        z0 = torch.matmul(pi0, self.desc_decoder.topic_embeddings)

        # prepare decoder initial hidden state
        if self.hinit_method == 'ZEROS':
            h0 = torch.zeros([
                self.lstm_layers, img_emb.shape[0] * beam_size,
                self.hidden_size
            ],
                             device=self.device)
            c0 = torch.zeros([
                self.lstm_layers, img_emb.shape[0] * beam_size,
                self.hidden_size
            ],
                             device=self.device)
        elif self.hinit_method == 'TOPICS':
            h_init = z0.unsqueeze(0)  # seq len, batch size, emb size
            h0 = self.h0_lin(h_init)
            h0 = h0.repeat(self.lstm_layers, beam_size, 1)
            c0 = self.c0_lin(h_init)
            c0 = c0.repeat(self.lstm_layers, beam_size, 1)
        else:
            h0, c0 = None, None
            exit(
                'not a valid hinit_method. Use one of: \'ZEROS\', \'TOPICS\'.')
        hidden_state = (h0, c0)

        # create a variable for summing the past topic distributions
        pi_sum = torch.zeros_like(pi0, device=self.device)

        img_emb = img_emb.repeat(beam_size, 1)
        z0 = z0.repeat(beam_size, 1)

        # create the initial beam
        beam = [
            Beam(beam_size, processor, device=self.device)
            for _ in range(b_size)
        ]

        batch_idx = list(
            range(b_size))  # indicating index for every sample in the batch
        remaining_sents = b_size  # number of samples in batch

        # Decode
        pi_pasts = [torch.zeros_like(pi0, device=self.device)]
        count_pis = torch.zeros(img_emb.shape[0], device=self.device)
        for w_idx in range(max_seq_length):
            # compute z_past
            pi_pasts.append(pi_pasts[-1].clone())
            msk = count_pis != 0
            pi_pasts[-1][msk, :] = pi0[msk, :] / count_pis.unsqueeze(1)[
                msk, :] * pi_sum[msk, :]
            z_past = torch.matmul(pi_pasts[-1],
                                  self.desc_decoder.topic_embeddings)

            # concatenate the image, the topic embeddings and the last hidden state to get the feature vectors
            if self.switch_feature == 'IMAGE':
                switch_features = torch.cat([
                    img_emb, z0, hidden_state[0].view(hidden_state[0].shape[1],
                                                      -1)
                ],
                                            dim=-1)
            elif self.switch_feature == 'PAST_TOPICS':
                switch_features = torch.cat([
                    z_past, z0, hidden_state[0].view(hidden_state[0].shape[1],
                                                     -1)
                ],
                                            dim=-1)
            else:
                switch_features = None
                exit(
                    'not a valid switch_feature. Use one of: \'IMAGE\', \'PAST_TOPICS\'.'
                )

            if self.desc_feature == 'BOTH':
                desc_features = torch.cat([
                    img_emb, z0, hidden_state[0].view(hidden_state[0].shape[1],
                                                      -1), z_past
                ],
                                          dim=-1)
            elif self.desc_feature == 'IMAGE_ONLY':
                desc_features = torch.cat([img_emb, z0, z_past], dim=-1)
            elif self.desc_feature == 'PAST_ONLY':
                desc_features = torch.cat([
                    z0, hidden_state[0].view(hidden_state[0].shape[1], -1),
                    z_past
                ],
                                          dim=-1)
            elif self.desc_feature == 'NEITHER':
                desc_features = torch.cat([z0, z_past], dim=-1)
            else:
                desc_features = None
                exit(
                    'not a valid switch_feature. Use one of: \'BOTH\', \'IMAGE_ONLY\', \'PAST_ONLY\', \'NEITHER\'.'
                )
            input_ = torch.stack([
                b.get_current_state() for b in beam if not b.done
            ]).view(-1, 1)

            # the topic features are now of repeats of the batch stacked under each other. It should be
            # repeats of the sample for the remaining samples in the batch. So: [[1],[2],[1],[2]] -> [[1],[1],[2],[2]]
            switch_features = torch.stack([
                switch_features[range(i, switch_features.shape[0], b_size)]
                for i in range(remaining_sents)
            ]).view(switch_features.shape[0], -1)
            desc_features = torch.stack([
                desc_features[range(i, desc_features.shape[0], b_size)]
                for i in range(remaining_sents)
            ]).view(desc_features.shape[0], -1)

            # compute the switch
            Bi = self.sigmoid_s(switch_features)

            # compute the next timesteps
            pred_lang_model, hidden_state = self.lang_decoder(
                input_, hidden_state)
            pred_lang_model = pred_lang_model.squeeze(1)
            (pred_desc_model, pii) = self.desc_decoder(desc_features, z0)

            # select which prediction to use based on the switch
            out = pred_desc_model
            mask = torch.round(Bi).bool().squeeze()
            out[mask, :] = pred_lang_model[mask, :]
            out = torch.softmax(out, dim=-1)
            # update pi pasts (only for rows where the description model was used)
            pi_sum[~mask, :] = pi_sum[~mask, :] + pii[~mask, :]
            count_pis[~mask] = count_pis[~mask] + 1

            # process lstm step in beam search
            word_lk = out.view(beam_size, remaining_sents,
                               -1).transpose(0, 1).contiguous()
            active = []  # list of not finished samples
            for b in range(b_size):
                # if the current sample is done, skip it
                if beam[b].done:
                    continue

                # get the original index of the sample
                idx = batch_idx[b]
                if not beam[b].advance(
                        word_lk.data[idx]):  # returns true if complete
                    active.append(b)

                for dec_state in hidden_state:  # iterate over h, c
                    sent_states = dec_state.view(-1, beam_size,
                                                 remaining_sents,
                                                 dec_state.size(2))[:, :, idx]
                    sent_states.data.copy_(
                        sent_states.data.index_select(
                            1, beam[b].get_current_origin()))
            # test if the beam is finished
            if not active:
                break

            # in this section, the sentences that are still active are
            # compacted so that the decoder is not run on completed sentences
            active_idx = torch.LongTensor([batch_idx[k]
                                           for k in active]).to(self.device)
            batch_idx = {beam: idx for idx, beam in enumerate(active)}

            def update_active(t, hidden_size):
                # select only the remaining active sentences
                view = t.data.view(-1, remaining_sents, hidden_size)
                new_size = list(t.size())
                new_size[-2] = new_size[-2] * len(
                    active_idx) // remaining_sents
                return Variable(
                    view.index_select(1, active_idx).view(*new_size))

            hidden_state = (update_active(hidden_state[0], self.hidden_size),
                            update_active(hidden_state[1], self.hidden_size))
            img_emb = update_active(img_emb, self.embedding_size)
            z0 = update_active(z0, self.hidden_size)
            remaining_sents = len(active)

        # select the best hypothesis
        for b in range(b_size):
            score_, k = beam[b].get_best()
            hyp = beam[b].get_hyp(k)
            predicted_sentences[image_names[b]] = [
                processor.i2w[idx.item()] for idx in hyp
            ]
        return predicted_sentences
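The switch between the language-model and description-model predictions above (Bi, mask) is a hard sigmoid gate; a tiny standalone sketch of that selection with random data:

import torch

batch, vocab = 3, 6
pred_lang = torch.softmax(torch.randn(batch, vocab), dim=-1)   # language-model distribution
pred_desc = torch.softmax(torch.randn(batch, vocab), dim=-1)   # description-model distribution
gate = torch.sigmoid(torch.randn(batch, 1))                    # plays the role of Bi above
use_lang = torch.round(gate).bool().squeeze(1)                 # hard 0/1 decision per sample
out = pred_desc.clone()
out[use_lang] = pred_lang[use_lang]                            # gated rows come from the LM
print(out)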
    def beam_sample(self, images, image_names, processor, max_seq_length,
                    beam_size):
        """

        :param images:
        :param image_names:
        :param processor:
        :param max_seq_length:
        :param beam_size:
        :return:
        """
        predicted_sentences = dict()
        # Encode
        img_emb = self.encoder(images)
        # prepare decoder initial hidden state
        img_emb = img_emb.unsqueeze(0)  # seq len, batch size, emb size
        h0 = self.h0_lin(img_emb)
        h0 = h0.repeat(self.lstm_layers, beam_size,
                       1)  # for each chain in the beam a copy of hidden
        c0 = self.c0_lin(img_emb)
        c0 = c0.repeat(self.lstm_layers, beam_size, 1)
        hidden_state = (h0, c0)

        b_size = images.shape[0]

        # create the initial beam
        beam = [
            Beam(beam_size, processor, device=self.device)
            for _ in range(b_size)
        ]

        batch_idx = list(
            range(b_size))  # indicating index for every sample in the batch
        remaining_sents = b_size  # number of samples in batch

        # Decode
        for w_idx in range(max_seq_length):
            input_ = torch.stack([
                b.get_current_state() for b in beam if not b.done
            ]).view(-1, 1)
            out, hidden_state = self.decoder(input_, hidden_state)
            out = torch.softmax(out, dim=2)

            # process lstm step in beam search
            word_lk = out.view(beam_size, remaining_sents,
                               -1).transpose(0, 1).contiguous()
            active = []  # list of not finished samples
            for b in range(b_size):
                # if the current sample is done, skip it
                if beam[b].done:
                    continue

                # get the original index of the sample
                idx = batch_idx[b]
                if not beam[b].advance(
                        word_lk.data[idx]):  # returns true if complete
                    active.append(b)

                for dec_state in hidden_state:  # iterate over h, c
                    sent_states = dec_state.view(-1, beam_size,
                                                 remaining_sents,
                                                 dec_state.size(2))[:, :, idx]
                    sent_states.data.copy_(
                        sent_states.data.index_select(
                            1, beam[b].get_current_origin()))

            # test if the beam is finished
            if not active:
                break

            # in this section, the sentences that are still active are
            # compacted so that the decoder is not run on completed sentences
            active_idx = torch.LongTensor([batch_idx[k]
                                           for k in active]).to(self.device)
            batch_idx = {beam: idx for idx, beam in enumerate(active)}

            def update_active(t, hidden_size):
                # select only the remaining active sentences
                view = t.data.view(-1, remaining_sents, hidden_size)
                new_size = list(t.size())
                new_size[-2] = new_size[-2] * len(
                    active_idx) // remaining_sents
                return Variable(
                    view.index_select(1, active_idx).view(*new_size))

            hidden_state = (update_active(hidden_state[0], self.hidden_size),
                            update_active(hidden_state[1], self.hidden_size))
            remaining_sents = len(active)

        # select the best hypothesis
        for b in range(b_size):
            score_, k = beam[b].get_best()
            hyp = beam[b].get_hyp(k)
            predicted_sentences[image_names[b]] = [
                processor.i2w[idx.item()] for idx in hyp
            ]
        return predicted_sentences
Example #9
    def sample_beam(self, batch_loader, seq_len, seed, use_cuda, State,
                    beam_size, n_best):
        # seed = Variable(t.from_numpy(seed).float())
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, decoder_character_input_np = batch_loader.go_input(
            1)

        decoder_word_input = Variable(
            t.from_numpy(decoder_word_input_np).long())
        decoder_character_input = Variable(
            t.from_numpy(decoder_character_input_np).long())

        if use_cuda:
            decoder_word_input, decoder_character_input = decoder_word_input.cuda(
            ), decoder_character_input.cuda()

        dec_states = State

        dec_states = [
            dec_states[0].repeat(1, beam_size, 1),
            dec_states[1].repeat(1, beam_size, 1)
        ]

        drop_prob = 0.0
        beam_size = beam_size
        batch_size = 1

        beam = [
            Beam(beam_size, batch_loader, cuda=True) for k in range(batch_size)
        ]

        batch_idx = list(range(batch_size))
        remaining_sents = batch_size

        for i in range(seq_len):

            input = t.stack([
                b.get_current_state() for b in beam if not b.done
            ]).t().contiguous().view(1, -1)

            trg_emb = self.embedding_2.word_embed(
                Variable(input).transpose(1, 0))

            trg_h, dec_states = self.decoder.only_decoder_beam(
                trg_emb, seed, drop_prob, dec_states)

            dec_out = trg_h.squeeze(1)

            # print "dec_out:", dec_out.size()

            out = F.softmax(self.decoder.fc(dec_out), dim=-1).unsqueeze(0)

            word_lk = out.view(beam_size, remaining_sents,
                               -1).transpose(0, 1).contiguous()

            active = []
            for b in range(batch_size):
                if beam[b].done:
                    continue

                idx = batch_idx[b]
                if not beam[b].advance(word_lk.data[idx]):
                    active += [b]

                for dec_state in dec_states:  # iterate over h, c
                    # layers x beam*sent x dim
                    sent_states = dec_state.view(-1, beam_size,
                                                 remaining_sents,
                                                 dec_state.size(2))[:, :, idx]
                    sent_states.data.copy_(
                        sent_states.data.index_select(
                            1, beam[b].get_current_origin()))

            if not active:
                break

            active_idx = t.cuda.LongTensor([batch_idx[k] for k in active])
            batch_idx = {beam: idx for idx, beam in enumerate(active)}

            def update_active(t):
                view = t.data.view(-1, remaining_sents,
                                   self.params.decoder_rnn_size)
                new_size = list(t.size())
                new_size[-2] = new_size[-2] * len(active_idx) \
                               // remaining_sents
                return Variable(
                    view.index_select(1, active_idx).view(*new_size))

            dec_states = (update_active(dec_states[0]),
                          update_active(dec_states[1]))
            dec_out = update_active(dec_out)

            remaining_sents = len(active)

        allHyp, allScores = [], []

        for b in range(batch_size):
            scores, ks = beam[b].sort_best()
            allScores += [scores[:n_best]]
            hyps = list(zip(*[beam[b].get_hyp(k) for k in ks[:n_best]]))
            allHyp += [hyps]

        return allHyp, allScores
def gensummary_elmo(template_vec,
                    ee,
                    vocab,
                    LMModel,
                    word_list,
                    subvocab,
                    clustermask=None,
                    mono=True,
                    renorm=True,
                    temperature=1,
                    elmo_layer='avg',
                    max_step=20,
                    beam_width=10,
                    beam_width_start=10,
                    alpha=0.1,
                    alpha_start=0.1,
                    begineos=True,
                    stopbyLMeos=False,
                    devid=0,
                    **kwargs):
    """
    Unsupervised sentence summary generation using beam search, by contextual matching and a summary style language model.
    The contextual matching here is on top of pretrained ELMo embeddings.
    
    Input:
        - template_vec (torch.Tensor): forward only ELMo embeddings of the source sentence.
            'torch.Tensor' of size (3, seq_len, 512).
        - ee (elmo_sequential_embedder.ElmoEmbedderForward): 'elmo_sequential_embedder.ElmoEmbedderForward' object.
        - vocab (torchtext.vocab.Vocab): 'torchtext.vocab.Vocab' object. Should be the same as is used for the
            pretrained language model.
        - LMModel (user defined torch.nn.Module): a pretrained language model on the summary sentences.
        - word_list (list): a list of words in the vocabulary to work with. 'List'.
        - subvocab (torch.LongTensor): 'torch.LongTensor' consisting of the indices of the words corresponding
            to `word_list`.
        - clustermask (torch.ByteTensor): a binary mask for each of the sub-vocabulary word.
            'torch.ByteTensor' of size (len(sub-vocabulary), len(vocabulary)). Default:None.
        - mono (bool): whether to keep the monotonicity constraint. Default: True.
        - renorm (bool): whether to renormalize the probabilities over the sub-vocabulary. Default: True.
        - temperature (float): temperature applied to the softmax in the language model. Default: 1.
        - elmo_layer (str): which ELMo layer to use as the word type representation.
            Choose from ['avg', 'cat', 'bot', 'mid', 'top']. Default: 'avg'.
        - max_step (int): maximum number of beam steps.
        - beam_width (int): beam width.
        - beam_width_start (int): beam width of the first step.
        - alpha (float): the amount of language model part used for scoring. The score is:
            (1 - \alpha) * similarity_logscore + \alpha * LM_logscore.
        - alpha_start (float): the amount of language model part used for scoring, only for the first step.
        - begineos (bool): whether to begin with the special '<eos>' token as is trained in the language model.
            Note that ELMo has its own special beginning token. Default: True.
        - stopbyLMeos (bool): whether to stop a sentence solely by the language model predicting '<eos>' as the
            top possibility. Default: False.
        - devid (int): device id to run the algorithm and LSTM language models. 'int', default: 0. -1 for cpu.
        **kwargs: other arguments input to function <Beam.beamstep>. 
            E.g. - normalized (bool): whether to normalize the dot product when calculating the similarity,
                     which makes it cosine similarity. Default: True.
                 - ifadditive (bool): whether to use an additive model on mixing the probability scores. Default: False.
    
    Output:
        - beam (beam_search.Beam): 'Beam' object, recording all the generated sequences.
        
    """
    device = 'cpu' if devid == -1 else f'cuda:{devid}'

    # Beam Search: initialization
    if begineos:
        beam = Beam(1,
                    vocab,
                    init_ids=[vocab.stoi['<eos>']],
                    device=device,
                    sim_score=0,
                    lm_score=0,
                    lm_state=None,
                    elmo_state=None,
                    align_loc=None)
    else:
        beam = Beam(1,
                    vocab,
                    init_ids=[None],
                    device=device,
                    sim_score=0,
                    lm_score=0,
                    lm_state=None,
                    elmo_state=None,
                    align_loc=None)

    # first step: start with 'beam_width_start' best matched words
    beam.beamstep(
        beam_width_start,
        beam.combscoreK,
        template_vec=template_vec,
        ee=ee,
        LMModel=LMModel,
        word_list=word_list,
        subvocab=subvocab,
        clustermask=clustermask,
        alpha=alpha_start,
        renorm=renorm,
        temperature=temperature,
        elmo_layer=elmo_layer,
        # normalized=True,
        # ifadditive=False,
        **kwargs)

    # run beam search, until all sentences hit <EOS> or max_step reached
    for s in range(max_step):
        print(f'beam step {s + 1} ' + '-' * 50 + '\n')
        beam.beamstep(
            beam_width,
            beam.combscoreK,
            template_vec=template_vec,
            ee=ee,
            LMModel=LMModel,
            word_list=word_list,
            subvocab=subvocab,
            clustermask=clustermask,
            mono=mono,
            alpha=alpha,
            renorm=renorm,
            temperature=temperature,
            stopbyLMeos=stopbyLMeos,
            elmo_layer=elmo_layer,
            # normalized=True,
            # ifadditive=False,
            **kwargs)
        # all beams reach termination
        if beam.endall:
            break

    return beam
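The subvocab / renorm / temperature options described in the docstring amount to restricting the language-model distribution to the working word list and renormalizing it; a minimal sketch (hypothetical helper, not the repository's code):

import torch
import torch.nn.functional as F

def subvocab_renorm_sketch(logits, subvocab, temperature=1.0):
    # Restrict the LM scores to the sub-vocabulary, apply the softmax temperature,
    # and renormalize the (log-)probabilities over that subset.
    sub_logits = logits[..., subvocab] / temperature
    return F.log_softmax(sub_logits, dim=-1)

full_logits = torch.randn(50)                # LM scores over the full vocabulary
subvocab = torch.tensor([3, 7, 11, 19])      # indices corresponding to word_list
print(subvocab_renorm_sketch(full_logits, subvocab, temperature=1.0))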
Example #11
    def decode_batch(self, idx):
        """Decode a minibatch."""
        # Get source minibatch
        input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
            self.src['data'], self.src_dict, idx,
            self.config['data']['batch_size'],
            self.config['data']['max_src_length'], add_start=True, add_end=True
        )
        beam_size = self.beam_size

        #  (1) run the encoder on the src

        context_h, (context_h_t, context_c_t) = self.get_hidden_representation(
            input_lines_src
        )

        context_h = context_h.transpose(0, 1)  # Make things sequence first.

        #  (3) run the decoder to generate sentences, using beam search

        batch_size = context_h.size(1)

        # Expand tensors for each beam.
        context = Variable(context_h.data.repeat(1, beam_size, 1))
        #print context.size()
        dec_states = [
            Variable(context_h_t.data.repeat(1, beam_size, 1)),
            Variable(context_c_t.data.repeat(1, beam_size, 1))
        ]

        beam = [
            Beam(beam_size, self.tgt_dict, self.id2word_src, trg['id2word'], cuda=True)
            for k in range(batch_size)
        ]

        dec_out = self.get_init_state_decoder(dec_states[0].squeeze(0))
        dec_states[0] = dec_out
        #print(dec_states[0].size())

        batch_idx = list(range(batch_size))
        remaining_sents = batch_size

        for i in range(self.config['data']['max_trg_length']):
            #print(i)
            input = torch.stack(
                [b.get_current_state() for b in beam if not b.done]
            ).t().contiguous().view(1, -1)

            trg_emb = self.model.trg_embedding(Variable(input).transpose(1, 0))
            trg_h, (trg_h_t, trg_c_t) = self.model.decoder(
                trg_emb,
                (dec_states[0].squeeze(0), dec_states[1].squeeze(0)),
                context
            )

            dec_states = (trg_h_t.unsqueeze(0), trg_c_t.unsqueeze(0))

            dec_out = trg_h_t.squeeze(1).view(-1, self.model.trg_hidden_dim)
            #print dec_out.size()
            out = F.softmax(self.model.decoder2vocab(dec_out), dim=-1).unsqueeze(0)

            word_lk = out.view(
                beam_size,
                remaining_sents,
                -1
            ).transpose(0, 1).contiguous()

            active = []
            for b in range(batch_size):
                if beam[b].done:
                    continue

                idx = batch_idx[b]
                if not beam[b].advance(word_lk.data[idx], input_lines_src[idx]):
                    active += [b]

                for dec_state in dec_states:  # iterate over h, c
                    # layers x beam*sent x dim
                    #print dec_state.size(1), dec_state.size(2), dec_state.size(3)
                    state_size = dec_state.size(1) * dec_state.size(3) if self.model.nlayers_trg > 1 else dec_state.size(2)
                    sent_states = dec_state.view(
                        -1, beam_size, remaining_sents, state_size
                    )[:, :, idx]
                    sent_states.data.copy_(
                        sent_states.data.index_select(
                            1,
                            beam[b].get_current_origin()
                        )
                    )

            if not active:
                break

            # in this section, the sentences that are still active are
            # compacted so that the decoder is not run on completed sentences
            active_idx = torch.cuda.LongTensor([batch_idx[k] for k in active])
            batch_idx = {beam: idx for idx, beam in enumerate(active)}

            def update_active(t):
                # select only the remaining active sentences
                view = t.data.view(
                    -1, remaining_sents,
                    self.model.decoder.hidden_size
                )
                new_size = list(t.size())
                new_size[-2] = new_size[-2] * len(active_idx) \
                    // remaining_sents
                return Variable(view.index_select(
                    1, active_idx
                ).view(*new_size))

            dec_states = (
                update_active(dec_states[0]),
                update_active(dec_states[1])
            )
            dec_out = update_active(dec_out)
            context = update_active(context)

            remaining_sents = len(active)

        #  (4) package everything up

        allHyp, allScores = [], []
        n_best = 1

        for b in range(batch_size):
            scores, ks = beam[b].sort_best()
            #print(ks)

            allScores += [scores[:n_best]]
            hyps = list(zip(*[beam[b].get_hyp(k) for k in ks[:n_best]]))
            #print(hyps)
            allHyp += [hyps]

        return allHyp, allScores
Example #12
    def generate(self,
                 init_hidden,
                 encoder_outputs,
                 max_gen_length,
                 beam_size=1):
        # The hidden state of an RNN in PyTorch is (num_layers * num_directions, batch_size, hidden_size) - even with batch_first
        # Note that during generation, the batch size should always be 1
        if self.bidirectional_enc:
            self.hidden = Variable(
                torch.zeros(self.num_layers, 1,
                            self.hidden_size))  #init to correct size
            if use_cuda:
                self.hidden = self.hidden.cuda()
            for x in range(self.num_layers):
                self.hidden[x] = torch.cat(
                    (init_hidden[2 * x], init_hidden[1 + 2 * x]), 1
                )  #concatenate the appropriate bidirectional hidden states
        else:
            self.hidden = init_hidden
        # Setup inputs and contexts
        #decoder_input = Variable(torch.LongTensor(init_hidden.shape[1] * [[SOS]]))
        #decoder_input = decoder_input.cuda() if use_cuda else decoder_input
        decoder_contexts = Variable(torch.zeros(1, 1, self.hidden_size))
        decoder_contexts = decoder_contexts.cuda(
        ) if use_cuda else decoder_contexts
        attn_scores = self.attn.calc_attn_scores(encoder_outputs)

        # Accumulate the output scores and words generated by the model
        source_len = encoder_outputs.shape[1]
        beam = Beam(beam_size, source_len)
        beam.add_initial_path(decoder_contexts, self.hidden)
        for i in range(max_gen_length):
            # Get paths up front since the dict changes size while iterating
            all_paths = [p for p in beam]
            for path in all_paths:
                decoder_input, decoder_contexts, hidden = beam.get_decoder_params(
                    path)
                decoder_outputs, decoder_contexts, attn_weights, hidden = self.__forward_one_word(
                    decoder_input, decoder_contexts, encoder_outputs,
                    attn_scores, hidden)
                # Add the potential next steps for the current beam path
                beam.add_paths(path,
                               decoder_outputs,
                               decoder_contexts,
                               hidden,
                               attn=attn_weights)
            beam.prune()

            # Break if all beam paths have ended
            if beam.is_ended(i):
                break

        outputs, words, attn_weights_matrix = beam.get_best_path_results()
        return outputs, words, attn_weights_matrix
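Unlike the previous examples, this decoder uses a path-based Beam API (add_initial_path, add_paths, prune, get_best_path_results). The prune step, which keeps only the beam_size highest-scoring paths, can be sketched as:

import heapq

def prune_paths(paths, beam_size):
    # Sketch only: paths is a list of (score, token_list) tuples; keep the best beam_size.
    return heapq.nlargest(beam_size, paths, key=lambda p: p[0])

candidates = [(-0.2, ['the']), (-1.5, ['a']), (-0.9, ['an']), (-0.1, ['this'])]
print(prune_paths(candidates, beam_size=2))  # [(-0.1, ['this']), (-0.2, ['the'])]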