def gensummary_gpt2(template_vec, ge, vocab, LMModel, word_list, subvocab,
                    clustermask=None, mono=True, renorm=True, temperature=1,
                    bpe2word='last', max_step=20, beam_width=10, beam_width_start=10,
                    alpha=0.1, alpha_start=0.1, begineos=True, stopbyLMeos=False,
                    devid=0, **kwargs):
    """
    Unsupervised sentence summary generation using beam search, by contextual matching
    and a summary-style language model. The contextual matching here is on top of
    pretrained GPT-2 embeddings.

    Input:
        template_vec: forward-only GPT-2 embeddings of the source sentence. 'torch.Tensor'.
        ge: 'gpt2_sequential_embedder.GPT2Embedder' object.
        vocab: 'torchtext.vocab.Vocab' object. Should be the same as is used for the
            pretrained language model.
        LMModel: a language model pretrained on the summary sentences.
        word_list: a list of words in the vocabulary to work with. 'List'.
        subvocab: 'torch.LongTensor' consisting of the indices of the words
            corresponding to 'word_list'.
        clustermask: a binary mask for each of the sub-vocabulary words.
            'torch.ByteTensor' of size (len(sub-vocabulary), len(vocabulary)). Default: None.
        mono: whether to keep the monotonicity constraint. Default: True.
        renorm: whether to renormalize the probabilities over the sub-vocabulary. Default: True.
        temperature: temperature applied to the softmax in the language model. Default: 1.
        bpe2word: how to turn the BPE vectors into word vectors. Choose from
            ['last', 'avg']. Default: 'last'.
        max_step: maximum number of beam steps.
        beam_width: beam width.
        beam_width_start: beam width of the first step.
        alpha: the weight of the language model part in the score. The score is:
            (1 - alpha) * similarity_logscore + alpha * LM_logscore.
        alpha_start: the weight of the language model part in the score, only for the first step.
        begineos: whether to begin with the special '<eos>' token as is trained in the
            language model. Note that GPT-2 has its own special beginning token. Default: True.
        stopbyLMeos: whether to stop a sentence solely by the language model predicting
            '<eos>' as the top possibility. Default: False.
        devid: device id to run the algorithm and LSTM language models.
            'int', default: 0. -1 for cpu.
        **kwargs: other arguments input to function <Beam.beamstep>. E.g.
            normalized: whether to normalize the dot product when calculating the similarity,
                which makes it cosine similarity. Default: True.
            ifadditive: whether to use an additive model on mixing the probability scores.
                Default: False.

    Output:
        beam: 'Beam' object, recording all the generated sequences.
    """
    device = 'cpu' if devid == -1 else f'cuda:{devid}'

    # Beam Search: initialization
    if begineos:
        beam = Beam(1, vocab, init_ids=[vocab.stoi['<eos>']], device=device,
                    sim_score=0, lm_score=0, lm_state=None, gpt2_state=None, align_loc=None)
    else:
        beam = Beam(1, vocab, init_ids=[None], device=device,
                    sim_score=0, lm_score=0, lm_state=None, gpt2_state=None, align_loc=None)

    # first step: start with 'beam_width_start' best matched words
    beam.beamstep(beam_width_start,
                  beam.combscoreK_GPT2,
                  template_vec=template_vec,
                  ge=ge,
                  LMModel=LMModel,
                  word_list=word_list,
                  subvocab=subvocab,
                  clustermask=clustermask,
                  alpha=alpha_start,
                  renorm=renorm,
                  temperature=temperature,
                  bpe2word=bpe2word,
                  normalized=True,
                  ifadditive=False,
                  **kwargs)

    # run beam search, until all sentences hit <EOS> or max_step is reached
    for s in range(max_step):
        print(f'beam step {s + 1} ' + '-' * 50 + '\n')
        beam.beamstep(beam_width,
                      beam.combscoreK_GPT2,
                      template_vec=template_vec,
                      ge=ge,
                      LMModel=LMModel,
                      word_list=word_list,
                      subvocab=subvocab,
                      clustermask=clustermask,
                      mono=mono,
                      alpha=alpha,
                      renorm=renorm,
                      temperature=temperature,
                      stopbyLMeos=stopbyLMeos,
                      bpe2word=bpe2word,
                      normalized=True,
                      ifadditive=False,
                      **kwargs)
        # all beams reach termination
        if beam.endall:
            break

    return beam
def gensummary_elmo(template_vec, ee, vocab, LMModel, word_list, subvocab,
                    clustermask=None, mono=True, renorm=True, temperature=1,
                    elmo_layer='avg', max_step=20, beam_width=10, beam_width_start=10,
                    alpha=0.1, alpha_start=0.1, begineos=True, stopbyLMeos=False,
                    devid=0, **kwargs):
    """
    Unsupervised sentence summary generation using beam search, by contextual matching
    and a summary-style language model. The contextual matching here is on top of
    pretrained ELMo embeddings.

    Input:
        - template_vec (torch.Tensor): forward-only ELMo embeddings of the source sentence,
          of size (3, seq_len, 512).
        - ee (elmo_sequential_embedder.ElmoEmbedderForward): the forward ELMo embedder object.
        - vocab (torchtext.vocab.Vocab): the vocabulary. Should be the same as is used for
          the pretrained language model.
        - LMModel (user-defined torch.nn.Module): a language model pretrained on the
          summary sentences.
        - word_list (list): a list of words in the vocabulary to work with.
        - subvocab (torch.LongTensor): the indices of the words corresponding to `word_list`.
        - clustermask (torch.ByteTensor): a binary mask for each of the sub-vocabulary words,
          of size (len(sub-vocabulary), len(vocabulary)). Default: None.
        - mono (bool): whether to keep the monotonicity constraint. Default: True.
        - renorm (bool): whether to renormalize the probabilities over the sub-vocabulary.
          Default: True.
        - temperature (float): temperature applied to the softmax in the language model.
          Default: 1.
        - elmo_layer (str): which ELMo layer to use as the word type representation.
          Choose from ['avg', 'cat', 'bot', 'mid', 'top']. Default: 'avg'.
        - max_step (int): maximum number of beam steps.
        - beam_width (int): beam width.
        - beam_width_start (int): beam width of the first step.
        - alpha (float): the weight of the language model part in the score. The score is:
          (1 - alpha) * similarity_logscore + alpha * LM_logscore.
        - alpha_start (float): the weight of the language model part in the score,
          only for the first step.
        - begineos (bool): whether to begin with the special '<eos>' token as is trained in
          the language model. Note that ELMo has its own special beginning token.
          Default: True.
        - stopbyLMeos (bool): whether to stop a sentence solely by the language model
          predicting '<eos>' as the top possibility. Default: False.
        - devid (int): device id to run the algorithm and LSTM language models.
          Default: 0. -1 for cpu.
        - **kwargs: other arguments input to function <Beam.beamstep>. E.g.
            - normalized (bool): whether to normalize the dot product when calculating the
              similarity, which makes it cosine similarity. Default: True.
            - ifadditive (bool): whether to use an additive model on mixing the probability
              scores. Default: False.

    Output:
        - beam (beam_search.Beam): 'Beam' object, recording all the generated sequences.
    """
    device = 'cpu' if devid == -1 else f'cuda:{devid}'

    # Beam Search: initialization
    if begineos:
        beam = Beam(1, vocab, init_ids=[vocab.stoi['<eos>']], device=device,
                    sim_score=0, lm_score=0, lm_state=None, elmo_state=None, align_loc=None)
    else:
        beam = Beam(1, vocab, init_ids=[None], device=device,
                    sim_score=0, lm_score=0, lm_state=None, elmo_state=None, align_loc=None)

    # first step: start with 'beam_width_start' best matched words
    beam.beamstep(beam_width_start,
                  beam.combscoreK,
                  template_vec=template_vec,
                  ee=ee,
                  LMModel=LMModel,
                  word_list=word_list,
                  subvocab=subvocab,
                  clustermask=clustermask,
                  alpha=alpha_start,
                  renorm=renorm,
                  temperature=temperature,
                  elmo_layer=elmo_layer,
                  # normalized=True,
                  # ifadditive=False,
                  **kwargs)

    # run beam search, until all sentences hit <EOS> or max_step is reached
    for s in range(max_step):
        print(f'beam step {s + 1} ' + '-' * 50 + '\n')
        beam.beamstep(beam_width,
                      beam.combscoreK,
                      template_vec=template_vec,
                      ee=ee,
                      LMModel=LMModel,
                      word_list=word_list,
                      subvocab=subvocab,
                      clustermask=clustermask,
                      mono=mono,
                      alpha=alpha,
                      renorm=renorm,
                      temperature=temperature,
                      stopbyLMeos=stopbyLMeos,
                      elmo_layer=elmo_layer,
                      # normalized=True,
                      # ifadditive=False,
                      **kwargs)
        # all beams reach termination
        if beam.endall:
            break

    return beam