Example #1
import numpy as np
import torch

# Helper functions such as get_training_batch, obtain_word_embeddings,
# obtain_sentence_embeddings and nll_loss are defined elsewhere in the project.


def train_abstractor(model,
                     data,
                     learning_rate=1e-3,
                     n_iters=10000,
                     model_output_file="results/models/abstractor.pt",
                     save_freq=10):
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for i in range(n_iters):
        # TODO: Use a batch size larger than two
        source_documents, target_summaries = get_training_batch(data, 2)

        # Obtain embeddings
        source_document_embeddings, __, __ = obtain_word_embeddings(
            model.bert_model,
            model.bert_tokenizer,
            source_documents,
            static_embeddings=False)
        target_summary_embeddings, target_mask, target_tokens = obtain_word_embeddings(
            model.bert_model,
            model.bert_tokenizer,
            target_summaries,
            static_embeddings=True)

        # Shift target tokens and format masks
        target_mask = torch.flatten(target_mask[:, :, 0])
        target_tokens = torch.roll(target_tokens, dims=1,
                                   shifts=-1)  # shift left
        target_tokens[:, -1] = 0
        target_tokens = torch.flatten(target_tokens)

        # Obtain extraction probability for each word in vocabulary
        extraction_probabilities, teacher_forcing = model(
            source_document_embeddings,
            target_summary_embeddings,
            teacher_forcing_pct=1.0
        )  # (batch_size, n_target_words, vocab_size)

        # Obtain negative log likelihood loss
        # TODO: Double-check that the token ordering is correct after the flatten and .view() reshaping
        loss = nll_loss(extraction_probabilities.view(-1, model.vocab_size),
                        target_tokens)

        loss = loss * target_mask
        loss = loss.sum()
        print(f"Loss: {loss} (teacher_forcing: {teacher_forcing})")

        # Update model
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % save_freq == 0:
            torch.save(model.state_dict(), model_output_file)

    return
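
# The masking step in train_abstractor (loss = loss * target_mask) only works if
# nll_loss returns one loss value per token rather than a reduced scalar. A
# minimal sketch of such a helper, assuming the abstractor outputs
# log-probabilities; this is an illustrative stand-in, not the project's own
# definition.
import torch.nn.functional as F

def nll_loss(log_probs, targets):
    # log_probs: (batch_size * n_target_words, vocab_size)
    # targets:   (batch_size * n_target_words,)
    # reduction="none" keeps a per-token loss so padded positions can be masked out.
    return F.nll_loss(log_probs, targets, reduction="none")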
    def create_abstracted_sentences(
        self,
        batch_actions,
        source_documents,
        n_ext_sents,
        teacher_forcing_pct=0.0,
        target_summary_embeddings=None,
    ):
        """
        Creates a summary from extracted sentences indicated by batch_actions

        :param batch_actions: A torch.tensor containing the indicies of sentences to extract
        :param source_documents: A list(list(document_sentences))
        :param n_ext_sents: A torch.tensor containing the # of sentences extracted per document
        :param teacher_forcing_pct: Percentage of the time to use directly use target_summary_embeddings
        :param target_summary_embeddings: A torch.tensor containing embeddings of each word within the target summary
                                          (oracle). Shape: (batch_size, n_summary_label_words, embedding_dim)
        :return: A tuple containing:
                - chosen_words: torch.tensor containing corpus indicies of words to use in summary.
                                Shape: (batch_size, n_summary_predicted_words)
                - word_probabilities: torch.tensor containing the probability of extracting each word in corpus.
                                      Shape: (batch_size, n_summary_predicted_words, n_words_in_corpus)
        """
        # Keep only the first n_ext_sent extracted sentence indices per document
        actions = [
            action_indices[:n_ext_sent]
            for action_indices, n_ext_sent in zip(batch_actions, n_ext_sents)
        ]

        # Obtain the actual string sentences that were extracted
        extracted_sentences = [
            np.array(source_doc)[a].tolist()
            for source_doc, a in zip(source_documents, actions)
        ]
        source_document_embeddings, __, __ = obtain_word_embeddings(
            self.extractor_model.bert_model,
            self.extractor_model.bert_tokenizer,
            extracted_sentences,
            static_embeddings=False)

        # Obtain extraction probability for each word in the vocabulary
        word_probabilities = self.abstractor_model.forward(
            source_document_embeddings,
            target_summary_embeddings,
            teacher_forcing_pct=teacher_forcing_pct,
        )[0]  # (batch_size, n_target_words, vocab_size)

        # Get words with highest probability per time step
        chosen_words = torch.argmax(word_probabilities, dim=2)

        return chosen_words, word_probabilities
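    # A hedged usage sketch (not part of the project code): assuming
    # bert_tokenizer is a HuggingFace tokenizer, the chosen word indices can be
    # turned back into text roughly like this.
    def decode_summaries(self, chosen_words):
        # chosen_words: (batch_size, n_summary_predicted_words) token ids
        tokenizer = self.abstractor_model.bert_tokenizer
        # skip_special_tokens drops padding/CLS/SEP-style tokens from the decoded text
        return [
            tokenizer.decode(token_ids, skip_special_tokens=True)
            for token_ids in chosen_words.tolist()
        ]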
def train_rl(rl_model, data, n_iters=10000):
    """
    :param rl_model:
    :param data:
    :param n_iters:
    :return:
    """
    for i in range(n_iters):
        # Obtain batch:
        source_documents, target_summaries = get_training_batch(data,
                                                                batch_size=4)

        # Obtain embeddings
        # Todo: Fix this
        source_sentence_embeddings, source_mask = obtain_sentence_embeddings(
            rl_model.extractor_model.bert_model,
            rl_model.extractor_model.bert_tokenizer, source_documents)

        target_summary_embeddings, target_mask, target_tokens = obtain_word_embeddings(
            rl_model.abstractor_model.bert_model,
            rl_model.abstractor_model.bert_tokenizer,
            target_summaries,
            static_embeddings=True)

        # Obtain extracted sentences (actions)
        actions, log_probs, entropys, values, n_ext_sents = rl_model.sample_actions(
            source_sentence_embeddings, source_mask)

        # Obtain abstracted sentences from the abstractor
        predicted_tokens, word_probabilities = rl_model.create_abstracted_sentences(
            actions,
            source_documents,
            n_ext_sents=n_ext_sents,
            teacher_forcing_pct=1.0,
            target_summary_embeddings=target_summary_embeddings)

        # Obtain returns from ROUGE
        n_actions = actions.shape[1]
        rewards = rl_model.determine_rewards(n_ext_sents, n_actions,
                                             predicted_tokens, target_tokens,
                                             target_mask)

        # Update the policy from the collected trajectories
        rl_model.update(rewards, log_probs, entropys, values, n_ext_sents,
                        word_probabilities, target_tokens, target_mask)
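
# get_training_batch is used above but not shown. A minimal sketch, assuming
# `data` is a list of (document_sentences, target_summary) pairs; the project's
# actual implementation may differ.
import random

def get_training_batch(data, batch_size):
    # Sample a random mini-batch and split it into parallel lists of
    # source documents and their reference summaries.
    batch = random.sample(data, batch_size)
    source_documents = [document for document, __ in batch]
    target_summaries = [summary for __, summary in batch]
    return source_documents, target_summaries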
Example #4
    def create_abstracted_sentences(self,
                                    batch_actions,
                                    source_documents,
                                    stop_action_index,
                                    teacher_forcing_pct=0.0,
                                    target_summary_embeddings=None):
        """
        :param batch_actions:
        :param source_documents:
        :param stop_action_index:
        :param teacher_forcing_pct:
        :param target_summary_embeddings:
        :return:
        """
        # Drop the trailing stop action from each trajectory
        actions = list()
        for trajectory_actions in batch_actions:
            if trajectory_actions[-1] == stop_action_index:
                actions.append(trajectory_actions[:-1])
            else:
                actions.append(trajectory_actions)

        extracted_sentences = [
            np.array(source_doc)[a].tolist()
            for source_doc, a in zip(source_documents, actions)
        ]
        source_document_embeddings, __, __ = obtain_word_embeddings(
            self.extractor_model.bert_model,
            self.extractor_model.bert_tokenizer,
            extracted_sentences,
            static_embeddings=False)
        # The abstractor returns log-probabilities; exponentiate to get the
        # extraction probability for each word in the vocabulary
        word_probabilities = torch.exp(
            self.abstractor_model(source_document_embeddings,
                                  target_summary_embeddings,
                                  teacher_forcing_pct=teacher_forcing_pct)[0]
        )  # (batch_size, n_target_words, vocab_size)

        # Get words with highest probability per time step
        chosen_words = torch.argmax(word_probabilities, dim=2)

        return chosen_words
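
# Note on the torch.exp call above: exp is strictly increasing, so argmax over the
# exponentiated log-probabilities picks the same indices as argmax over the raw
# log-probabilities; the exp only matters if real probabilities are needed downstream.
# A small self-contained check of that equivalence (illustrative only):
import torch

log_probs = torch.log_softmax(torch.randn(2, 7, 100), dim=2)
assert torch.equal(torch.argmax(log_probs, dim=2),
                   torch.argmax(torch.exp(log_probs), dim=2))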
Example #5
def train_system(rl_model, data, n_iters=5):
    """
    :param extractor_model:
    :param abstractor_model:
    :param rl_model:
    :param data:
    :return:
    """
    # TODO: Figure out when to build the training data; the embeddings don't need to be recomputed on every iteration
    # Obtain batch:
    source_documents, target_summaries = get_training_batch(data, batch_size=2)

    # Obtain embeddings
    source_sentence_embeddings, source_mask = obtain_sentence_embeddings(
        rl_model.extractor_model.bert_model,
        rl_model.extractor_model.bert_tokenizer,
        source_documents
    )
    stop_action_index = source_sentence_embeddings.shape[1]
    target_summary_embeddings, target_mask, target_tokens = obtain_word_embeddings(
        rl_model.abstractor_model.bert_model,
        rl_model.abstractor_model.bert_tokenizer,
        target_summaries,
        static_embeddings=True
    )

    for i in range(n_iters):
        # Run trajectory
        batch_actions, log_probs, values = rl_model.sample_actions(source_sentence_embeddings, source_mask)

        # Obtain abstracted sentences from the abstractor
        abstract_sentence_indices = rl_model.create_abstracted_sentences(
            batch_actions,
            source_documents,
            stop_action_index,
            teacher_forcing_pct=1.0,
            target_summary_embeddings=target_summary_embeddings
        )

        # Obtain returns from ROUGE
        rewards = rl_model.determine_rewards(abstract_sentence_indices, target_tokens, target_mask)
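
# determine_rewards is not shown here. A minimal sketch of a ROUGE-based reward,
# assuming the predicted and target token ids can be decoded with the abstractor's
# BERT tokenizer and scored with the `rouge_score` package; this is an illustrative
# stand-in, not the project's own implementation.
import torch
from rouge_score import rouge_scorer

def rouge_l_rewards(tokenizer, predicted_tokens, target_tokens):
    # One ROUGE-L F1 score per (prediction, reference) pair in the batch.
    scorer = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=True)
    rewards = []
    for pred_ids, ref_ids in zip(predicted_tokens.tolist(), target_tokens.tolist()):
        prediction = tokenizer.decode(pred_ids, skip_special_tokens=True)
        reference = tokenizer.decode(ref_ids, skip_special_tokens=True)
        rewards.append(scorer.score(reference, prediction)["rougeL"].fmeasure)
    return torch.tensor(rewards)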