Code Example #1
 def __init__(self):
     """
     ROUGE based summariser. This compares each sentence in the paper to the abstract to see which ones make the best
     summaries for the abstract. It is assumed that these sentences will then also be good highlights for the paper.
     """
     self.summary_length = 10
     self.r = Rouge()
     self.preprocessor = AbstractNetPreprocessor()
     self.computation_graph = graph()
     self.features_input = self.computation_graph["features_input"]
     self.prediction_probs = self.computation_graph["prediction_probs"]
     self.similarity_threshold = 0.75
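The Rouge() object above is what scores candidate sentences against the abstract. As a rough illustration of the idea, here is a minimal sketch of ROUGE-based sentence scoring, assuming the interface of the PyPI rouge package (Rouge.get_scores); the project's own Rouge class may expose a different API.

from rouge import Rouge


def score_sentences_against_abstract(sentences, abstract):
    """Return (sentence, ROUGE-L F1) pairs, highest-scoring first."""
    r = Rouge()
    scored = []
    for sent in sentences:
        # get_scores returns one dict per (hypothesis, reference) pair, e.g.
        # [{"rouge-1": {...}, "rouge-2": {...}, "rouge-l": {"f": ..., "p": ..., "r": ...}}]
        scores = r.get_scores(sent, abstract)[0]
        scored.append((sent, scores["rouge-l"]["f"]))
    return sorted(scored, key=lambda x: x[1], reverse=True)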
Code Example #2
    # Create the feed_dict
    feed_dict = {
        sentence_input: batch_inputs,
        abstract_input: batch_abstracts,
        features_input: batch_features,
        labels: batch_labels,
        seq_lens: lens,
        keep_prob: 1
    }

    # Run the raw prediction probabilities and keep the positive-class column
    raw_probs_summnet = sess.run(raw_predictions, feed_dict=feed_dict)
    prob_pos_summnet = raw_probs_summnet[:, 1]

tf.reset_default_graph()
features_graph = features_mlp.graph()
features_prediction_probs = features_graph["prediction_probs"]
sentence_input = features_graph["features_input"]
labels = features_graph["labels"]
loss = features_graph["loss"]
predictions = features_graph["prediction_probs"]
pred_answers = features_graph["prediction_class"]
correct_answers = features_graph["correct_answers"]
accuracy = features_graph["accuracy"]

with tf.Session() as sess:

    # Initialise all variables
    sess.run(tf.global_variables_initializer())

    # Saving object
    saver = tf.train.Saver()
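    # ---- A hedged sketch of how this truncated example might continue ----
    # (assumes batch_features and batch_labels are prepared as in training,
    # and that features_mlp.SAVE_PATH points at the trained weights)
    saver.restore(sess, features_mlp.SAVE_PATH)

    # sess.run accepts a list of fetches and returns their values in order
    batch_accuracy, batch_loss = sess.run(
        [accuracy, loss],
        feed_dict={sentence_input: batch_features, labels: batch_labels})
    print("accuracy: {:.3f}, loss: {:.3f}".format(batch_accuracy, batch_loss))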
Code Example #3
    def summarise(self, filename):
        """
        Generates a summary of the paper.
        :param filename: the name of the file to summaries
        :return: a sumamry of the paper.
        """

        # Each item has form (sentence, sentence_vector, abstract_vector, features)
        paper = self.prepare_paper(filename)

        # ========> Code from here on is summariser specific <========

        # Stores sentences, the probability of them being good summaries and their position in the paper
        sentences_and_summary_probs = []

        # Summary according to features
        sentences_feat_summary_probs = []

        tf.reset_default_graph()
        computation_graph = lstm_classifier.graph()
        sentence_input = computation_graph["inputs"]
        seq_lens = computation_graph["sequence_lengths"]
        prediction_probs = computation_graph["prediction_probs"]
        keep_prob = computation_graph["keep_prob"]

        with tf.Session() as sess:

            # Initialise all variables
            sess.run(tf.global_variables_initializer())

            # Saving object
            saver = tf.train.Saver()

            # Restore the saved model
            saver.restore(sess, lstm_classifier.SAVE_PATH)

            # Number of sentences in the paper
            num_sents = len(paper)

            # ----> Create the matrix for sentences for the LSTM <----
            sentence_list = []

            # Truncate any sentence longer than the LSTM's maximum input length
            for sent, _, _, _ in paper:
                sentence_list.append(sent[:MAX_SENT_LEN])

            # Get the matrix representation of the sentences
            sentence_matrix, sent_lens = sents2input(sentence_list, num_sents)

            # Create the feed_dict
            feed_dict = {
                sentence_input: sentence_matrix,
                seq_lens: sent_lens,
                keep_prob: 1
            }

            # Predict how good a summary each sentence is using the computation graph
            probs = sess.run(prediction_probs, feed_dict=feed_dict)

            # Store the sentences and probabilities in a list to be sorted
            for i in range(num_sents):
                sentence = paper[i][0]
                sentence_vec = paper[i][1]
                prob = probs[i][1]
                sentences_and_summary_probs.append(
                    (sentence, sentence_vec, prob, i))

        tf.reset_default_graph()
        features_graph = features_mlp.graph()
        features_classifier_input = features_graph["features_input"]
        features_prediction_probs = features_graph["prediction_probs"]
        with tf.Session() as sess:

            # Initialise all variables
            sess.run(tf.global_variables_initializer())

            # Saving object
            saver = tf.train.Saver()

            # ====> Run the second graph <====
            saver.restore(sess, features_mlp.SAVE_PATH)

            # ----> Create the matrix of features for the LSTM <----
            feature_matrix = np.zeros((num_sents, NUM_FEATURES),
                                      dtype=np.float32)

            for i, (_, _, _, feat) in enumerate(paper):
                feature_matrix[i, :] = feat

            # Predict how good a summary each sentence is using the computation graph
            probs = sess.run(
                features_prediction_probs,
                feed_dict={features_classifier_input: feature_matrix})

            # Store the sentences and probabilities in a list to be sorted
            for i in range(num_sents):
                sentence = paper[i][0]
                sentence_vec = paper[i][1]
                prob = probs[i][1]
                sentences_feat_summary_probs.append(
                    (sentence, sentence_vec, prob, i))

        # ====> Combine the results <====

        # Sorting each list separately by probability is disabled here; the
        # combined list is sorted once after the two probabilities are merged below.
        #sentences_and_summary_probs = [x for x in reversed(sorted(sentences_and_summary_probs, key=itemgetter(2)))]
        #sentences_feat_summary_probs = [x for x in reversed(sorted(sentences_feat_summary_probs, key=itemgetter(2)))]

        summary = []
        sents_already_added = set()

        # ====> Attempt Four <====
        # Combine the two classifiers with a weighted average. self.C is a
        # weighting coefficient defined on the class (not shown in this
        # excerpt); a positive C shifts weight from the LSTM classifier
        # towards the features classifier. E.g. with C = 0.2, an LSTM prob of
        # 0.9 and a feature prob of 0.5 give (0.9 * 0.8 + 0.5 * 1.2) / 2 = 0.66.
        # Both lists are still in paper order, so zip pairs the two
        # probabilities that belong to the same sentence.
        final_sents_probs = []

        for feat_item, summnet_item in zip(sentences_feat_summary_probs,
                                           sentences_and_summary_probs):
            prob_summnet = summnet_item[2] * (1 - self.C)
            prob_features = feat_item[2] * (1 + self.C)
            avg_prob = (prob_summnet + prob_features) / 2
            final_sents_probs.append(
                (feat_item[0], feat_item[1], avg_prob, feat_item[3]))

        final_sents_probs = [
            x for x in reversed(sorted(final_sents_probs, key=itemgetter(2)))
        ]

        summary = final_sents_probs[0:self.summary_length]
        """
        # ====> Attempt Three <====
        # Take summary sentences from features
        summary = sentences_feat_summary_probs[0:self.summary_length]
        for item in summary:
            sents_already_added.add(item[3])

        # Add ones from summary net if it's sure of them and they aren't there already
        max_additional = 5
        count_additional = 0
        for item in sentences_and_summary_probs:
            if count_additional > max_additional:
                break
            if item[3] not in sents_already_added and item[2] > 0.95:
                summary.append(item)
                sents_already_added.add(item[3])
                count_additional += 1
        """
        """
        # ====> Attempt Two <====
        i = 0
        while len(summary) < self.summary_length:

            if i >= len(sentences_feat_summary_probs) and i >= len(sentences_and_summary_probs):
                break

            feats = sentences_feat_summary_probs[i]
            summNet = sentences_and_summary_probs[i]

            feats_prob = feats[2]
            summNet_prob = summNet[2]

            if feats_prob >= summNet_prob and feats[3] not in sents_already_added:
                summary.append(feats)
                sents_already_added.add(feats[3])
            elif summNet_prob > feats_prob and summNet[3] not in sents_already_added:
                summary.append(summNet)
                sents_already_added.add(summNet[3])

            i += 1
        """
        """
        # ====> Attempt One <====
        # True to select a summary sentence from summ_net, false to select from features
        summ_net = True
        for i in range(num_sents):

            if len(summary) >= self.summary_length \
                    or len(sentences_and_summary_probs) <= 0 \
                    or len(sentences_feat_summary_probs) <= 0:
                break

            added = False

            if summ_net:

                while not added:

                    if len(sentences_and_summary_probs) <= 0:
                        break

                    highest_prob = sentences_and_summary_probs.pop(0)
                    if highest_prob[3] in sents_already_added or len(highest_prob[0]) < self.min_sent_len:
                        continue
                    else:
                        summary.append(highest_prob)
                        sents_already_added.add(highest_prob[3])
                        added = True

                summ_net = False

            else:

                while not added:

                    if len(sentences_feat_summary_probs) <= 0:
                        break

                    highest_prob = sentences_feat_summary_probs.pop(0)
                    if highest_prob[3] in sents_already_added or len(highest_prob[0]) < self.min_sent_len:
                        continue
                    else:
                        summary.append(highest_prob)
                        sents_already_added.add(highest_prob[3])
                        added = True

                summ_net = True
        """

        # Reorder the summary sentences into the order they appear in the paper
        ordered_summary = sorted(summary, key=itemgetter(-1))

        # Convert each token list back to a string, keeping the sentence's position
        summary = []

        for sentence, sentence_vec, prob, pos in ordered_summary:
            sentence = " ".join(sentence)
            summary.append((sentence, pos))

        # str.strip(".txt") would strip the characters '.', 't' and 'x' from
        # both ends of the name, so trim the extension explicitly instead
        out_name = filename[:-len(".txt")] if filename.endswith(".txt") else filename
        useful_functions.write_summary(SUMMARY_WRITE_LOC, summary, out_name)

        for sentence in summary:
            print(sentence)
            print()
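The sents2input helper used in Code Example #3 is not shown in the excerpt. Judging from its call site, it must turn a list of token lists into a fixed-size input matrix for the LSTM plus each sentence's true length. The sketch below is hypothetical: the embedding lookup (word2vec, WORD_DIM) is an assumption, and the real helper may build word-ID sequences rather than embedding vectors.

import numpy as np


def sents2input(sentence_list, num_sents):
    """Embed and pad sentences into a (num_sents, MAX_SENT_LEN, WORD_DIM)
    matrix and return it with the true length of each sentence."""
    sentence_matrix = np.zeros((num_sents, MAX_SENT_LEN, WORD_DIM),
                               dtype=np.float32)
    sent_lens = np.zeros((num_sents,), dtype=np.int32)

    for i, sent in enumerate(sentence_list):
        sent_lens[i] = len(sent)
        for j, word in enumerate(sent):
            # Out-of-vocabulary words are left as zero vectors (an assumption)
            if word in word2vec:
                sentence_matrix[i, j, :] = word2vec[word]

    return sentence_matrix, sent_lens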