Code Example #1
    def summarise(self, filename):
        """
        Generates a summary of the paper.
        :param filename: the name of the file to summarise
        :return: a summary of the paper.
        """

        # Each item has form (sentence, sentence_vector, abstract_vector, features)
        paper = self.prepare_paper(filename)

        # ========> Code from here on is summariser specific <========

        num_sents = len(paper)

        sentences_and_summary_probs = []

        # Store the sentences and probabilities in a list to be sorted
        for i in range(num_sents):
            sentence = paper[i][0]
            sentence_vec = paper[i][1]
            prob = paper[i][3][3]
            sentences_and_summary_probs.append(
                (sentence, sentence_vec, prob, i))

        # Sorted by the probability of the sentence being a good summary sentence, highest first
        sentences_and_summary_probs = sorted(
            sentences_and_summary_probs, key=itemgetter(2), reverse=True)

        summary = sentences_and_summary_probs[0:self.summary_length]

        # Order summary sentences according to the order they appear in the paper
        ordered_summary = sorted(summary, key=itemgetter(-1))

        # Convert the summary sentences back to strings, paired with their positions in the paper
        summary = []

        for sentence, sentence_vec, prob, pos in ordered_summary:
            sentence = " ".join(sentence)
            summary.append((sentence, pos))

        useful_functions.write_summary(SUMMARY_WRITE_LOC, summary,
                                       filename.replace(".txt", ""))

        for sentence in summary:
            print(sentence)
            print()
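This and the later variants all follow the same selection pattern: score each sentence, keep the top summary_length by score, then restore the original paper order. A minimal standalone sketch of that shared pattern (the function name and tuple layout are illustrative, not from the original codebase):

    from operator import itemgetter

    def select_summary(scored_sentences, summary_length):
        """Pick the highest-scoring sentences, then restore paper order.

        :param scored_sentences: list of (sentence, score, position) tuples
        :param summary_length: the number of sentences to keep
        """
        # Rank by score, highest first
        ranked = sorted(scored_sentences, key=itemgetter(1), reverse=True)
        # Keep the top few, then re-sort by position in the paper
        return sorted(ranked[:summary_length], key=itemgetter(2))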
Code Example #2
    def summarise(self, filename):
        """
        Generates a summary of the paper.
        :param filename: the name of the file to summarise
        :return: a summary of the paper.
        """

        paper = self.prepare_paper(filename)

        parser = PlaintextParser.from_string(paper, Tokenizer("english"))

        summary = self.summariser(parser.document, self.summary_length)

        # The "1" is only added her to stop the summary breaking the save function - it's a bit of an ungainly hack
        summary = [(unicode(x), 1) for x in summary]

        useful_functions.write_summary(SUMMARY_WRITE_LOC, summary, filename.replace(".txt", ""))

        for sentence in summary:
            print(sentence)
            print()
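This variant delegates sentence selection to the sumy library, so self.summariser must be one of sumy's summariser objects. A minimal sketch of how such an object could be constructed, using LexRank purely as an illustration (the original class may use a different algorithm, and paper_text here is a stand-in for the prepared paper):

    from sumy.parsers.plaintext import PlaintextParser
    from sumy.nlp.tokenizers import Tokenizer
    from sumy.summarizers.lex_rank import LexRankSummarizer

    paper_text = "First sentence of the paper. Second sentence of the paper."
    parser = PlaintextParser.from_string(paper_text, Tokenizer("english"))
    summariser = LexRankSummarizer()

    # A sumy summariser is callable: pass the parsed document and a sentence count
    for sentence in summariser(parser.document, 10):
        print(sentence)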
Code Example #3
    def summarise(self, filename):
        """
        Generates a summary of the paper.
        :param filename: the name of the file to summarise
        :return: a summary of the paper.
        """

        paper = self.prepare_paper(filename)

        abstract = paper["ABSTRACT"][0]
        abstract = [" ".join(x) for x in abstract]

        # We don't want to make any predictions for the Abstract or Highlights as these are already summaries.
        sections_to_predict_for = []
        for section, text in paper.iteritems():
            if section != "ABSTRACT" and section != "HIGHLIGHTS":
                sections_to_predict_for.append(text)

        # Sorts the sections according to the order in which they appear in the paper.
        sorted_sections_to_predict_for = sorted(sections_to_predict_for,
                                                key=itemgetter(1))

        # Creates a list of the sentences in the paper in the correct order. Each item in the list is formed of
        # a list of words making up the sentence.
        sentence_list = []
        for sentence_text, section_position_in_paper in sorted_sections_to_predict_for:
            section_sentences = sentence_text
            for sentence in section_sentences:
                sentence_list.append(sentence)

        # Use the model to predict if each sentence is a summary sentence or not.
        predictions = []
        for sentence_text in sentence_list:

            abstract_rouge = useful_functions.compute_rouge_abstract_score(
                sentence_text, abstract)
            predictions.append(abstract_rouge)

        # Produces a list of tuples of the form (sentence_text, sentence_index_in_paper, abstract_rouge_score)
        sentence_list_with_predictions = zip(sentence_list,
                                             range(len(sentence_list)),
                                             predictions)

        # Sort according to likelihood of being a summary sentence, highest first
        sorted_predictions = sorted(
            sentence_list_with_predictions, key=itemgetter(-1), reverse=True)

        # Slice the top few sentences to form the summary sentences
        summary_sents = sorted_predictions[0:self.summary_length]

        # Order summary sentences according to the order they appear in the paper
        ordered_summary = sorted(summary_sents, key=itemgetter(-2))

        # Convert the summary sentences back to strings, paired with their positions in the paper
        summary = []

        for item in ordered_summary:
            sentence_position = item[1]
            sentence = " ".join(item[0])
            summary.append((sentence, sentence_position))

        useful_functions.write_summary(SUMMARY_WRITE_LOC, summary,
                                       filename.replace(".txt", ""))

        for sentence in summary:
            print(sentence)
            print()
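useful_functions.compute_rouge_abstract_score is not shown here. A plausible minimal stand-in, assuming a unigram-overlap score between the sentence and the abstract in the spirit of ROUGE-1 (a sketch under that assumption, not the original implementation):

    def unigram_overlap_score(sentence_words, abstract_sentences):
        """Fraction of the sentence's words that also appear in the abstract."""
        abstract_words = set()
        for abstract_sentence in abstract_sentences:
            abstract_words.update(abstract_sentence.split())
        if not sentence_words:
            return 0.0
        overlap = sum(1 for word in sentence_words if word in abstract_words)
        return overlap / float(len(sentence_words))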
Code Example #4
    def summarise(self, filename):
        """
        Generates a summary of the paper.
        :param filename: the name of the file to summarise
        :return: a summary of the paper.
        """

        # Each item has form (sentence, sentence_vector, abstract_vector, features)
        paper = self.prepare_paper(filename)

        # ========> Code from here on is summariser specific <========

        with tf.Session() as sess:

            # Initialise all variables
            sess.run(tf.global_variables_initializer())

            # Saving object
            saver = tf.train.Saver()

            # Restore the saved model
            saver.restore(sess, SAVE_PATH)

            # Stores sentences, the probability of them being good summaries and their position in the paper
            sentences_and_summary_probs = []

            # Number of sentences in the paper
            num_sents = len(paper)

            # ----> Create the matrix for sentences for the LSTM <----
            sentence_list = []

            for sent, sent_vec, abs_vec, feats in paper:
                if len(sent) < MAX_SENT_LEN:
                    sentence_list.append(sent)
                else:
                    sentence_list.append(sent[0:MAX_SENT_LEN])

            # Get the matrix representation of the sentences
            sentence_matrix, sent_lens = sents2input(sentence_list, num_sents)

            # ----> Create the matrix of features for the LSTM <----
            feature_matrix = np.zeros((num_sents, NUM_FEATURES), dtype=np.float32)

            for i, (_, _, _, feat) in enumerate(paper):
                feature_matrix[i, :] = feat

            # Create the feed_dict
            feed_dict = {
                self.sentence_input: sentence_matrix,
                self.features_input: feature_matrix,
                self.seq_lens: sent_lens,
                self.keep_prob: 1
            }

            # Predict how good a summary each sentence is using the computation graph
            probs = sess.run(self.prediction_probs, feed_dict=feed_dict)

            # Store the sentences and probabilities in a list to be sorted
            for i in range(num_sents):
                sentence = paper[i][0]
                sentence_vec = paper[i][1]
                prob = probs[i][1]
                sentences_and_summary_probs.append((sentence, sentence_vec, prob, i))

            # Sorted by the probability of the sentence being a good summary sentence, highest first
            sentences_and_summary_probs = sorted(
                sentences_and_summary_probs, key=itemgetter(2), reverse=True)

            summary = []
            for sent, sent_vec, prob, pos in sentences_and_summary_probs:
                if len(summary) >= self.summary_length:
                    break

                # Skip very short sentences; they rarely make good summary sentences
                if len(sent) < 10:
                    continue
                summary.append((sent, sent_vec, prob, pos))

            #summary = sentences_and_summary_probs[0:self.summary_length]

            # Order summary sentences according to the order they appear in the paper
            ordered_summary = sorted(summary, key=itemgetter(-1))

            # Convert the summary sentences back to strings, paired with their positions in the paper
            summary = []

            for sentence, sentence_vec, prob, pos in ordered_summary:
                sentence = " ".join(sentence)
                summary.append((sentence, pos))

        useful_functions.write_summary(SUMMARY_WRITE_LOC, summary, filename.replace(".txt", ""))
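sents2input is not shown; it evidently packs the tokenised sentences into a fixed-size matrix for the LSTM and returns the true sequence lengths. A hypothetical sketch, assuming each word is looked up in an embedding table word_embeddings of dimension WORD_DIM (both of those names are assumptions, not from the original code):

    import numpy as np

    def sents2input(sentence_list, num_sents):
        """Embed and pad sentences into (num_sents, MAX_SENT_LEN, WORD_DIM), plus lengths."""
        sentence_matrix = np.zeros((num_sents, MAX_SENT_LEN, WORD_DIM),
                                   dtype=np.float32)
        sent_lens = np.zeros(num_sents, dtype=np.int32)
        for i, sentence in enumerate(sentence_list):
            sent_lens[i] = len(sentence)
            for j, word in enumerate(sentence):
                if word in word_embeddings:  # hypothetical embedding lookup table
                    sentence_matrix[i, j, :] = word_embeddings[word]
        return sentence_matrix, sent_lens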
Code Example #5
    def summarise(self, filename):
        """
        Generates a summary of the paper.
        :param filename: the name of the file to summarise
        :return: a summary of the paper.
        """

        # Each item has form (sentence, sentence_vector, abstract_vector, features)
        paper = self.prepare_paper(filename)

        # ========> Code from here on is summariser specific <========

        with tf.Session() as sess:

            # Initialise all variables
            sess.run(tf.global_variables_initializer())

            # Saving object
            saver = tf.train.Saver()

            # Restore the saved model
            saver.restore(sess, SAVE_PATH)

            # Stores sentences, the probability of them being good summaries and their position in the paper
            sentences_and_summary_probs = []

            # Create a matrix of all of the features in the paper so that we can predict summary probabilities for
            # the whole paper at once
            num_sents = len(paper)
            feed_feats = np.zeros((num_sents, NUM_FEATURES), dtype=np.float32)
            for i, item in enumerate(paper):
                feed_feats[i, :] = item[3][1:]

            # Predict how good a summary each sentence is using the computation graph
            probs = sess.run(self.prediction_probs,
                             feed_dict={self.features_input: feed_feats})

            # Store the sentences and probabilities in a list to be sorted
            for i in range(num_sents):
                sentence = paper[i][0]
                sentence_vec = paper[i][1]
                prob = probs[i][1]
                sentences_and_summary_probs.append(
                    (sentence, sentence_vec, prob, i))

            # Sorted by the probability of the sentence being a good summary sentence, highest first
            sentences_and_summary_probs = sorted(
                sentences_and_summary_probs, key=itemgetter(2), reverse=True)

            summary = sentences_and_summary_probs[0:self.summary_length]

            # Order summary sentences according to the order they appear in the paper
            ordered_summary = sorted(summary, key=itemgetter(-1))

            # Convert the summary sentences back to strings, paired with their positions in the paper
            summary = []

            for sentence, sentence_vec, prob, pos in ordered_summary:
                sentence = " ".join(sentence)
                summary.append((sentence, pos))

        useful_functions.write_summary(SUMMARY_WRITE_LOC, summary,
                                       filename.replace(".txt", ""))

        for sentence in summary:
            print(sentence)
            print()
Code Example #6
    def summarise(self, filename):
        """
        Generates a summary of the paper.
        :param filename: the name of the file to summarise
        :return: a summary of the paper.
        """

        # Each item has form (sentence, sentence_vector, abstract_vector, features)
        paper = self.prepare_paper(filename)

        # ========> Code from here on is summariser specific <========

        # Stores sentences, the probability of them being good summaries and their position in the paper
        sentences_and_summary_probs = []

        # Summary according to features
        sentences_feat_summary_probs = []

        tf.reset_default_graph()
        computation_graph = lstm_classifier.graph()
        sentence_input = computation_graph["inputs"]
        seq_lens = computation_graph["sequence_lengths"]
        prediction_probs = computation_graph["prediction_probs"]
        keep_prob = computation_graph["keep_prob"]

        with tf.Session() as sess:

            # Initialise all variables
            sess.run(tf.global_variables_initializer())

            # Saving object
            saver = tf.train.Saver()

            # Restore the saved model
            saver.restore(sess, lstm_classifier.SAVE_PATH)

            # Number of sentences in the paper
            num_sents = len(paper)

            # ----> Create the matrix for sentences for the LSTM <----
            sentence_list = []

            for sent, sent_vec, abs_vec, feats in paper:
                if len(sent) < MAX_SENT_LEN:
                    sentence_list.append(sent)
                else:
                    sentence_list.append(sent[0:MAX_SENT_LEN])

            # Get the matrix representation of the sentences
            sentence_matrix, sent_lens = sents2input(sentence_list, num_sents)

            # Create the feed_dict
            feed_dict = {
                sentence_input: sentence_matrix,
                seq_lens: sent_lens,
                keep_prob: 1
            }

            # Predict how good a summary each sentence is using the computation graph
            probs = sess.run(prediction_probs, feed_dict=feed_dict)

            # Store the sentences and probabilities in a list to be sorted
            for i in range(num_sents):
                sentence = paper[i][0]
                sentence_vec = paper[i][1]
                prob = probs[i][1]
                sentences_and_summary_probs.append(
                    (sentence, sentence_vec, prob, i))

        tf.reset_default_graph()
        features_graph = features_mlp.graph()
        features_classifier_input = features_graph["features_input"]
        features_prediction_probs = features_graph["prediction_probs"]
        with tf.Session() as sess:

            # Initialise all variables
            sess.run(tf.global_variables_initializer())

            # Saving object
            saver = tf.train.Saver()

            # ====> Run the second graph <====
            saver.restore(sess, features_mlp.SAVE_PATH)

            # ----> Create the matrix of features for the LSTM <----
            feature_matrix = np.zeros((num_sents, NUM_FEATURES),
                                      dtype=np.float32)

            for i, (_, _, _, feat) in enumerate(paper):
                feature_matrix[i, :] = feat

            # Predict how good a summary each sentence is using the computation graph
            probs = sess.run(
                features_prediction_probs,
                feed_dict={features_classifier_input: feature_matrix})

            # Store the sentences and probabilities in a list to be sorted
            for i in range(num_sents):
                sentence = paper[i][0]
                sentence_vec = paper[i][1]
                prob = probs[i][1]
                sentences_feat_summary_probs.append(
                    (sentence, sentence_vec, prob, i))

        # ====> Combine the results <====

        # This list is now sorted by the probability of the sentence being a good summary sentence
        #sentences_and_summary_probs = [x for x in reversed(sorted(sentences_and_summary_probs, key=itemgetter(2)))]

        # Sort features list in probability order
        #sentences_feat_summary_probs = [x for x in reversed(sorted(sentences_feat_summary_probs, key=itemgetter(2)))]

        summary = []
        sents_already_added = set()

        # ====> Attempt Four <====
        final_sents_probs = []

        # Combine the two classifiers: weight the features probability up by C and
        # the summary net probability down by C, then average the two scores
        for item in zip(sentences_feat_summary_probs,
                        sentences_and_summary_probs):
            prob_summNet = item[1][2] * (1 - self.C)
            prob_Features = item[0][2] * (1 + self.C)
            avg_prob = (prob_summNet + prob_Features) / 2
            final_sents_probs.append(
                (item[0][0], item[0][1], avg_prob, item[0][3]))

        final_sents_probs = sorted(
            final_sents_probs, key=itemgetter(2), reverse=True)

        summary = final_sents_probs[0:self.summary_length]
        """
        # ====> Attempt Three <====
        # Take summary sentences from features
        summary = sentences_feat_summary_probs[0:self.summary_length]
        for item in summary:
            sents_already_added.add(item[3])

        # Add ones from summary net if it's sure of them and they aren't there already
        max_additional = 5
        count_additional = 0
        for item in sentences_and_summary_probs:
            if count_additional > max_additional:
                break
            if item[3] not in sents_already_added and item[2] > 0.95:
                summary.append(item)
                sents_already_added.add(item[3])
                count_additional += 1
        """
        """
        # ====> Attempt Two <====
        i = 0
        while len(summary) < self.summary_length:

            if i >= len(sentences_feat_summary_probs) and i >= len(sentences_and_summary_probs):
                break

            feats = sentences_feat_summary_probs[i]
            summNet = sentences_and_summary_probs[i]

            feats_prob = feats[2]
            summNet_prob = summNet[2]

            if feats_prob >= summNet_prob and feats[3] not in sents_already_added:
                summary.append(feats)
                sents_already_added.add(feats[3])
            elif summNet_prob > feats_prob and summNet[3] not in sents_already_added:
                summary.append(summNet)
                sents_already_added.add(summNet[3])

            i += 1
        """
        """
        # ====> Attempt One <====
        # True to select a summary sentence from summ_net, false to select from features
        summ_net = True
        for i in range(num_sents):

            if len(summary) >= self.summary_length \
                    or len(sentences_and_summary_probs) <= 0 \
                    or len(sentences_feat_summary_probs) <= 0:
                break

            added = False

            if summ_net:

                while not added:

                    if len(sentences_and_summary_probs) <= 0:
                        break

                    highest_prob = sentences_and_summary_probs.pop(0)
                    if highest_prob[3] in sents_already_added or len(highest_prob[0]) < self.min_sent_len:
                        continue
                    else:
                        summary.append(highest_prob)
                        sents_already_added.add(highest_prob[3])
                        added = True

                summ_net = False

            else:

                while not added:

                    if len(sentences_feat_summary_probs) <= 0:
                        break

                    highest_prob = sentences_feat_summary_probs.pop(0)
                    if highest_prob[3] in sents_already_added or len(highest_prob[0]) < self.min_sent_len:
                        continue
                    else:
                        summary.append(highest_prob)
                        sents_already_added.add(highest_prob[3])
                        added = True

                summ_net = True
        """

        # Order summary sentences according to the order they appear in the paper
        ordered_summary = sorted(summary, key=itemgetter(-1))

        # Convert the summary sentences back to strings, paired with their positions in the paper
        summary = []

        for sentence, sentence_vec, prob, pos in ordered_summary:
            sentence = " ".join(sentence)
            summary.append((sentence, pos))

        useful_functions.write_summary(SUMMARY_WRITE_LOC, summary,
                                       filename.replace(".txt", ""))

        for sentence in summary:
            print(sentence)
            print()
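The "Attempt Four" combination above is a weighted average: with weighting coefficient C, the combined score is (p_net * (1 - C) + p_feat * (1 + C)) / 2, which shifts weight away from the summary net and towards the features classifier as C grows. A worked example with illustrative values:

    C = 0.2
    p_net, p_feat = 0.6, 0.8
    prob_summNet = p_net * (1 - C)                   # 0.6 * 0.8 = 0.48
    prob_Features = p_feat * (1 + C)                 # 0.8 * 1.2 = 0.96
    avg_prob = (prob_summNet + prob_Features) / 2    # 1.44 / 2  = 0.72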
Code Example #7
    def summarise(self, filename):
        """
        Generates a summary of the paper.
        :param filename: the name of the file to summarise
        :return: a summary of the paper.
        """

        # Each item has form (sentence, sentence_vector, abstract_vector, features)
        paper = self.prepare_paper(filename)
        # ========> Code from here on is summariser specific <========
        graph1 = tf.get_default_graph()
        with tf.Session() as sess:

            # Initialise all variables
            sess.run(tf.global_variables_initializer())

            # Rebuild the saver from the exported meta graph and locate the latest checkpoint
            saver = tf.train.import_meta_graph(SAVE_PATH + 'model-200.meta')
            module_file = tf.train.latest_checkpoint(SAVE_PATH)

            # Restore the saved model
            saver.restore(sess, module_file)

            # Stores sentences, the probability of them being good summaries and their position in the paper
            sentences_and_summary_probs = []

            # Number of sentences in the paper
            num_sents = len(paper)

            # ----> Create the matrix for sentences for the LSTM <----
            sentence_list = []

            for sent, sent_vec, abs_vec, feats in paper:
                if len(sent) < MAX_SENT_LEN:
                    sentence_list.append(sent)
                else:
                    sentence_list.append(sent[0:MAX_SENT_LEN])

            # Get the matrix representation of the sentences
            sentence_matrix, sent_lens = sents2input(sentence_list, num_sents)

            # ----> Create the matrix for abstracts for the LSTM <----
            abstract_matrix = np.zeros((num_sents, ABSTRACT_DIMENSION),
                                       dtype=np.float32)

            for i, (_, _, abs_vec, _) in enumerate(paper):
                abstract_matrix[i, :] = abs_vec

            # ----> Create the matrix of features for the LSTM <----
            feature_matrix = np.zeros((num_sents, NUM_FEATURES),
                                      dtype=np.float32)

            for i, (_, _, _, feat) in enumerate(paper):
                feature_matrix[i, :] = feat

            # Write the sentences out as a source file for SummaRuNNer
            with open(SUMM_SOURCE + filename, 'w') as OUTFILE:
                for i in range(num_sents):
                    OUTFILE.write(" ".join(word for word in paper[i][0]))
                    OUTFILE.write("\n")

            # Read the sentences back in as the input tensor for SummaRuNNer
            feed_x = summarunner_datareader.get_input_tensor(SUMM_SOURCE +
                                                             filename)

            # Fetch the input placeholder and the prediction op from the restored graph by name
            input_x = graph1.get_operation_by_name("inputs/x_input").outputs[0]
            self.prediction_probs = graph1.get_operation_by_name(
                "score_layer/prediction").outputs[0]

            # Predict how good a summary each sentence is using the computation graph
            probs = np.array([])
            for x in feed_x:
                probs = np.append(
                    sess.run(self.prediction_probs,
                             feed_dict={input_x: x.reshape(40, 100)}), probs)

            # Store the sentences and probabilities in a list to be sorted
            for i in range(num_sents):
                sentence = paper[i][0]
                sentence_vec = paper[i][1]
                prob = probs[i]
                sentences_and_summary_probs.append(
                    (sentence, sentence_vec, prob, i))

            # Sorted by the probability of the sentence being a good summary sentence, highest first
            sentences_and_summary_probs = sorted(
                sentences_and_summary_probs, key=itemgetter(2), reverse=True)

            summary = []
            for sent, sent_vec, prob, pos in sentences_and_summary_probs:
                if len(summary) >= self.summary_length:
                    break

                # Skip very short sentences; they rarely make good summary sentences
                if len(sent) < 10:
                    continue
                summary.append((sent, sent_vec, prob, pos))

            #summary = sentences_and_summary_probs[0:self.summary_length]

            # Order summary sentences according to the order they appear in the paper
            ordered_summary = sorted(summary, key=itemgetter(-1))

            # Convert the summary sentences back to strings, paired with their positions in the paper
            summary = []

            for sentence, sentence_vec, prob, pos in ordered_summary:
                sentence = " ".join(sentence)
                summary.append((sentence, pos))
        #print("calling write_summary..")
        useful_functions.write_summary(SUMMARY_WRITE_LOC, summary,
                                       filename.strip(".txt"))