コード例 #1
    def summarise(self, filename):
        Generates a summary of the paper.
        :param filename: the name of the file to summarise
        :return: a summary of the paper.

        # Each item has form (sentence, sentence_vector, abstract_vector, features)
        paper = self.prepare_paper(filename)

        # ========> Code from here on is summariser specific <========

        with tf.Session() as sess:

            # Initialise all variables

            # Saving object
            saver = tf.train.Saver()

            # Restore the saved model
            saver.restore(sess, SAVE_PATH)

            # Stores sentences, the probability of them being good summaries and their position in the paper
            sentences_and_summary_probs = []

            # Number of sentences in the paper
            num_sents = len(paper)

            # ----> Create the matrix for sentences for the LSTM <----
            sentence_list = []

            for sent, sent_vec, abs_vec, feats in paper:
                if len(sent) < MAX_SENT_LEN:

            # Get the matrix representation of the sentences
            sentence_matrix, sent_lens = sents2input(sentence_list, num_sents)

            # ----> Create the matrix for abstracts for the LSTM <----
            abstract_matrix = np.zeros((num_sents, ABSTRACT_DIMENSION),

            i = 0
            for _, _, abs_vec, _ in paper:
                abstract_matrix[i, :] = abs_vec
                i += 1

            # ----> Create the matrix of features for the LSTM <----
            feature_matrix = np.zeros((num_sents, NUM_FEATURES),

            i = 0
            for _, _, _, feat in paper:
                feature_matrix[i, :] = feat
                i += 1

            # Create the feed_dict
            feed_dict = {
                self.sentence_input: sentence_matrix,
                self.abstract_input: abstract_matrix,
                self.features_input: feature_matrix,
                self.seq_lens: sent_lens,
                self.keep_prob: 1

            # Predict how good a summary each sentence is using the computation graph
            probs = sess.run(self.prediction_probs, feed_dict=feed_dict)

            # Store the sentences and probabilities in a list to be sorted
            for i in range(num_sents):
                sentence = paper[i][0]
                sentence_vec = paper[i][1]
                prob = probs[i][1]
                    (sentence, sentence_vec, prob, i))

            # This list is now sorted by the probability of the sentence being a good summary sentence
            sentences_and_summary_probs = [
                x for x in reversed(
                    sorted(sentences_and_summary_probs, key=itemgetter(2)))

            summary = []
            for sent, sent_vec, prob, pos in sentences_and_summary_probs:
                if len(summary) > self.summary_length:

                if len(sent) < 10:
                    summary.append((sent, sent_vec, prob, pos))

            #summary = sentences_and_summary_probs[0:self.summary_length]

            # Order summary sentences according to the order they appear in the paper
            ordered_summary = sorted(summary, key=itemgetter(-1))

            # Print the summary
            summary = []

            for sentence, sentence_vec, prob, pos in ordered_summary:
                sentence = " ".join(sentence)
                summary.append((sentence, pos))

        useful_functions.write_summary(SUMMARY_WRITE_LOC, summary,
コード例 #2
    def summarise(self, filename, visualise=False):
        Generates a summary of the paper.
        :param filename: the name of the file to summarise
        :param visualise: true if visualising output
        :return: a summary of the paper.

        # Each item has form (sentence, sentence_vector, abstract_vector, features)
        paper = self.prepare_paper(filename, visualise=visualise)

        # ========> Code from here on is summariser specific <========

        # Stores sentences, the probability of them being good summaries and their position in the paper
        sentences_and_summary_probs = []

        # Summary according to features
        sentences_feat_summary_probs = []

        computation_graph = summariser_net.graph()
        sentence_input = computation_graph["sentence_input"]
        abstract_input = computation_graph["abstract_input"]
        features_input = computation_graph["features_input"]
        seq_lens = computation_graph["sequence_lengths"]
        prediction_probs = computation_graph["raw_predictions"]
        keep_prob = computation_graph["keep_prob"]

        with tf.Session() as sess:

            # Initialise all variables

            # Saving object
            saver = tf.train.Saver()

            # Restore the saved model
            saver.restore(sess, summariser_net.SAVE_PATH)

            # ----> Create the matrix for sentences for the LSTM <----
            sentence_list = []

            for sent, sent_vec, abs_vec, feats in paper:
                if len(sent) < MAX_SENT_LEN:

            # Number of sentences in the paper
            num_sents = len(sentence_list)

            # Get the matrix representation of the sentences
            sentence_matrix, sent_lens = sents2input(sentence_list, num_sents)

            # ----> Create the matrix for abstracts for the LSTM <----
            abstract_matrix = np.zeros((num_sents, ABSTRACT_DIMENSION), dtype=np.float32)

            i = 0
            for _, _, abs_vec, _ in paper:
                abstract_matrix[i, :] = abs_vec
                i += 1

            # ----> Create the matrix of features for the LSTM <----
            feature_matrix = np.zeros((num_sents, NUM_FEATURES), dtype=np.float32)

            i = 0
            for _, _, _, feat in paper:
                feature_matrix[i, :] = feat
                i += 1

            # Create the feed_dict
            feed_dict = {
                sentence_input: sentence_matrix,
                abstract_input: abstract_matrix,
                features_input: feature_matrix,
                seq_lens: sent_lens,
                keep_prob: 1

            # Predict how good a summary each sentence is using the computation graph
            probs = sess.run(prediction_probs, feed_dict=feed_dict)

            # Store the sentences and probabilities in a list to be sorted
            for i in range(num_sents):
                sentence = paper[i][0]
                sentence_vec = paper[i][1]
                prob = probs[i][1]
                sentences_and_summary_probs.append((sentence, sentence_vec, prob, i))

        features_graph = features_mlp.graph()
        features_classifier_input = features_graph["features_input"]
        features_prediction_probs = features_graph["prediction_probs"]
        with tf.Session() as sess:

            # Initialise all variables

            # Saving object
            saver = tf.train.Saver()

            # ====> Run the second graph <====
            saver.restore(sess, features_mlp.SAVE_PATH)

            # Predict how good a summary each sentence is using the computation graph
            probs = sess.run(features_prediction_probs, feed_dict={features_classifier_input: feature_matrix})

            # Store the sentences and probabilities in a list to be sorted
            for i in range(num_sents):
                sentence = paper[i][0]
                sentence_vec = paper[i][1]
                prob = probs[i][1]
                sentences_feat_summary_probs.append((sentence, sentence_vec, prob, i))

        # ====> Combine the results <====

        # This list is now sorted by the probability of the sentence being a good summary sentence
        #sentences_and_summary_probs = [x for x in reversed(sorted(sentences_and_summary_probs, key=itemgetter(2)))]

        # Sort features list in probability order
        #sentences_feat_summary_probs = [x for x in reversed(sorted(sentences_feat_summary_probs, key=itemgetter(2)))]

        summary = []
        sents_already_added = set()

        # ====> Attempt Four <====
        final_sents_probs = []

        for item in zip(sentences_feat_summary_probs, sentences_and_summary_probs):
            prob_summNet = item[1][2] * (1 - self.C)
            prob_Features = item[0][2] * (1 + self.C)
            avg_prob = (prob_summNet + prob_Features) / 2
            final_sents_probs.append((item[0][0], item[0][1], avg_prob, item[0][3]))

        final_sents_probs = [x for x in reversed(sorted(final_sents_probs, key=itemgetter(2)))]
        final_sents_probs = sorted(final_sents_probs, key=itemgetter(-1))

        if visualise:
            return final_sents_probs

        #summary = final_sents_probs[0:self.summary_length]

        # ====> Attempt Three <====
        # Take summary sentences from features
        summary = sentences_feat_summary_probs[0:self.summary_length]
        for item in summary:

        # Add ones from summary net if it's sure of them and they aren't there already
        max_additional = 5
        count_additional = 0
        for item in sentences_and_summary_probs:
            if count_additional > max_additional:
            if item[3] not in sents_already_added and item[2] > 0.95:
                count_additional += 1
        # ====> Attempt Two <====
        i = 0
        while len(summary) < self.summary_length:

            if i >= len(sentences_feat_summary_probs) and i >= len(sentences_and_summary_probs):

            feats = sentences_feat_summary_probs[i]
            summNet = sentences_and_summary_probs[i]

            feats_prob = feats[2]
            summNet_prob = summNet[2]

            if feats_prob >= summNet_prob and feats[3] not in sents_already_added:
            elif summNet_prob > feats_prob and summNet[3] not in sents_already_added:

            i += 1
        # ====> Attempt One <====
        # True to select a summary sentence from summ_net, false to select from features
        summ_net = True
        for i in range(num_sents):

            if len(summary) >= self.summary_length \
                    or len(sentences_and_summary_probs) <= 0 \
                    or len(sentences_feat_summary_probs) <= 0:

            added = False

            if summ_net:

                while not added:

                    if len(sentences_and_summary_probs) <= 0:

                    highest_prob = sentences_and_summary_probs.pop(0)
                    if highest_prob[3] in sents_already_added or len(highest_prob[0]) < self.min_sent_len:
                        added = True

                summ_net = False


                while not added:

                    if len(sentences_feat_summary_probs) <= 0:

                    highest_prob = sentences_feat_summary_probs.pop(0)
                    if highest_prob[3] in sents_already_added or len(highest_prob[0]) < self.min_sent_len:
                        added = True

                summ_net = True

        # Order summary sentences according to the order they appear in the paper
        ordered_summary = sorted(summary, key=itemgetter(-1))

        # Print the summary
        summary = []

        for sentence, sentence_vec, prob, pos in ordered_summary:
            sentence = " ".join(sentence)
            summary.append((sentence, pos))

        useful_functions.write_summary(SUMMARY_WRITE_LOC, summary, filename.strip(".txt"))

        for sentence in summary:
コード例 #3
    def summarise(self, filename):
        Generates a summary of the paper.
        :param filename: the name of the file to summarise
        :return: a summary of the paper.

        # Each item has form (sentence, sentence_vector, abstract_vector, features)
        paper = self.prepare_paper(filename)
        # ========> Code from here on is summariser specific <========
        graph1 = tf.get_default_graph()
        with tf.Session() as sess:

            # Initialise all variables

            # Saving object
            #saver = tf.train.Saver()
            saver = tf.train.import_meta_graph(SAVE_PATH + 'model-200.meta')
            module_file = tf.train.latest_checkpoint(SAVE_PATH)

            # Restore the saved model
            saver.restore(sess, module_file)  #,module_file)#, SAVE_PATH)

            # Stores sentences, the probability of them being good summaries and their position in the paper
            sentences_and_summary_probs = []

            # Number of sentences in the paper
            num_sents = len(paper)

            # ----> Create the matrix for sentences for the LSTM <----
            sentence_list = []

            for sent, sent_vec, abs_vec, feats in paper:
                if len(sent) < MAX_SENT_LEN:

            # Get the matrix representation of the sentences
            sentence_matrix, sent_lens = sents2input(sentence_list, num_sents)

            # ----> Create the matrix for abstracts for the LSTM <----
            abstract_matrix = np.zeros((num_sents, ABSTRACT_DIMENSION),

            i = 0
            for _, _, abs_vec, _ in paper:
                abstract_matrix[i, :] = abs_vec
                i += 1

            # ----> Create the matrix of features for the LSTM <----
            feature_matrix = np.zeros((num_sents, NUM_FEATURES),

            i = 0
            for _, _, _, feat in paper:
                feature_matrix[i, :] = feat
                i += 1

            #Write OUTFILE for summarunner
            with open(SUMM_SOURCE + filename, 'w') as OUTFILE:
                for i in range(num_sents):
                    OUTFILE.write(" ".join(word for word in paper[i][0]))

            # Create the feed_dict
            feed_x = summarunner_datareader.get_input_tensor(SUMM_SOURCE +

            input_x = graph1.get_operation_by_name("inputs/x_input").outputs[0]
            self.prediction_probs = graph1.get_operation_by_name(

            # Predict how good a summary each sentence is using the computation graph
            probs = np.random.random(0)
            for x in feed_x:
                probs = np.append(
                             feed_dict={input_x: x.reshape(40, 100)}), probs)

            # Store the sentences and probabilities in a list to be sorted
            for i in range(num_sents):
                sentence = paper[i][0]
                sentence_vec = paper[i][1]
                prob = probs[i]
                    (sentence, sentence_vec, prob, i))

            # This list is now sorted by the probability of the sentence being a good summary sentence
            sentences_and_summary_probs = [
                x for x in reversed(
                    sorted(sentences_and_summary_probs, key=itemgetter(2)))

            summary = []
            for sent, sent_vec, prob, pos in sentences_and_summary_probs:
                if len(summary) > self.summary_length:

                if len(sent) < 10:
                    summary.append((sent, sent_vec, prob, pos))

            #summary = sentences_and_summary_probs[0:self.summary_length]

            # Order summary sentences according to the order they appear in the paper
            ordered_summary = sorted(summary, key=itemgetter(-1))

            # Print the summary
            summary = []

            for sentence, sentence_vec, prob, pos in ordered_summary:
                sentence = " ".join(sentence)
                summary.append((sentence, pos))
        #print("calling write_summary..")
        useful_functions.write_summary(SUMMARY_WRITE_LOC, summary,