def summarise(self, filename):
    """
    Generates a summary of the paper.
    :param filename: the name of the file to summarise
    :return: a summary of the paper
    """
    # Each item has form (sentence, sentence_vector, abstract_vector, features)
    paper = self.prepare_paper(filename)

    # ========> Code from here on is summariser specific <========

    with tf.Session() as sess:

        # Initialise all variables
        sess.run(tf.global_variables_initializer())

        # Restore the saved model
        saver = tf.train.Saver()
        saver.restore(sess, SAVE_PATH)

        # Stores sentences, the probability of them being good summaries and their position in the paper
        sentences_and_summary_probs = []

        # Number of sentences in the paper
        num_sents = len(paper)

        # ----> Create the matrix of sentences for the LSTM <----
        sentence_list = []
        for sent, sent_vec, abs_vec, feats in paper:
            # Truncate sentences longer than the maximum length the LSTM accepts
            if len(sent) < MAX_SENT_LEN:
                sentence_list.append(sent)
            else:
                sentence_list.append(sent[0:MAX_SENT_LEN])

        # Get the matrix representation of the sentences
        sentence_matrix, sent_lens = sents2input(sentence_list, num_sents)

        # ----> Create the matrix of abstracts for the LSTM <----
        abstract_matrix = np.zeros((num_sents, ABSTRACT_DIMENSION), dtype=np.float32)
        for i, (_, _, abs_vec, _) in enumerate(paper):
            abstract_matrix[i, :] = abs_vec

        # ----> Create the matrix of features for the LSTM <----
        feature_matrix = np.zeros((num_sents, NUM_FEATURES), dtype=np.float32)
        for i, (_, _, _, feat) in enumerate(paper):
            feature_matrix[i, :] = feat

        # Create the feed_dict, with dropout disabled for inference
        feed_dict = {
            self.sentence_input: sentence_matrix,
            self.abstract_input: abstract_matrix,
            self.features_input: feature_matrix,
            self.seq_lens: sent_lens,
            self.keep_prob: 1.0
        }

        # Predict how good a summary sentence each sentence is using the computation graph
        probs = sess.run(self.prediction_probs, feed_dict=feed_dict)

        # Store the sentences and probabilities in a list to be sorted
        for i in range(num_sents):
            sentence = paper[i][0]
            sentence_vec = paper[i][1]
            prob = probs[i][1]  # probability of the positive (summary) class
            sentences_and_summary_probs.append((sentence, sentence_vec, prob, i))

        # Sort by descending probability of being a good summary sentence
        sentences_and_summary_probs.sort(key=itemgetter(2), reverse=True)

        # Greedily take the most probable sentences, skipping very short ones
        summary = []
        for sent, sent_vec, prob, pos in sentences_and_summary_probs:
            if len(summary) >= self.summary_length:
                break
            if len(sent) < 10:
                continue
            summary.append((sent, sent_vec, prob, pos))

        # Order summary sentences according to the order they appear in the paper
        ordered_summary = sorted(summary, key=itemgetter(-1))

        # Join each sentence's words back together and keep its paper position
        summary = []
        for sentence, sentence_vec, prob, pos in ordered_summary:
            sentence = " ".join(sentence)
            summary.append((sentence, pos))

        # str.strip(".txt") would strip any of '.', 't', 'x' from both ends, so remove
        # the extension explicitly instead
        base_name = filename[:-len(".txt")] if filename.endswith(".txt") else filename
        useful_functions.write_summary(SUMMARY_WRITE_LOC, summary, base_name)
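# The call to sents2input above packs the tokenised sentences into a fixed-size batch
# for the LSTM. A minimal sketch of what such a helper could look like -- an
# illustration only, not the project's actual implementation. It assumes each word is
# mapped to a WORD_DIMENSION-dim embedding by some lookup `embedding(word)`; both the
# constant and the lookup are hypothetical stand-ins.
import numpy as np

WORD_DIMENSION = 100  # assumed embedding size
MAX_SENT_LEN = 100    # assumed maximum words per sentence


def sents2input_sketch(sentence_list, num_sents, embedding):
    """Pack tokenised sentences into shape (num_sents, MAX_SENT_LEN, WORD_DIMENSION),
    zero-padding short sentences, and return the true lengths for the LSTM."""
    batch = np.zeros((num_sents, MAX_SENT_LEN, WORD_DIMENSION), dtype=np.float32)
    lengths = np.zeros(num_sents, dtype=np.int32)
    for i, sent in enumerate(sentence_list):
        lengths[i] = len(sent)  # callers truncate to MAX_SENT_LEN beforehand
        for j, word in enumerate(sent):
            batch[i, j, :] = embedding(word)  # rows beyond len(sent) stay zero-padded
    return batch, lengths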
def summarise(self, filename, visualise=False):
    """
    Generates a summary of the paper.
    :param filename: the name of the file to summarise
    :param visualise: true if visualising the output
    :return: a summary of the paper
    """
    # Each item has form (sentence, sentence_vector, abstract_vector, features)
    paper = self.prepare_paper(filename, visualise=visualise)

    # ========> Code from here on is summariser specific <========

    # Stores sentences, the probability of them being good summaries and their position in the paper
    sentences_and_summary_probs = []

    # Summary probabilities according to the features classifier
    sentences_feat_summary_probs = []

    # ====> Run the first graph: the summariser network <====
    tf.reset_default_graph()
    computation_graph = summariser_net.graph()
    sentence_input = computation_graph["sentence_input"]
    abstract_input = computation_graph["abstract_input"]
    features_input = computation_graph["features_input"]
    seq_lens = computation_graph["sequence_lengths"]
    prediction_probs = computation_graph["raw_predictions"]
    keep_prob = computation_graph["keep_prob"]

    with tf.Session() as sess:

        # Initialise all variables
        sess.run(tf.global_variables_initializer())

        # Restore the saved summariser network
        saver = tf.train.Saver()
        saver.restore(sess, summariser_net.SAVE_PATH)

        # ----> Create the matrix of sentences for the LSTM <----
        sentence_list = []
        for sent, sent_vec, abs_vec, feats in paper:
            # Truncate sentences longer than the maximum length the LSTM accepts
            if len(sent) < MAX_SENT_LEN:
                sentence_list.append(sent)
            else:
                sentence_list.append(sent[0:MAX_SENT_LEN])

        # Number of sentences in the paper
        num_sents = len(sentence_list)

        # Get the matrix representation of the sentences
        sentence_matrix, sent_lens = sents2input(sentence_list, num_sents)

        # ----> Create the matrix of abstracts for the LSTM <----
        abstract_matrix = np.zeros((num_sents, ABSTRACT_DIMENSION), dtype=np.float32)
        for i, (_, _, abs_vec, _) in enumerate(paper):
            abstract_matrix[i, :] = abs_vec

        # ----> Create the matrix of features for the LSTM <----
        feature_matrix = np.zeros((num_sents, NUM_FEATURES), dtype=np.float32)
        for i, (_, _, _, feat) in enumerate(paper):
            feature_matrix[i, :] = feat

        # Create the feed_dict, with dropout disabled for inference
        feed_dict = {
            sentence_input: sentence_matrix,
            abstract_input: abstract_matrix,
            features_input: feature_matrix,
            seq_lens: sent_lens,
            keep_prob: 1.0
        }

        # Predict how good a summary sentence each sentence is using the computation graph
        probs = sess.run(prediction_probs, feed_dict=feed_dict)

        # Store the sentences and probabilities in a list to be sorted
        for i in range(num_sents):
            sentence = paper[i][0]
            sentence_vec = paper[i][1]
            prob = probs[i][1]
            sentences_and_summary_probs.append((sentence, sentence_vec, prob, i))

    # ====> Run the second graph: the features-only classifier <====
    tf.reset_default_graph()
    features_graph = features_mlp.graph()
    features_classifier_input = features_graph["features_input"]
    features_prediction_probs = features_graph["prediction_probs"]

    with tf.Session() as sess:

        # Initialise all variables
        sess.run(tf.global_variables_initializer())

        # Restore the saved features classifier
        saver = tf.train.Saver()
        saver.restore(sess, features_mlp.SAVE_PATH)

        # Predict how good a summary sentence each sentence is from the features alone
        probs = sess.run(features_prediction_probs,
                         feed_dict={features_classifier_input: feature_matrix})

        # Store the sentences and probabilities in a list to be sorted
        for i in range(num_sents):
            sentence = paper[i][0]
            sentence_vec = paper[i][1]
            prob = probs[i][1]
            sentences_feat_summary_probs.append((sentence, sentence_vec, prob, i))

    # ====> Combine the results <====
    # Sorting each list by probability, as below, is only needed by the earlier
    # attempts kept for reference further down.
    #sentences_and_summary_probs = [x for x in reversed(sorted(sentences_and_summary_probs, key=itemgetter(2)))]
    #sentences_feat_summary_probs = [x for x in reversed(sorted(sentences_feat_summary_probs, key=itemgetter(2)))]

    summary = []
    sents_already_added = set()

    # ====> Attempt Four <====
    # Both lists are in paper order, so zipping keeps each sentence aligned with itself
    final_sents_probs = []
    for feat_item, net_item in zip(sentences_feat_summary_probs, sentences_and_summary_probs):
        # Weight the two probabilities with self.C and average them: C = 0 gives the
        # plain mean, larger C trusts the features classifier more
        prob_summ_net = net_item[2] * (1 - self.C)
        prob_features = feat_item[2] * (1 + self.C)
        avg_prob = (prob_summ_net + prob_features) / 2
        final_sents_probs.append((feat_item[0], feat_item[1], avg_prob, feat_item[3]))

    if visualise:
        # Return every sentence with its combined probability, ordered by paper position
        return sorted(final_sents_probs, key=itemgetter(-1))

    # Take the sentences with the highest combined probability as the summary
    final_sents_probs = sorted(final_sents_probs, key=itemgetter(2), reverse=True)
    summary = final_sents_probs[0:self.summary_length]

    """
    # ====> Attempt Three <====
    # Take summary sentences from the features classifier
    summary = sentences_feat_summary_probs[0:self.summary_length]

    for item in summary:
        sents_already_added.add(item[3])

    # Add sentences from the summariser network if it is sure of them and they are not already present
    max_additional = 5
    count_additional = 0
    for item in sentences_and_summary_probs:
        if count_additional > max_additional:
            break
        if item[3] not in sents_already_added and item[2] > 0.95:
            summary.append(item)
            sents_already_added.add(item[3])
            count_additional += 1
    """

    """
    # ====> Attempt Two <====
    i = 0
    while len(summary) < self.summary_length:
        if i >= len(sentences_feat_summary_probs) and i >= len(sentences_and_summary_probs):
            break

        feats = sentences_feat_summary_probs[i]
        summ_net = sentences_and_summary_probs[i]
        feats_prob = feats[2]
        summ_net_prob = summ_net[2]

        if feats_prob >= summ_net_prob and feats[3] not in sents_already_added:
            summary.append(feats)
            sents_already_added.add(feats[3])
        elif summ_net_prob > feats_prob and summ_net[3] not in sents_already_added:
            summary.append(summ_net)
            sents_already_added.add(summ_net[3])

        i += 1
    """

    """
    # ====> Attempt One <====
    # True to select a summary sentence from the summariser network, false to select from the features classifier
    summ_net = True
    for i in range(num_sents):
        if len(summary) >= self.summary_length \
                or len(sentences_and_summary_probs) <= 0 \
                or len(sentences_feat_summary_probs) <= 0:
            break

        added = False
        if summ_net:
            while not added:
                if len(sentences_and_summary_probs) <= 0:
                    break
                highest_prob = sentences_and_summary_probs.pop(0)
                if highest_prob[3] in sents_already_added or len(highest_prob[0]) < self.min_sent_len:
                    continue
                summary.append(highest_prob)
                sents_already_added.add(highest_prob[3])
                added = True
            summ_net = False
        else:
            while not added:
                if len(sentences_feat_summary_probs) <= 0:
                    break
                highest_prob = sentences_feat_summary_probs.pop(0)
                if highest_prob[3] in sents_already_added or len(highest_prob[0]) < self.min_sent_len:
                    continue
                summary.append(highest_prob)
                sents_already_added.add(highest_prob[3])
                added = True
            summ_net = True
    """

    # Order summary sentences according to the order they appear in the paper
    ordered_summary = sorted(summary, key=itemgetter(-1))

    # Join each sentence's words back together and keep its paper position
    summary = []
    for sentence, sentence_vec, prob, pos in ordered_summary:
        sentence = " ".join(sentence)
        summary.append((sentence, pos))

    # str.strip(".txt") would strip any of '.', 't', 'x' from both ends, so remove the
    # extension explicitly instead
    base_name = filename[:-len(".txt")] if filename.endswith(".txt") else filename
    useful_functions.write_summary(SUMMARY_WRITE_LOC, summary, base_name)

    # Print the summary
    for sentence in summary:
        print(sentence)
        print()
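# The (1 - C) / (1 + C) weighting in Attempt Four above is the heart of the ensemble:
# a single coefficient shifts probability mass from the summariser network towards the
# features classifier. A standalone sketch of that combination rule (the function name
# is illustrative, not from the codebase):

def combine_probs(net_prob, feat_prob, c):
    """Weighted average of the two classifiers' positive-class probabilities.
    c = 0 gives the plain mean; larger c trusts the features classifier more."""
    return (net_prob * (1 - c) + feat_prob * (1 + c)) / 2

# For example, combine_probs(0.9, 0.6, 0.2) = (0.9 * 0.8 + 0.6 * 1.2) / 2 = 0.72.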
def summarise(self, filename):
    """
    Generates a summary of the paper.
    :param filename: the name of the file to summarise
    :return: a summary of the paper
    """
    # Each item has form (sentence, sentence_vector, abstract_vector, features)
    paper = self.prepare_paper(filename)

    # ========> Code from here on is summariser specific <========

    graph1 = tf.get_default_graph()

    with tf.Session() as sess:

        # Initialise all variables
        sess.run(tf.global_variables_initializer())

        # Restore the saved SummaRuNNer model from its latest checkpoint
        saver = tf.train.import_meta_graph(SAVE_PATH + 'model-200.meta')
        module_file = tf.train.latest_checkpoint(SAVE_PATH)
        saver.restore(sess, module_file)

        # Stores sentences, the probability of them being good summaries and their position in the paper
        sentences_and_summary_probs = []

        # Number of sentences in the paper
        num_sents = len(paper)

        # ----> Create the matrix of sentences for the LSTM <----
        sentence_list = []
        for sent, sent_vec, abs_vec, feats in paper:
            # Truncate sentences longer than the maximum length the LSTM accepts
            if len(sent) < MAX_SENT_LEN:
                sentence_list.append(sent)
            else:
                sentence_list.append(sent[0:MAX_SENT_LEN])

        # Get the matrix representation of the sentences
        # (kept for parity with the other summarisers; the SummaRuNNer graph below
        # consumes the text file written out instead)
        sentence_matrix, sent_lens = sents2input(sentence_list, num_sents)

        # ----> Create the matrix of abstracts for the LSTM <----
        abstract_matrix = np.zeros((num_sents, ABSTRACT_DIMENSION), dtype=np.float32)
        for i, (_, _, abs_vec, _) in enumerate(paper):
            abstract_matrix[i, :] = abs_vec

        # ----> Create the matrix of features for the LSTM <----
        feature_matrix = np.zeros((num_sents, NUM_FEATURES), dtype=np.float32)
        for i, (_, _, _, feat) in enumerate(paper):
            feature_matrix[i, :] = feat

        # Write the paper's sentences to a source file for SummaRuNNer
        with open(SUMM_SOURCE + filename, 'w') as outfile:
            for i in range(num_sents):
                outfile.write(" ".join(paper[i][0]))
                outfile.write("\n")

        # Read the file back as SummaRuNNer input tensors
        feed_x = summarunner_datareader.get_input_tensor(SUMM_SOURCE + filename)

        # Look up the input placeholder and the prediction op in the restored graph
        input_x = graph1.get_operation_by_name("inputs/x_input").outputs[0]
        self.prediction_probs = graph1.get_operation_by_name("score_layer/prediction").outputs[0]

        # Predict how good a summary sentence each sentence is, one block at a time,
        # appending in order so probabilities stay aligned with sentence positions
        probs = np.array([], dtype=np.float32)
        for x in feed_x:
            batch_probs = sess.run(self.prediction_probs,
                                   feed_dict={input_x: x.reshape(40, 100)})
            probs = np.append(probs, batch_probs)

        # Store the sentences and probabilities in a list to be sorted
        for i in range(num_sents):
            sentence = paper[i][0]
            sentence_vec = paper[i][1]
            prob = probs[i]
            sentences_and_summary_probs.append((sentence, sentence_vec, prob, i))

        # Sort by descending probability of being a good summary sentence
        sentences_and_summary_probs.sort(key=itemgetter(2), reverse=True)

        # Greedily take the most probable sentences, skipping very short ones
        summary = []
        for sent, sent_vec, prob, pos in sentences_and_summary_probs:
            if len(summary) >= self.summary_length:
                break
            if len(sent) < 10:
                continue
            summary.append((sent, sent_vec, prob, pos))

        # Order summary sentences according to the order they appear in the paper
        ordered_summary = sorted(summary, key=itemgetter(-1))

        # Join each sentence's words back together and keep its paper position
        summary = []
        for sentence, sentence_vec, prob, pos in ordered_summary:
            sentence = " ".join(sentence)
            summary.append((sentence, pos))

        # str.strip(".txt") would strip any of '.', 't', 'x' from both ends, so remove
        # the extension explicitly instead
        base_name = filename[:-len(".txt")] if filename.endswith(".txt") else filename
        useful_functions.write_summary(SUMMARY_WRITE_LOC, summary, base_name)
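# The reshape to (40, 100) above implies the restored SummaRuNNer graph consumes
# fixed-size input blocks. A sketch of how a document might be packed into such blocks.
# The interpretation here -- 40 sentences of 100 word ids each, padded with id 0 -- is
# an assumption about summarunner_datareader.get_input_tensor, not its actual behaviour:
import numpy as np

def pack_document_sketch(sentences_as_ids, sents_per_block=40, words_per_sent=100, pad_id=0):
    """Yield (sents_per_block, words_per_sent) int32 arrays covering the document."""
    for start in range(0, len(sentences_as_ids), sents_per_block):
        block = np.full((sents_per_block, words_per_sent), pad_id, dtype=np.int32)
        for row, sent in enumerate(sentences_as_ids[start:start + sents_per_block]):
            trimmed = sent[:words_per_sent]  # truncate over-long sentences
            block[row, :len(trimmed)] = trimmed
        yield block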