def test(self, e, k=3, threshold=0.5):
    """Build an SVD-based extractive summary of ``e.sentences`` and score it.

    Each sentence is tokenized into word counts, the counts are assembled
    into a words x sentences matrix, and a reduced SVD of that matrix is
    used to give every sentence a saliency score. The ``k`` highest-scoring
    sentences, in document order, form the predicted summary, which is then
    passed to ``Evaluator.ROUGE1`` and ``Evaluator.ROUGE2`` together with
    ``e.ground_truths``.

    Args:
        e: Object exposing ``sentences`` (an indexable sequence of strings)
            and ``ground_truths`` (forwarded unchanged to the evaluator).
        k: Maximum number of sentences to keep in the summary. Must be at
            least 1. Defaults to 3.
        threshold: Fraction of the largest singular value below which a
            singular value is zeroed before scoring. Defaults to 0.5.

    Returns:
        A 6-tuple ``(P_1, R_1, F1_1, P_2, R_2, F1_2)``: the three values
        returned by ``ROUGE1`` followed by the three returned by ``ROUGE2``
        (presumably precision, recall and F1 -- confirm against Evaluator).

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        # A slice of [-0:] would silently select *every* sentence.
        raise ValueError("k must be a positive integer, got %r" % (k,))

    # Per-sentence word counts: {sentence index: {word: count}}.
    matrix = {}
    for idx, sentence in enumerate(e.sentences):
        counts = {}
        # Maximal runs of word characters -- the same tokens that splitting
        # on r'\W+' yields once the empty strings are discarded.
        for word in re.findall(r'\w+', sentence):
            counts[word] = counts.get(word, 0) + 1
        matrix[idx] = counts

    # from_dict defaults to orient='columns', so the outer keys (sentence
    # indices) become columns: the array is words x sentences. Words missing
    # from a sentence become NaN and are filled with 0.
    arr = pd.DataFrame.from_dict(matrix).fillna(0).values

    # Reduced SVD; only the singular values and the right singular vectors
    # (one column of vh per sentence) are needed.
    _, s, vh = np.linalg.svd(arr, full_matrices=False)

    # Zero out the singular values that are small relative to the largest.
    sigma_threshold = s.max() * threshold
    s[s < sigma_threshold] = 0

    # Saliency of sentence j is sum_i s[i]^2 * vh[i, j]^2.
    saliency_vec = np.dot(np.square(s), np.square(vh))

    # Indices of the k most salient sentences, restored to document order.
    top_sentences = np.sort(saliency_vec.argsort()[-k:])
    pred = [e.sentences[i] for i in top_sentences]

    evaluate = Evaluator()
    P_1, R_1, F1_1 = evaluate.ROUGE1(pred=pred, test=e.ground_truths)
    P_2, R_2, F1_2 = evaluate.ROUGE2(pred=pred, test=e.ground_truths)
    return P_1, R_1, F1_1, P_2, R_2, F1_2