def eval_avgs_against_descriptions(embedding=None):
    """
    Compute ROUGE-L and cosine similarity evaluation measures where ideal
    summaries are taken as the TED Talk descriptions.

    :param embedding: fastText word embedding.
    :return: List containing averages of precision, recall, F-score, and
        cosine similarity over 50 documents.
    :rtype: list(float)
    """
    # Get records where one or more tag is present in transcript.
    df = read_data(75)
    df = drop_noconcept_recs(df)[:50]

    results = []
    for j in range(len(df)):
        # Pure positional lookup. The original `df['d_cnt'].iloc[[j]][j]`
        # indexed the one-row Series by *label* j, which raises KeyError
        # whenever drop_noconcept_recs() has left a non-contiguous index.
        s = summarize(df.iloc[[j]], df['d_cnt'].iloc[j])
        ideal = preprocess_transcripts(df.iloc[[j]],
                                       clean_transcript_data=False,
                                       df_field='description')
        rl = Evaluation.rouge_l(s, ideal[0][0])
        cs = Evaluation.cos_similarity(s, ideal[0][0])
        results.append([rl, cs])

    # Average evaluation scores over number of dataframe records.
    # Each rl is a (precision, recall, F-score) triple; each cs is a scalar.
    avg_prec = np.average([rl[0] for rl, _ in results])
    avg_recall = np.average([rl[1] for rl, _ in results])
    avg_fscore = np.average([rl[2] for rl, _ in results])
    avg_cossim = np.average([cs for _, cs in results])
    return [avg_prec, avg_recall, avg_fscore, avg_cossim]
def eval_against_humangenerated(method, embedding=None):
    """
    Compute ROUGE-L and cosine similarity evaluation measures for the first
    five records, where the ideal summaries are human generated.

    :param method: LSA or TextRank summarization method.
    :param embedding: fastText word embedding.
    :return results: List containing evaluation measure computations.
    :rtype: list(array_type): float
    """
    # Hand-written reference summaries, one per record, in record order.
    human_summaries = [
        ("It's never happened before in software! Remember, the "
         "hard part is not deciding what features to add, it's "
         "The lesson was: simplicity sells."),
        ("This is where I realized that there was really a need to communicate, "
         "because the data of what's happening in the world and the child "
         "health of every country is very well aware."
         "Now, statisticians don't like it, because they say that this will not "
         "show the reality; we have to have statistical, analytical methods. "
         "And it's a new technology coming in, but then amazingly, how well it "
         "fits to the economy of the countries."),
        ("And the interesting thing is: if you do it for love, the money comes "
         "anyway. 'To be successful, put your nose down in something and get "
         "damn good at it.' Persistence is the number one reason for our success."),
        ("So honeybees are important for their role in the economy as well as "
         "in agriculture. We need bees for the future of our cities and urban "
         "living. What can you do to save the bees or to help them or to think "
         "of sustainable cities in the future?"),
        ("So now I want to introduce you to my new hero in the global climate "
         "change war, and that is the eastern oyster. So the oyster was the "
         "basis for a manifesto-like urban design project that I did about the "
         "New York Harbor called oyster-tecture. "
         "To conclude, this is just one "
         "cross-section of one piece of city, but my dream is, my hope is, that "
         "when you all go back to your own cities that we can start to work "
         "together and collaborate on remaking and reforming a new urban "
         "landscape towards a more sustainable, a more livable and a more "
         "delicious future."),
    ]

    df = read_data(5)

    # Score a 3-sentence machine summary of each record against its
    # human-written reference.
    scores = []
    for idx in range(len(df)):
        generated = method.summarize_text(df.iloc[[idx]], 3)
        reference = human_summaries[idx]
        scores.append([Evaluation.rouge_l(generated, reference),
                       Evaluation.cos_similarity(generated, reference)])
    return scores