def summarize_clusters_lexrank():
    """Build and store a LexRank summary for every cluster awaiting one.

    For each cluster returned by ``MongoStore.get_pending_clusters()``:

    1. Keep the article ids whose paired score is greater than 0.55.
    2. Fetch those articles and replace curly double quotes (U+201C and
       U+201D) in their ``text_content`` with a plain ``"``.
    3. Run ``sumpy.lexrank`` over the cleaned texts.
    4. Turn the first five rows of the result frame into sentence records.
    5. Insert the summary document; the cluster is flagged as summarized
       only when ``insert_summary`` returns a truthy value.
    """
    store = MongoStore()
    for pending in store.get_pending_clusters():
        # Each entry of the cluster's "articles" unpacks to (id, score).
        wanted_ids = []
        for article_id, score in pending["articles"]:
            if score > .55:
                wanted_ids.append(article_id)
        docs = store.get_articles_from_ids(wanted_ids)

        cleaned_texts = []
        for doc in docs:
            body = doc["text_content"]
            body = body.replace(u"\u201D", u"\"")
            body = body.replace(u"\u201C", u"\"")
            cleaned_texts.append(body)

        ranking = sumpy.lexrank(cleaned_texts)

        # NOTE(review): `_df` is a private attribute of the sumpy result;
        # presumably its rows are already ordered by rank -- confirm.
        leading_rows = ranking._df.head(5)
        picked = [
            {
                # "doc id" is used as a position into the fetched articles.
                "article_id": docs[row["doc id"]]["_id"],
                "sentence_id": row["sent id"],
                "text": row["sent text"],
            }
            for _, row in leading_rows.iterrows()
        ]

        record = {
            "sentences": picked,
            "cluster_id": pending["_id"],
            "summary_type": "lexrank",
            "date": datetime.now(),
        }
        inserted = store.insert_summary(record)
        if inserted:
            store.set_summarized_flag(pending)
def summarize_clusters_lexrank():
    """Summarize each pending cluster with LexRank and persist the result.

    Clusters come from ``MongoStore.get_pending_clusters()``. Only articles
    whose paired score exceeds 0.55 are fetched and summarized. The stored
    document holds up to five sentence records, the cluster id, the summary
    type ``"lexrank"`` and the current local time. A cluster is marked as
    summarized only if ``insert_summary`` reports success (truthy return).
    """

    def plain_quotes(text):
        # Swap right (U+201D) then left (U+201C) curly quotes for '"'.
        return text.replace(u"\u201D", u"\"").replace(u"\u201C", u"\"")

    def sentence_record(row, source_articles):
        # "doc id" is used as a position into the fetched articles.
        return {
            "article_id": source_articles[row["doc id"]]["_id"],
            "sentence_id": row["sent id"],
            "text": row["sent text"],
        }

    db = MongoStore()
    for group in db.get_pending_clusters():
        # Each entry of the cluster's "articles" unpacks to (id, score).
        selected = [ident for ident, score in group["articles"] if score > .55]
        fetched = db.get_articles_from_ids(selected)
        texts = [plain_quotes(item["text_content"]) for item in fetched]

        result = sumpy.lexrank(texts)

        # NOTE(review): relies on the private `_df` frame of the sumpy
        # result; presumably sorted best-first -- confirm.
        chosen = []
        for _, row in result._df.head(5).iterrows():
            chosen.append(sentence_record(row, fetched))

        document = {
            "sentences": chosen,
            "cluster_id": group["_id"],
            "summary_type": "lexrank",
            "date": datetime.now(),
        }
        if not db.insert_summary(document):
            # Leave the cluster unflagged when the insert did not succeed.
            continue
        db.set_summarized_flag(group)
from mongo import MongoStore

# Script: build a LexRank summary for every cluster returned by
# get_pending_clusters() and store it through MongoStore.
ms = MongoStore()
clusters = ms.get_pending_clusters()
for cluster in clusters:
    # Keep only the article ids whose paired score is above 0.55.
    retrieve_ids = []
    for aid, sim in cluster["articles"]:
        if sim > .55:
            retrieve_ids.append(aid)
    articles = ms.get_articles_from_ids(retrieve_ids)

    # Replace curly double quotes (U+201D, U+201C) with a plain '"'.
    art_texts = []
    for a in articles:
        art_texts.append(
            a["text_content"].replace(u"\u201D", u"\"").replace(
                u"\u201C", u"\""))

    summary = sumpy.lexrank(art_texts)

    # NOTE(review): `_df` is a private attribute of the sumpy result;
    # presumably its first rows are the best-ranked sentences -- confirm.
    sents = []
    for x, row in summary._df.head(5).iterrows():
        sents.append({
            # "doc id" is used as a position into the fetched articles.
            "article_id": articles[row["doc id"]]["_id"],
            "sentence_id": row["sent id"],
            "text": row["sent text"],
        })

    summary_map = {
        "sentences": sents,
        "cluster_id": cluster["_id"],
        "summary_type": "lexrank",
        "date": datetime.now(),
    }

    # Flag the cluster only when the insert returned a truthy value.
    if ms.insert_summary(summary_map):
        ms.set_summarized_flag(cluster)