def compute(topic: str) -> str:
    """Summarise the article for *topic* with several methods and score them.

    The article and its reference summary come from ``get_article(topic)``.
    Each method's output is scored with ``Rouge().get_scores`` against the
    reference summary, and one row per result is collected via ``gen_serie``.
    The table is reordered with the module-level ``columns`` list, written to
    ``out/<topic>.csv`` and returned as a JSON string (``orient='records'``).

    NOTE(review): this file defines ``compute`` a second time further down;
    the later definition is the one that stays bound to the name.
    """
    raw, ref = get_article(topic)
    sent = tokenize(raw)
    # Length of the reference relative to the article; handed to summarize().
    ratio = len(ref) / len(raw)
    scorer = Rouge()

    # TextRank
    result = text_rank(raw, sent, ref)
    textrank_row = gen_serie('TextRank', scorer.get_scores(result, ref), result)

    # Gensim
    ret = summarize(raw, ratio)
    gensim_row = gen_serie('Gensim', scorer.get_scores(ret, ref), ret)

    # KMean -- scored against the reference summary like every other method.
    # The previous code passed the Gensim output (`ret`) here instead of `ref`.
    kmean_rows = kmean(sent, ref)

    # Cosine
    cosine_row = gen_serie_passthrough = cosine(sent, ref)

    # DataFrame.append no longer exists in pandas >= 2.0, so the rows are
    # assembled into frames and concatenated in the original order.
    df = pd.concat(
        [
            pd.DataFrame([textrank_row, gensim_row]),
            kmean_rows,
            pd.DataFrame([cosine_row]),
        ],
        ignore_index=True,
    )

    # Rearrange columns
    df = df[columns]
    df.to_csv('out/' + topic + '.csv')
    return df.to_json(orient='records')
def compute(topic: str) -> str:
    """Run every summariser on the article for *topic* and tabulate the scores.

    ``get_article(topic)`` supplies the raw article and a reference summary.
    TextRank, Gensim ``summarize``, the K-means sweep (``kmean``) and the
    cosine-similarity selection (``cosine``) each contribute rows built by
    ``gen_serie``; scoring is done with ``Rouge().get_scores`` against the
    reference summary. The resulting table is reordered using the
    module-level ``columns`` list, saved to ``out/<topic>.csv`` and returned
    as a JSON string with ``orient='records'``.

    NOTE(review): an earlier definition of ``compute`` appears above this one
    in the file and is shadowed by it.
    """
    raw, ref = get_article(topic)
    sent = tokenize(raw)
    # Length of the reference summary relative to the article. It is passed
    # to summarize() as the share of the original text to keep.
    ratio = len(ref) / len(raw)
    scorer = Rouge()

    # TextRank
    result = text_rank(raw, sent, ref)
    rouge = scorer.get_scores(result, ref)
    leading_rows = [gen_serie('TextRank', rouge, result)]

    # Gensim --- ranks text sentences using a variation of the TextRank
    # algorithm
    ret = summarize(raw, ratio)
    rouge = scorer.get_scores(ret, ref)
    leading_rows.append(gen_serie('Gensim', rouge, ret))

    # KMean -- evaluated against the reference summary. The earlier code
    # passed `ret` (the Gensim output) here, unlike every other method.
    kmean_frame = kmean(sent, ref)

    # Cosine
    trailing_rows = [cosine(sent, ref)]

    # DataFrame.append was removed in pandas 2.0; build the pieces and
    # concatenate them once, keeping the original row order.
    df = pd.concat(
        [pd.DataFrame(leading_rows), kmean_frame, pd.DataFrame(trailing_rows)],
        ignore_index=True,
    )

    # Rearrange columns
    df = df[columns]
    df.to_csv('out/' + topic + '.csv')
    return df.to_json(orient='records')
def kmean(text, ref):
    """Score ``cluster`` summaries of *text* against *ref* for i = 2..10.

    For every ``i`` in ``range(2, 11)`` the function calls
    ``cluster(text, ref, i)``, joins the returned pieces with single spaces,
    scores that string against *ref* with ``Rouge().get_scores`` and records
    one row via ``gen_serie('K-mean-<i>', rouge, res)``.

    Args:
        text: The input handed to ``cluster`` (the caller passes the
            tokenised article).
        ref: Reference summary used both by ``cluster`` and for scoring.

    Returns:
        A ``pandas.DataFrame`` with one row per ``i``, indexed 0..8.
    """
    scorer = Rouge()
    rows = []
    for i in range(2, 11):
        # NOTE(review): `i` is presumably the number of clusters -- confirm
        # against cluster().
        res = cluster(text, ref, i)
        rouge = scorer.get_scores(' '.join(res), ref)
        # gen_serie receives the un-joined `res`, exactly as before.
        rows.append(gen_serie('K-mean-' + str(i), rouge, res))

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call.
    return pd.DataFrame(rows).reset_index(drop=True)
def cosine(texts, ref):
    """Build a summary from the sentences most similar to the first one.

    *texts* is vectorised with TF-IDF, every entry is compared to the first
    entry by cosine similarity, and the highest-scoring entries are joined
    with spaces. The joined text is scored against *ref* with
    ``Rouge().get_scores`` and returned through
    ``gen_serie('Cosine Similarity', rouge, res)``.

    Args:
        texts: Indexable collection of sentences to choose from.
        ref: Reference summary; the number of '.'-separated segments in it
            decides how many sentences are selected.

    Returns:
        Whatever ``gen_serie`` returns for the selected summary.
    """
    vec = TfidfVectorizer(tokenizer=textblob_tokenizer,
                          stop_words='english',
                          use_idf=True)
    matrix = vec.fit_transform(texts)
    similarities = cosine_similarity(matrix[0:1], matrix).flatten()

    # NOTE(review): a reference ending in '.' yields a trailing empty
    # segment, so this count can be one higher than the sentence count.
    nb_sentences_in_base_summary = len(ref.split('.'))
    # Never ask for more sentences than exist; the old loop called max() on
    # an exhausted list in that case and raised ValueError.
    wanted = min(nb_sentences_in_base_summary, len(similarities))

    # Rank the ORIGINAL indices by descending similarity. The previous code
    # deleted entries from the score list while indexing the untouched
    # `texts`, so after the first pick every index pointed at the wrong
    # sentence. sorted() is stable, so ties keep their original order, which
    # matches the old "first maximum wins" rule.
    ranked = sorted(range(len(similarities)),
                    key=lambda idx: -similarities[idx])
    cos_results = [texts[idx] for idx in ranked[:wanted]]

    res = ' '.join(cos_results)
    rouge = Rouge().get_scores(res, ref)
    return gen_serie('Cosine Similarity', rouge, res)