def summarize2(txt, cuttor=None):
    """Build a summary of *txt* from its highest-scoring sentences.

    The text is split with ``cut_sentence``; keywords come from
    ``extract_keywords(txt, N_2, ...)``; every lower-cased sentence is then
    scored against those keywords by ``__score_sentences``. The
    ``TOP_SENTENCES`` best-scoring sentences are kept and emitted in the
    order of their sentence index.

    Args:
        txt: Text to summarize.
        cuttor: Optional word cutter forwarded to the keyword and scoring
            helpers. When falsy, a new ``Cuttor`` is created and given a
            stage-1 regex matching runs of digits or ASCII letters.

    Returns:
        The selected sentences joined by ``u'。 '`` and followed by a
        trailing ``u'。 '``.
    """
    if cuttor:
        tmp_cuttor = cuttor
    else:
        tmp_cuttor = Cuttor()
        # Raw string: ``\d`` is not a valid *string* escape, so a plain
        # literal triggers an invalid-escape warning on modern Python.
        tmp_cuttor.set_stage1_regex(
            re.compile(r'(\d+)|([a-zA-Z]+)', re.I | re.U))

    # Keep the original sentences for output; score the lower-cased copies.
    sentences = list(cut_sentence(txt))
    normalized_sentences = [s.lower() for s in sentences]

    top_n_words = extract_keywords(txt, N_2, tmp_cuttor)
    scored_sentences = __score_sentences(
        normalized_sentences, top_n_words, tmp_cuttor)

    # Sort ascending by score and keep the tail, i.e. the highest scores.
    # NOTE: a TOP_SENTENCES of 0 would make this slice keep everything.
    top_n_scored = sorted(
        scored_sentences, key=lambda s: s[1])[-TOP_SENTENCES:]
    # Put the winners back in sentence-index order.
    top_n_scored = sorted(top_n_scored, key=lambda s: s[0])

    top_n_summary = [sentences[idx] for (idx, _score) in top_n_scored]
    return u'。 '.join(top_n_summary) + u'。 '
def summarize3(txt, cuttor=None):
    """Build a summary of *txt* from sentences scoring well above average.

    The text is split with ``cut_sentence``; keywords come from
    ``extract_keywords(txt, N_3, ...)``; every lower-cased sentence is then
    scored against those keywords by ``__score_sentences``. A sentence is
    kept when its score is strictly greater than ``avg + 0.5 * std``, where
    ``avg`` and ``std`` are returned by ``_mean_std`` over all scores.

    Args:
        txt: Text to summarize.
        cuttor: Optional word cutter forwarded to the keyword and scoring
            helpers. When falsy, a new ``Cuttor`` is created and given a
            stage-1 regex matching runs of digits or ASCII letters.

    Returns:
        The selected sentences joined by ``u'。 '`` and followed by a
        trailing ``u'。 '``. If no sentence passes the threshold the result
        is just ``u'。 '``.
    """
    if cuttor:
        tmp_cuttor = cuttor
    else:
        tmp_cuttor = Cuttor()
        # Raw string: ``\d`` is not a valid *string* escape, so a plain
        # literal triggers an invalid-escape warning on modern Python.
        tmp_cuttor.set_stage1_regex(
            re.compile(r'(\d+)|([a-zA-Z]+)', re.I | re.U))

    # Keep the original sentences for output; score the lower-cased copies.
    sentences = list(cut_sentence(txt))
    normalized_sentences = [s.lower() for s in sentences]

    top_n_words = extract_keywords(txt, N_3, tmp_cuttor)
    scored_sentences = __score_sentences(
        normalized_sentences, top_n_words, tmp_cuttor)

    # Mean and std come from the local helper rather than numpy.
    avg, std = _mean_std([s[1] for s in scored_sentences])
    threshold = avg + 0.5 * std
    mean_scored = [(sent_idx, score)
                   for (sent_idx, score) in scored_sentences
                   if score > threshold]

    mean_scored_summary = [sentences[idx] for (idx, _score) in mean_scored]
    return u'。 '.join(mean_scored_summary) + u'。 '
def summarize3(txt, cuttor=None):
    """Summarize *txt* by keeping sentences that score above a threshold.

    Sentences are produced by ``cut_sentence`` and lower-cased for scoring.
    ``extract_keywords(txt, N_3, ...)`` supplies the keywords and
    ``__score_sentences`` yields ``(sentence_index, score)`` pairs. Only
    pairs whose score exceeds ``avg + 0.5 * std`` (both values taken from
    ``_mean_std`` over all scores) contribute to the summary.

    Args:
        txt: Text to summarize.
        cuttor: Optional word cutter passed through to the keyword and
            scoring helpers. When falsy, a new ``Cuttor`` is built and
            configured with a stage-1 regex for digit runs and ASCII
            letter runs.

    Returns:
        The kept sentences joined with ``u'。 '`` plus a trailing
        ``u'。 '``; only ``u'。 '`` when nothing passes the threshold.
    """
    if cuttor:
        tmp_cuttor = cuttor
    else:
        tmp_cuttor = Cuttor()
        # Raw string keeps ``\d`` intact for the regex engine and avoids
        # the invalid-escape warning a plain literal produces.
        stage1_pattern = re.compile(r'(\d+)|([a-zA-Z]+)', re.I | re.U)
        tmp_cuttor.set_stage1_regex(stage1_pattern)

    # Original sentences are emitted; lower-cased copies are scored.
    sentences = list(cut_sentence(txt))
    normalized_sentences = [s.lower() for s in sentences]

    top_n_words = extract_keywords(txt, N_3, tmp_cuttor)
    scored_sentences = __score_sentences(
        normalized_sentences, top_n_words, tmp_cuttor)

    # Mean and std are computed by the local helper instead of numpy.
    avg, std = _mean_std([pair[1] for pair in scored_sentences])
    cutoff = avg + 0.5 * std

    mean_scored_summary = [sentences[idx]
                           for (idx, score) in scored_sentences
                           if score > cutoff]
    return u'。 '.join(mean_scored_summary) + u'。 '
def summarize2(txt, cuttor=None):
    """Summarize *txt* by keeping its ``TOP_SENTENCES`` best sentences.

    Sentences are produced by ``cut_sentence`` and lower-cased for scoring.
    ``extract_keywords(txt, N_2, ...)`` supplies the keywords and
    ``__score_sentences`` yields ``(sentence_index, score)`` pairs. The
    highest-scoring pairs are selected and then re-ordered by sentence
    index before the summary is assembled.

    Args:
        txt: Text to summarize.
        cuttor: Optional word cutter passed through to the keyword and
            scoring helpers. When falsy, a new ``Cuttor`` is built and
            configured with a stage-1 regex for digit runs and ASCII
            letter runs.

    Returns:
        The kept sentences joined with ``u'。 '`` plus a trailing
        ``u'。 '``.
    """
    if cuttor:
        tmp_cuttor = cuttor
    else:
        tmp_cuttor = Cuttor()
        # Raw string keeps ``\d`` intact for the regex engine and avoids
        # the invalid-escape warning a plain literal produces.
        stage1_pattern = re.compile(r'(\d+)|([a-zA-Z]+)', re.I | re.U)
        tmp_cuttor.set_stage1_regex(stage1_pattern)

    # Original sentences are emitted; lower-cased copies are scored.
    sentences = list(cut_sentence(txt))
    normalized_sentences = [s.lower() for s in sentences]

    top_n_words = extract_keywords(txt, N_2, tmp_cuttor)
    scored_sentences = __score_sentences(
        normalized_sentences, top_n_words, tmp_cuttor)

    # Ascending sort by score; the last TOP_SENTENCES entries are the best.
    # NOTE: with TOP_SENTENCES == 0 the slice would return every entry.
    by_score = sorted(scored_sentences, key=lambda pair: pair[1])
    top_n_scored = by_score[-TOP_SENTENCES:]
    # Re-order the selection by sentence index for output.
    top_n_scored = sorted(top_n_scored, key=lambda pair: pair[0])

    top_n_summary = [sentences[idx] for (idx, _score) in top_n_scored]
    return u'。 '.join(top_n_summary) + u'。 '