Example #1
def summarize2(txt, cuttor=None):
    # Use the caller's segmenter if given, otherwise build a default Cuttor
    # that treats runs of digits and ASCII letters as single tokens.
    if cuttor:
        tmp_cuttor = cuttor
    else:
        tmp_cuttor = Cuttor()
        tmp_cuttor.set_stage1_regex(re.compile(r'(\d+)|([a-zA-Z]+)', re.I | re.U))

    # Split the text into sentences and keep a lower-cased copy for scoring.
    sentences = list(cut_sentence(txt))
    normalized_sentences = [s.lower() for s in sentences]

    # Score every sentence against the top N_2 keywords of the whole text.
    top_n_words = extract_keywords(txt, N_2, tmp_cuttor)
    scored_sentences = __score_sentences(normalized_sentences, top_n_words, tmp_cuttor)

    # Keep the TOP_SENTENCES highest-scoring sentences, then restore their
    # original order before joining them into the summary.
    top_n_scored = sorted(scored_sentences, key=lambda s: s[1])[-TOP_SENTENCES:]
    top_n_scored = sorted(top_n_scored, key=lambda s: s[0])
    top_n_summary = [sentences[idx] for (idx, score) in top_n_scored]
    #return ', '.join(top_n_summary) + '.'
    return u'。 '.join(top_n_summary) + u'。 '
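A minimal usage sketch follows; the module name summarize and the sample text are assumptions for illustration, and the module defining summarize2 is expected to provide Cuttor, cut_sentence, extract_keywords and the other names referenced above.

# -*- coding: utf-8 -*-
# Hypothetical usage of summarize2; the import path and sample text are
# assumptions, not part of the example above.
from summarize import summarize2

text = (u'今天天气很好。我们去公园散步。'
        u'公园里人很多。大家都玩得很开心。')
print(summarize2(text))  # top-ranked sentences joined with '。 '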
Example #2
def summarize3(txt, cuttor=None):
    # Mean and standard deviation are computed by _mean_std instead of numpy.
    # Use the caller's segmenter if given, otherwise build a default Cuttor
    # that treats runs of digits and ASCII letters as single tokens.
    if cuttor:
        tmp_cuttor = cuttor
    else:
        tmp_cuttor = Cuttor()
        tmp_cuttor.set_stage1_regex(re.compile(r'(\d+)|([a-zA-Z]+)', re.I | re.U))

    # Split the text into sentences and keep a lower-cased copy for scoring.
    sentences = list(cut_sentence(txt))
    normalized_sentences = [s.lower() for s in sentences]

    # Score every sentence against the top N_3 keywords of the whole text.
    top_n_words = extract_keywords(txt, N_3, tmp_cuttor)
    scored_sentences = __score_sentences(normalized_sentences, top_n_words, tmp_cuttor)

    # Keep only sentences scoring more than half a standard deviation above
    # the mean, preserving their original order.
    avg, std = _mean_std([s[1] for s in scored_sentences])
    mean_scored = [(sent_idx, score) for (sent_idx, score) in scored_sentences
                   if score > avg + 0.5 * std]
    mean_scored_summary = [sentences[idx] for (idx, score) in mean_scored]
    #return ', '.join(mean_scored_summary) + '.'
    return u'。 '.join(mean_scored_summary) + u'。 '
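The comment above notes that the mean and standard deviation are computed without numpy. The project's actual _mean_std is not shown in this example; a plain-Python helper with the same contract might look like the following sketch (returning the arithmetic mean and the population standard deviation is an assumption).

import math

def _mean_std(values):
    # Sketch of a helper matching the call site above; the real
    # implementation in the project may differ.
    n = len(values)
    mean = sum(values) / float(n)
    variance = sum((v - mean) ** 2 for v in values) / float(n)
    return mean, math.sqrt(variance)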