Beispiel #1
0
def test_rank2(obj):
    sents = list(cut_sentence(obj.content))
    docs = [list(Tokenize(sent)) for sent in sents]
    sim_res = bm25_weights(docs)
    rank = TextRank(sim_res)
    rank.solve()
    top_n_summary = []
    for index in sorted(rank.top_index(3)):
        top_n_summary.append(sents[index])
    print 'test_rank2', u'。 '.join(top_n_summary).replace('\r','').replace('\n','')+u'。'
Beispiel #2
0
def test_rank2(obj):
    sents = list(cut_sentence(obj.content))
    docs = [list(Tokenize(sent)) for sent in sents]
    sim_res = bm25_weights(docs)
    rank = TextRank(sim_res)
    rank.solve()
    top_n_summary = []
    for index in sorted(rank.top_index(3)):
        top_n_summary.append(sents[index])
    print 'test_rank2', u'。 '.join(top_n_summary).replace('\r', '').replace(
        '\n', '') + u'。'
Beispiel #3
0
def test_rank1(obj):
    sents = list(cut_sentence(obj.content))
    docs = [dictionary.doc2bow(list(Tokenize(sent))) for sent in sents]
    num_terms = 400
    test_corpus = lsi_model[tfidf_model[docs]]
    test_dense = matutils.corpus2dense(test_corpus, num_terms).T
    test_a = [a for a in test_dense]
    sim_res = np.fromiter(itertools.imap(cos_distance, itertools.product(test_a,test_a)), dtype=np.float)
    l = len(sents)
    sim_res = np.reshape(sim_res,(l,l))
    rank = TextRank(sim_res)
    rank.solve()
    top_n_summary = []
    for index in rank.top_index(5):
        top_n_summary.append(sents[index])
    print 'test_rank1 ', u'。 '.join(top_n_summary).replace('\r','').replace('\n','')
Beispiel #4
0
def test_rank1(obj):
    sents = list(cut_sentence(obj.content))
    docs = [dictionary.doc2bow(list(Tokenize(sent))) for sent in sents]
    num_terms = 400
    test_corpus = lsi_model[tfidf_model[docs]]
    test_dense = matutils.corpus2dense(test_corpus, num_terms).T
    test_a = [a for a in test_dense]
    sim_res = np.fromiter(itertools.imap(cos_distance,
                                         itertools.product(test_a, test_a)),
                          dtype=np.float)
    l = len(sents)
    sim_res = np.reshape(sim_res, (l, l))
    rank = TextRank(sim_res)
    rank.solve()
    top_n_summary = []
    for index in rank.top_index(5):
        top_n_summary.append(sents[index])
    print 'test_rank1 ', u'。 '.join(top_n_summary).replace('\r', '').replace(
        '\n', '')