def test_rank2(obj):
    """Print a three-sentence extractive summary of ``obj.content``.

    The text is split with ``cut_sentence`` and every sentence is tokenized
    with ``Tokenize``.  The token lists are passed to ``bm25_weights`` and
    the result is handed to ``TextRank``.  After ``solve()``, the indices
    returned by ``top_index(3)`` are sorted ascending, so the selected
    sentences are printed in the order ``cut_sentence`` produced them.

    Args:
        obj: Any object exposing a ``content`` attribute that
            ``cut_sentence`` accepts (presumably unicode text -- confirm
            against the caller).

    Returns:
        None.  The summary is written to stdout, prefixed with the label
        ``test_rank2``.
    """
    sents = list(cut_sentence(obj.content))
    docs = [list(Tokenize(sent)) for sent in sents]

    # NOTE(review): bm25_weights presumably returns a sentence-by-sentence
    # similarity matrix, since its result is what TextRank ranks -- confirm.
    rank = TextRank(bm25_weights(docs))
    rank.solve()

    # Sorting the winning indices restores the original sentence order.
    top_n_summary = [sents[index] for index in sorted(rank.top_index(3))]

    summary = u'。 '.join(top_n_summary)
    summary = summary.replace('\r', '').replace('\n', '') + u'。'

    # A single parenthesised argument parses the same way as a Python 2
    # print statement and as a Python 3 print() call.  Joining on one space
    # reproduces the separator that ``print label, summary`` emitted.
    print(u' '.join(('test_rank2', summary)))
def test_rank2(obj):
    """Print a three-sentence extractive summary of ``obj.content``.

    The text is split with ``cut_sentence`` and every sentence is tokenized
    with ``Tokenize``.  The token lists are passed to ``bm25_weights`` and
    the result is handed to ``TextRank``.  After ``solve()``, the indices
    returned by ``top_index(3)`` are sorted ascending, so the selected
    sentences are printed in the order ``cut_sentence`` produced them.

    Args:
        obj: Any object exposing a ``content`` attribute that
            ``cut_sentence`` accepts (presumably unicode text -- confirm
            against the caller).

    Returns:
        None.  The summary is written to stdout, prefixed with the label
        ``test_rank2``.
    """
    sents = list(cut_sentence(obj.content))
    docs = [list(Tokenize(sent)) for sent in sents]

    # NOTE(review): bm25_weights presumably returns a sentence-by-sentence
    # similarity matrix, since its result is what TextRank ranks -- confirm.
    rank = TextRank(bm25_weights(docs))
    rank.solve()

    # Sorting the winning indices restores the original sentence order.
    top_n_summary = [sents[index] for index in sorted(rank.top_index(3))]

    summary = u'。 '.join(top_n_summary)
    summary = summary.replace('\r', '').replace('\n', '') + u'。'

    # A single parenthesised argument parses the same way as a Python 2
    # print statement and as a Python 3 print() call.  Joining on one space
    # reproduces the separator that ``print label, summary`` emitted.
    print(u' '.join(('test_rank2', summary)))
def test_rank1(obj):
    """Print a five-sentence extractive summary of ``obj.content``.

    Each sentence from ``cut_sentence`` is tokenized with ``Tokenize`` and
    converted with ``dictionary.doc2bow``.  The resulting corpus is passed
    through the module-level ``tfidf_model`` and then ``lsi_model``, and
    densified with ``matutils.corpus2dense``.  ``cos_distance`` is evaluated
    for every ordered pair of dense vectors and the values are reshaped into
    a square matrix that ``TextRank`` ranks.  The sentences at
    ``top_index(5)`` are printed in the order ``top_index`` returns them;
    the indices are not sorted.

    Args:
        obj: Any object exposing a ``content`` attribute that
            ``cut_sentence`` accepts (presumably unicode text -- confirm
            against the caller).

    Returns:
        None.  The summary is written to stdout, prefixed with the label
        ``test_rank1 ``.
    """
    sents = list(cut_sentence(obj.content))
    docs = [dictionary.doc2bow(list(Tokenize(sent))) for sent in sents]

    # NOTE(review): 400 is passed as corpus2dense's row count; presumably it
    # must match the number of topics of lsi_model -- confirm.
    num_terms = 400
    test_corpus = lsi_model[tfidf_model[docs]]

    # Transposed so that iterating yields one dense vector per sentence
    # (assumes corpus2dense returns terms x documents -- TODO confirm).
    test_dense = matutils.corpus2dense(test_corpus, num_terms).T
    test_a = list(test_dense)

    # cos_distance is called with ONE argument: the (vector, vector) tuple
    # produced by itertools.product.  A generator expression replaces the
    # Python-2-only itertools.imap, and builtin float replaces the np.float
    # alias that newer NumPy releases no longer provide.
    pairs = itertools.product(test_a, test_a)
    sim_res = np.fromiter(
        (cos_distance(pair) for pair in pairs), dtype=float)

    n_sents = len(sents)
    sim_res = np.reshape(sim_res, (n_sents, n_sents))

    rank = TextRank(sim_res)
    rank.solve()

    top_n_summary = [sents[index] for index in rank.top_index(5)]
    summary = u'。 '.join(top_n_summary)
    summary = summary.replace('\r', '').replace('\n', '')

    # A single parenthesised argument parses the same way as a Python 2
    # print statement and as a Python 3 print() call.  The label keeps its
    # trailing space and is joined on one more space, reproducing what
    # ``print label, summary`` emitted.
    print(u' '.join(('test_rank1 ', summary)))
def test_rank1(obj):
    """Print a five-sentence extractive summary of ``obj.content``.

    Each sentence from ``cut_sentence`` is tokenized with ``Tokenize`` and
    converted with ``dictionary.doc2bow``.  The resulting corpus is passed
    through the module-level ``tfidf_model`` and then ``lsi_model``, and
    densified with ``matutils.corpus2dense``.  ``cos_distance`` is evaluated
    for every ordered pair of dense vectors and the values are reshaped into
    a square matrix that ``TextRank`` ranks.  The sentences at
    ``top_index(5)`` are printed in the order ``top_index`` returns them;
    the indices are not sorted.

    Args:
        obj: Any object exposing a ``content`` attribute that
            ``cut_sentence`` accepts (presumably unicode text -- confirm
            against the caller).

    Returns:
        None.  The summary is written to stdout, prefixed with the label
        ``test_rank1 ``.
    """
    sents = list(cut_sentence(obj.content))
    docs = [dictionary.doc2bow(list(Tokenize(sent))) for sent in sents]

    # NOTE(review): 400 is passed as corpus2dense's row count; presumably it
    # must match the number of topics of lsi_model -- confirm.
    num_terms = 400
    test_corpus = lsi_model[tfidf_model[docs]]

    # Transposed so that iterating yields one dense vector per sentence
    # (assumes corpus2dense returns terms x documents -- TODO confirm).
    test_dense = matutils.corpus2dense(test_corpus, num_terms).T
    test_a = list(test_dense)

    # cos_distance is called with ONE argument: the (vector, vector) tuple
    # produced by itertools.product.  A generator expression replaces the
    # Python-2-only itertools.imap, and builtin float replaces the np.float
    # alias that newer NumPy releases no longer provide.
    pairs = itertools.product(test_a, test_a)
    sim_res = np.fromiter(
        (cos_distance(pair) for pair in pairs), dtype=float)

    n_sents = len(sents)
    sim_res = np.reshape(sim_res, (n_sents, n_sents))

    rank = TextRank(sim_res)
    rank.solve()

    top_n_summary = [sents[index] for index in rank.top_index(5)]
    summary = u'。 '.join(top_n_summary)
    summary = summary.replace('\r', '').replace('\n', '')

    # A single parenthesised argument parses the same way as a Python 2
    # print statement and as a Python 3 print() call.  The label keeps its
    # trailing space and is joined on one more space, reproducing what
    # ``print label, summary`` emitted.
    print(u' '.join(('test_rank1 ', summary)))