Example #1
0
def reward2(s1, s2):
    """Return the TF-IDF relevancy of ``s1`` and ``s2``, offset by one.

    Every call creates a new ``TFIDF`` object and runs ``get_tfidf`` over
    the module-level ``corpus``.  A shallow copy of ``corpus``, taken before
    ``get_tfidf`` runs, is handed to ``relevancy`` as its second argument.

    NOTE(review): the meaning of that second argument and the reason for
    the ``+ 1`` shift are not visible here -- confirm against
    ``TFIDF.relevancy``.
    """
    # Snapshot first so the copy reflects corpus as it was on entry.
    corpus_snapshot = corpus[:]
    model = TFIDF()
    weights = model.get_tfidf(corpus)
    return model.relevancy(weights, corpus_snapshot, s1, s2) + 1
Example #2
0
# Debug output: show element [0] of the entry at index 3 of `wikipedia`.
# NOTE(review): code further down reads article[0] as a title and article[1]
# as the text, so this presumably prints the fourth article's title -- confirm.
print(wikipedia[3][0])
# Leftover from a function-style version of this step (kept commented out).
#return wikipedia

import copy

#def get_corpus():
# Build two parallel lists, `corpus` and `titles`.  Slot 0 of each comes from
# the news page (`news.text` / `soup.title`); one slot per wikipedia article
# is appended after it, so corpus[i] and titles[i] describe the same entry.
corpus = [news.text]
# NOTE(review): if `soup` is a BeautifulSoup object, soup.title is a tag
# object rather than a plain string -- confirm downstream code accepts that.
titles = [soup.title]
# Single-argument print(...) outputs exactly what the Python 2 print
# statement did, and is also valid syntax on Python 3.
print(len(corpus))

for article in wikipedia:
    # article[1] goes into corpus, article[0] into titles.
    # if article[0] in topics:
    corpus.append(article[1])
    titles.append(article[0])
print(len(corpus))
#return corpus, titles

from tfidf import TFIDF

#def get_sim_docs():
# Compute TF-IDF over `corpus`, then collect and print what
# tfi.similar_docs(tfidf, 0, 5) yields as (index, score) pairs.
# NOTE(review): presumably 0 is the query document's position in `corpus`
# and 5 the number of results requested -- confirm against
# TFIDF.similar_docs.
tfi = TFIDF()
tfidf = tfi.get_tfidf(corpus)

sim_docs = []
for index, score in tfi.similar_docs(tfidf, 0, 5):
    sim_docs.append((index, score))
    # %-formatting inside a single-argument print(...) produces the same
    # "score title" line as the Python 2 print statement and is also valid
    # on Python 3.
    print("%s %s" % (score, titles[index]))

# Guard the empty case: indexing sim_docs[0] would raise IndexError when
# similar_docs yields nothing.  %-formatting (rather than "+") also copes
# with a title that is not a plain string.
# NOTE(review): treating the first result as the most relevant assumes
# similar_docs yields best matches first -- confirm.
if sim_docs:
    print("Most relevant document is %s" % (titles[sim_docs[0][0]],))
else:
    print("No similar documents found")
#return sim_docs