def reward2(s1, s2):
    """Score ``s1`` against ``s2`` via ``TFIDF.relevancy`` and add one.

    Every call builds a new ``TFIDF`` object and recomputes the TF-IDF
    representation of the module-level ``corpus``.

    Args:
        s1: Forwarded unchanged as the third argument of ``TFIDF.relevancy``.
        s2: Forwarded unchanged as the fourth argument of ``TFIDF.relevancy``.

    Returns:
        The value returned by ``TFIDF.relevancy`` plus 1.
    """
    # Snapshot the shared corpus before any TFIDF work; relevancy() receives
    # this copy rather than the module-level list itself.
    corpus_snapshot = corpus[:]
    scorer = TFIDF()
    weights = scorer.get_tfidf(corpus)
    return scorer.relevancy(weights, corpus_snapshot, s1, s2) + 1
# Script section: builds the comparison corpus and reports the documents that
# TFIDF.similar_docs returns for it.  The steps were once wrapped in helper
# functions (get_corpus / get_sim_docs) and now run inline at module level.
# Every print below is a single-argument call, so the output is the same
# under Python 2 and Python 3.

# Debug output: first field of the fourth wikipedia entry.
print(wikipedia[3][0])

import copy

# --- Build the corpus (formerly get_corpus) ----------------------------------
# Entry 0 comes from news.text / soup.title; the wikipedia entries follow.
# titles[i] is the label for corpus[i].
corpus = [news.text]
titles = [soup.title]
print(len(corpus))
for article in wikipedia:
    # The `article[0] in topics` filter is disabled: every entry is added.
    corpus.append(article[1])
    titles.append(article[0])
print(len(corpus))

from tfidf import TFIDF

# --- Rank documents (formerly get_sim_docs) ----------------------------------
tfi = TFIDF()
tfidf = tfi.get_tfidf(corpus)
sim_docs = []
# NOTE(review): the arguments 0 and 5 presumably mean "relative to corpus
# entry 0, at most 5 results" -- confirm against TFIDF.similar_docs.
for index, score in tfi.similar_docs(tfidf, 0, 5):
    sim_docs.append((index, score))
    print("%s %s" % (score, titles[index]))

# The first result is reported as the most relevant one (assumes similar_docs
# yields the best match first -- TODO confirm).  Guard against an empty result
# so the script does not die with an IndexError, and format with %s so a
# non-string title (titles[0] is soup.title) cannot raise a TypeError.
if sim_docs:
    print("Most relevant document is %s" % (titles[sim_docs[0][0]],))
else:
    print("No similar documents found")