def get_highest_rated(self):
    """Return the article with the largest ``star`` value.

    The article is fetched from ``self.client['mh']['articles']`` and
    trimmed for presentation before being returned:

    * ``text`` and ``keywords`` are removed,
    * ``_id`` is converted to ``str``,
    * ``site`` is added as ``get_site(url)``,
    * ``title`` is replaced by ``strip_title(title)``.

    Returns:
        The trimmed article dict, or ``False`` when the collection holds
        no documents (same "no result" convention as ``get_best_match``).
    """
    artcol = self.client['mh']['articles']
    highest = artcol.find_one(sort=[("star", -1)])
    if highest is None:
        # find_one() yields None on an empty collection; bail out instead
        # of failing on the attribute access below.
        return False

    # Pop with a default so a document lacking either field is tolerated.
    highest.pop('text', None)
    highest.pop('keywords', None)
    highest['_id'] = str(highest['_id'])
    highest['site'] = get_site(highest['url'])
    highest['title'] = strip_title(highest['title'])
    return highest
def get_random_article(self):
    """Return one article picked uniformly at random by position.

    The article is read from ``self.client['mh']['articles']`` and trimmed
    for presentation before being returned:

    * ``text`` and ``keywords`` are removed,
    * ``_id`` is converted to ``str``,
    * ``site`` is added as ``get_site(url)``,
    * ``title`` is replaced by ``strip_title(title)``.

    Returns:
        The trimmed article dict, or ``False`` when the collection holds
        no documents (same "no result" convention as ``get_best_match``).
    """
    artcol = self.client['mh']['articles']
    # NOTE(review): Collection.count() is deprecated since PyMongo 3.7 and
    # removed in 4.0; switch to count_documents({}) when the driver is
    # upgraded.
    colsize = artcol.count()
    if not colsize:
        # random.randrange(0) raises ValueError, so handle the empty
        # collection explicitly.
        return False

    randart = artcol.find()[random.randrange(colsize)]
    # Pop with a default so a document lacking either field is tolerated.
    randart.pop('text', None)
    randart.pop('keywords', None)
    randart['_id'] = str(randart['_id'])
    randart['site'] = get_site(randart['url'])
    randart['title'] = strip_title(randart['title'])
    return randart
def get_best_match(self, w):
    """Return the article whose keywords best match the phrase ``w``.

    The phrase is lower-cased and split into words, and synonyms for each
    word are collected with ``find_syns``.  Every article whose
    ``keywords`` contain a word scores 3 points per matching word, and
    1 point per matching synonym.  The article with the highest total is
    chosen.

    The chosen article is trimmed for presentation before being returned:
    ``text`` and ``keywords`` are removed, ``_id`` is converted to ``str``,
    ``site`` is added as ``get_site(url)`` and ``title`` is replaced by
    ``strip_title(title)``.

    Args:
        w: Search phrase; words are separated by whitespace.

    Returns:
        The trimmed article dict, or ``False`` when no article matches any
        word or synonym.
    """
    # split() without an argument drops the empty tokens that
    # split(' ') would produce for repeated spaces.
    words = w.lower().split()

    synonyms = []
    for word in words:
        synonyms.extend(find_syns(word))

    artcol = self.client['mh']['articles']

    # Accumulate a relevance score per article _id.  Direct word hits weigh
    # three times as much as synonym hits.
    scores = {}
    for terms, weight in ((words, 3), (synonyms, 1)):
        for term in terms:
            # Only the _id is needed for scoring, so project everything
            # else away rather than transferring full article bodies.
            for art in artcol.find({'keywords': {'$in': [term]}}, {'_id': 1}):
                art_id = art['_id']
                scores[art_id] = scores.get(art_id, 0) + weight

    if not scores:
        return False

    best_id = max(scores, key=scores.get)
    best_article = artcol.find_one({'_id': best_id})
    if best_article is None:
        # The document is no longer retrievable; report "no match".
        return False

    # Pop with a default so a document lacking either field is tolerated.
    best_article.pop('text', None)
    best_article.pop('keywords', None)
    best_article['_id'] = str(best_article['_id'])
    best_article['site'] = get_site(best_article['url'])
    best_article['title'] = strip_title(best_article['title'])
    return best_article