def recent_articles(self, by_field="META.ADDED", num=None):
    '''Return the most recently added articles, newest first.'''
    if num is None:
        num = 100
    body = dict(
        size=num,
        sort=[{by_field: {"order": "desc"}}],
    )
    docs = es.search(index="inca", body=body)['hits']['hits']
    return docs
def last_seen():
    news = News.query.filter_by(user_id=current_user.id).order_by(desc(News.id)).limit(9)
    news_ids = [item.elasticsearch for item in news]
    recommended = [item.recommended for item in news]
    id_rec = zip(news_ids, recommended)
    news_last_seen = []
    for es_id, rec_flag in id_rec:
        doc = es.search(index=indexName,
                        body={"query": {"term": {"_id": es_id}}}
                        ).get('hits', {}).get('hits', [""])
        for text in doc:
            text['recommended'] = rec_flag
            news_last_seen.append(text)
    return news_last_seen
def doctype_last(self, doctype, by_field="META.ADDED", num=None):
    '''Return the latest articles of one doctype (outlet), skipping articles that
    lack required fields, are paywalled, are podcasts or liveblogs, or have
    already been displayed to the current user.'''
    if num is None:
        num = self.num_less
    user = User.query.get(current_user.id)
    displayed_articles = user.displayed_news.all()
    displayed_ids = [a.elasticsearch for a in displayed_articles]
    docs = es.search(index=indexName,
                     body={
                         "sort": [{by_field: {"order": "desc"}}],
                         "size": num,
                         "query": {
                             "bool": {
                                 "filter": {"term": {self.doctypefield: doctype}}
                             }
                         }
                     }).get('hits', {}).get('hits', [""])

    final_docs = []
    excluded_terms = ["podcast", "live"]
    for doc in docs:
        source = doc["_source"]
        # Skip documents that miss a required field or were already displayed.
        if (self.textfield not in source
                or self.titlefield not in source
                or (self.teaserfield not in source and self.teaseralt not in source)
                or topicfield not in source
                or doc['_id'] in displayed_ids):
            continue
        # Skip paywalled articles.
        if source.get("paywall_na") == True:
            continue
        # Skip podcasts and liveblogs.
        if any(term in source[self.textfield] for term in excluded_terms):
            continue
        final_docs.append(doc)
    return final_docs
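# Usage sketch (illustration only, not part of the app): doctype_last is typically
# called once per outlet and the per-outlet lists are flattened into one candidate
# pool, as past_behavior does below. `rec` and `list_of_sources` are assumed to be
# defined elsewhere in the module.
#
#   pool = [rec.doctype_last(source) for source in list_of_sources]
#   pool = [article for per_outlet in pool for article in per_outlet]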
def get_selected(self):
    user = User.query.get(current_user.id)
    selected_articles = user.selected_news.all()
    selected_ids = [a.news_id for a in selected_articles]
    docs = []
    for item in selected_ids:
        doc = es.search(index=indexName,
                        body={"query": {"terms": {"_id": [item]}}}
                        ).get('hits', {}).get('hits', [""])
        for d in doc:
            docs.append(d)
    return docs
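# Sketch of an alternative (not used by the app): the loop above issues one search
# per selected article; the same documents could be fetched in a single round trip
# with one terms query. Elasticsearch returns at most 10 hits by default, hence the
# explicit size.
#
#   docs = es.search(index=indexName, body={
#       "query": {"terms": {"_id": selected_ids}},
#       "size": len(selected_ids),
#   }).get('hits', {}).get('hits', [])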
def show_detail(id):
    '''Show the detail page for a selected article and store the user's rating.'''
    selected = News_sel.query.filter_by(id=id).first()
    es_id = selected.news_id
    doc = es.search(index=indexName,
                    body={"query": {"term": {"_id": es_id}}}
                    ).get('hits', {}).get('hits', [""])
    for item in doc:
        # Split the stored text into paragraphs: '||.||' and '|||' markers are
        # dropped, a plain '||' marks a paragraph break.
        text = item['_source'][rec.textfield]
        if "||" in text:
            text = ''.join(re.split(r'\|\|\.\|\|', text))
            text = ''.join(re.split(r'\|\|\|', text))
            text = re.split(r'\|\|', text)
        else:
            text = [text]
        try:
            teaser = item['_source'][rec.teaserfield]
        except KeyError:
            teaser = item['_source'][rec.textfield][:50]
        teaser = re.sub(r'<.*?>', ' ', teaser)  # strip html tags from the teaser
        title = item['_source']['title']
        url = item['_source']['url']
        publication_date = datetime.strptime(item['_source']['date'],
                                             '%Y-%m-%dT%H:%M:%S')
        try:
            # Keep the url of the (last) image attached to the article.
            for image in item['_source']['images']:
                image_url = image['url']
        except KeyError:
            image_url = []
        try:
            source = item['_source']['publisher']
        except KeyError:
            source = "onbekende bron"  # "unknown source"

    form = rating()
    if request.method == 'POST' and form.validate():
        # Record how long the user spent reading the article.
        selected.starttime = session.pop('start_time', None)
        selected.endtime = datetime.utcnow()
        try:
            selected.time_spent = selected.endtime - selected.starttime
        except TypeError:
            selected.time_spent = None
        # Store the ratings, if given.
        if request.form['rating'] != '':
            selected.rating = request.form['rating']
        if request.form['rating2'] != '':
            selected.rating2 = request.form['rating2']
        db.session.commit()
        # Award half a point per rating, capped at five points per day.
        points_ratings = Points_ratings.query.filter_by(user_id=current_user.id).all()
        if not points_ratings:
            ratings = Points_ratings(points_ratings=0.5, user_id=current_user.id)
        else:
            today = datetime.utcnow().date()
            points_today = sum(entry.points_ratings for entry in points_ratings
                               if entry.timestamp.date() == today)
            points = 0 if points_today >= 5 else 0.5
            ratings = Points_ratings(points_ratings=points, user_id=current_user.id)
        db.session.add(ratings)
        db.session.commit()
        return redirect(url_for('decision'))

    session['start_time'] = datetime.utcnow()
    return render_template('detail.html', text=text, teaser=teaser, title=title,
                           url=url, image=image_url, time=publication_date,
                           source=source, form=form, id=id)
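# Illustration of the paragraph splitting in show_detail above (the sample string and
# the helper name are invented, not used by the app): '||.||' and '|||' markers are
# removed, a plain '||' becomes a paragraph break.
def _split_example():
    import re
    sample = "Intro sentence.||.||still the intro.||Second paragraph|||, continued.||Third paragraph."
    sample = ''.join(re.split(r'\|\|\.\|\|', sample))
    sample = ''.join(re.split(r'\|\|\|', sample))
    paragraphs = re.split(r'\|\|', sample)
    # paragraphs == ['Intro sentence.still the intro.',
    #                'Second paragraph, continued.',
    #                'Third paragraph.']
    return paragraphs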
def past_behavior(self):
    '''
    Recommends articles based on the stories the user has selected in the past,
    using SoftCosineSimilarity. The similarity coefficients should already be in
    the SQL database (produced by running the 'get_similarities' script on a
    regular basis) and only need to be retrieved; nothing is calculated at this
    point.
    '''
    # The dictionary, similarity index and article_ids (used to build a query out
    # of the past selected articles with the tfidf model) are loaded elsewhere; if
    # any of them is missing, fall back to a random selection.
    if None in (dictionary, index, article_ids):
        final_list = self.random_selection()
        return final_list

    # Get the ids of all articles the user has read and retrieve their
    # precomputed similarities from the database.
    user = User.query.get(current_user.id)
    selected_articles = user.selected_news.all()
    selected_ids = [a.id for a in selected_articles]
    list_tuples = []
    cursor.execute(
        "select * from similarities where similarities.id_old in ('%s')"
        % "','".join(str(i) for i in selected_ids))
    for item in cursor:
        list_tuples.append(item)

    # Build a dataframe, keep for every read article the similarity band that
    # matches the user's diversity setting, and recommend the urls that end up in
    # that band most often across the read articles.
    data = pd.DataFrame(list_tuples, columns=['id', 'id2', 'url', 'similarity'])
    data['url'] = data['url'].str.decode('utf-8')
    data['similarity'] = data['similarity'].str.decode('utf-8').astype(float)
    data = data[data['similarity'] < 0.9]  # drop near-duplicates of read articles

    # Per read article (id2): keep the top `head` fraction of candidates by
    # similarity and, if set, the bottom `tail` fraction of that subset. Higher
    # diversity settings move the band away from the most similar candidates.
    diversity = current_user.divers
    bands = {
        1: (0.2, None),
        2: (0.4, 0.5),
        3: (0.6, 0.33),
        4: (0.8, 0.25),
        5: (None, 0.2),
    }
    head, tail = bands[diversity]
    candidates = data.sort_values(by=['similarity'], ascending=False)
    if head is not None:
        candidates = candidates.groupby('id2').apply(
            lambda x: x.head(int(len(x) * head))).reset_index(drop=True)
    if tail is not None:
        candidates = candidates.groupby('id2').apply(
            lambda x: x.tail(int(len(x) * tail))).reset_index(drop=True)
    url_counts = candidates.groupby('url').size().sort_values(ascending=False)

    try:
        num_rec = current_user.num_recommended
    except AttributeError:
        # Fall back to the module-level default.
        num_rec = num_recommender
    recommender_ids = list(url_counts.index[:num_rec])
    recommender_selection = es.search(index=indexName,
                                      body={
                                          "query": {"terms": {"_id": recommender_ids}}
                                      }).get('hits', {}).get('hits', [""])

    # Possibly: weigh in the ratings for the past articles to determine which
    # ones get "preference".

    # Mark the recommended articles, fill up with random articles from the
    # non-recommended pool (fetching more if not enough unseen articles are
    # available), shuffle the combined list and return it.
    num_random = self.num_select - len(recommender_selection)
    random_list = [
        article for article in new_articles
        if article["_id"] not in recommender_ids and article["_id"] not in query_ids
    ]
    try:
        random_selection = random.sample(random_list, num_random)
    except ValueError:
        # Not enough unseen articles: fetch a larger batch per outlet and retry.
        try:
            newtry = self.num_more
            new_articles = [self.doctype_last(s, num=newtry) for s in list_of_sources]
            new_articles = [article for batch in new_articles for article in batch]
            random_list = [article for article in new_articles
                           if article["_id"] not in recommender_ids]
            random_selection = random.sample(random_list, num_random)
        except ValueError:
            random_selection = "not enough stories"
            return random_selection

    # Mark which articles were recommended, combine the lists, shuffle and return.
    for article in random_selection:
        article['recommended'] = 0
    for article in recommender_selection:
        article['recommended'] = 1
    final_list = recommender_selection + random_selection
    final_list = random.sample(final_list, len(final_list))
    return final_list
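# Toy illustration (invented numbers and helper name, not used by the app) of the
# diversity bands in past_behavior: per read article, keep the top `head` fraction of
# candidates by similarity and then the bottom `tail` fraction of that subset, so
# higher diversity settings recommend less similar articles.
def _diversity_band_example():
    import pandas as pd
    toy = pd.DataFrame({
        'id2': [1] * 10,
        'url': ['url%d' % i for i in range(10)],
        'similarity': [0.85, 0.80, 0.75, 0.70, 0.65, 0.60, 0.55, 0.50, 0.45, 0.40],
    })
    # diversity == 3: head 60% of the candidates, then the bottom third of that subset.
    band = (toy.sort_values(by=['similarity'], ascending=False)
               .groupby('id2').apply(lambda x: x.head(int(len(x) * 0.6)))
               .reset_index(drop=True)
               .groupby('id2').apply(lambda x: x.tail(int(len(x) * 0.33)))
               .reset_index(drop=True))
    return band['url'].tolist()  # ['url5']: a moderately similar candidate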