Ejemplo n.º 1
0
 def recent_articles(self, by_field="META.ADDED", num=None):
     """Return the raw hits of a fixed-size, date-sorted search on "inca".

     The search asks for 100 documents ordered by the "date" field,
     descending.  ``by_field`` and ``num`` are part of the signature but
     are not consulted by this implementation.

     Returns:
         The ``['hits']['hits']`` list of the Elasticsearch response.
     """
     date_descending = [{"date": {"order": "desc"}}]
     query = {"size": 100, "sort": date_descending}
     response = es.search(index="inca", body=query)
     return response['hits']['hits']
Ejemplo n.º 2
0
def last_seen():
    """Return the Elasticsearch hits for the current user's latest News rows.

    The nine News rows with the highest id for the current user are read,
    and each row's ``elasticsearch`` id is looked up in the index.  Every
    hit found is tagged with the row's ``recommended`` value under the
    ``'recommended'`` key.

    Returns:
        A list of hit dicts, in the order the News rows were returned.
    """
    # Iterate the query exactly once; iterating it twice (once per column)
    # would execute the SQL statement twice.
    rows = News.query.filter_by(user_id=current_user.id).order_by(
        desc(News.id)).limit(9)
    news_last_seen = []
    for row in rows:
        response = es.search(
            index=indexName,
            body={"query": {"term": {"_id": row.elasticsearch}}},
        )
        # Fall back to an empty list: a placeholder string here would fail
        # on the item assignment below.
        hits = response.get('hits', {}).get('hits', [])
        for hit in hits:
            hit['recommended'] = row.recommended
            news_last_seen.append(hit)
    return news_last_seen
Ejemplo n.º 3
0
 def doctype_last(self, doctype, by_field="META.ADDED", num=None):
     """Return the most recent displayable documents of one doctype.

     Searches the index for documents whose ``self.doctypefield`` equals
     ``doctype``, sorted descending on ``by_field``, and keeps only the
     hits that pass all of the checks below.

     A hit is dropped when:
       * it lacks the text field, the title field, the topic field, or
         both the teaser field and its alternative;
       * its ``_id`` is among the current user's displayed news;
       * its ``paywall_na`` value equals ``True``;
       * its text contains "podcast" or "live".

     Args:
         doctype: value matched against ``self.doctypefield``.
         by_field: field used for the descending sort.
         num: maximum number of hits requested; defaults to
             ``self.num_less`` when ``None``.

     Returns:
         A list of the hit dicts that passed every check.
     """
     if num is None:
         num = self.num_less
     user = User.query.get(current_user.id)
     # A set gives constant-time membership tests inside the loop below.
     displayed_ids = {a.elasticsearch for a in user.displayed_news.all()}
     query = {
         "sort": [{
             by_field: {
                 "order": "desc"
             }
         }],
         "size": num,
         "query": {
             "bool": {
                 "filter": {
                     "term": {
                         self.doctypefield: doctype
                     }
                 }
             }
         }
     }
     # Default to an empty list so a response without hits yields no
     # results instead of a placeholder that cannot be indexed.
     docs = es.search(index=indexName, body=query).get('hits', {}).get(
         'hits', [])
     blocked_terms = ("podcast", "live")
     final_docs = []
     for doc in docs:
         source = doc["_source"]
         # topicfield is looked up as a bare name, unlike the self.* fields.
         if (self.textfield not in source
                 or self.titlefield not in source
                 or (self.teaserfield not in source
                     and self.teaseralt not in source)
                 or doc['_id'] in displayed_ids
                 or topicfield not in source):
             continue
         # Equality (not identity) is deliberate: it keeps matching any
         # stored value that compares equal to True.
         if source.get("paywall_na") == True:  # noqa: E712
             continue
         if any(term in source[self.textfield] for term in blocked_terms):
             continue
         final_docs.append(doc)
     return final_docs
Ejemplo n.º 4
0
 def get_selected(self):
     """Return the Elasticsearch hits for the current user's selected news.

     Each selected row's ``news_id`` is looked up with its own ``terms``
     query on ``_id``; the hits of all lookups are concatenated in row
     order.

     Returns:
         A flat list with the hits of every lookup.
     """
     user = User.query.get(current_user.id)
     news_ids = [row.news_id for row in user.selected_news.all()]
     docs = []
     for news_id in news_ids:
         query = {"query": {"terms": {"_id": [news_id]}}}
         response = es.search(index=indexName, body=query)
         docs.extend(response.get('hits', {}).get('hits', [""]))
     return docs
Ejemplo n.º 5
0
def show_detail(id):
    """Render the detail page of a selected article and record its rating.

    The ``News_sel`` row with the given id is loaded and its ``news_id`` is
    looked up in Elasticsearch to build the template variables.

    On a valid POST the reading time and the submitted ratings are stored
    on the row, a ``Points_ratings`` row is added (0.5 points, or 0 once
    the user has reached 5 points today), and the user is redirected to
    ``decision``.  Otherwise the reading start time is stored in the
    session and ``detail.html`` is rendered.

    Args:
        id: primary key of the ``News_sel`` row to show.

    Returns:
        A redirect response after a valid POST, else the rendered template.

    Raises:
        404 (via ``abort``) when the row does not exist, or when the
        article cannot be found in the index and a page must be rendered.
    """
    from flask import abort  # local import: only used on not-found paths

    selected = News_sel.query.filter_by(id=id).first()
    if selected is None:
        abort(404)

    hits = es.search(index=indexName, body={
        "query": {
            "term": {
                "_id": selected.news_id
            }
        }
    }).get('hits', {}).get('hits', [])

    article = None
    for item in hits:
        fields = item['_source']

        # Drop the '||.||' and '|||' markers, then split into parts on '||'.
        body = fields[rec.textfield]
        if "||" in body:
            body = body.replace('||.||', '').replace('|||', '')
            parts = body.split('||')
        else:
            parts = [body]

        # NOTE(review): the body above is read via rec.textfield while the
        # teaser uses the bare names teaserfield/textfield - confirm these
        # refer to the same configuration.
        try:
            teaser = fields[teaserfield]
        except KeyError:
            # No teaser stored: use the first 50 characters of the text
            # with anything that looks like a tag blanked out.
            teaser = re.sub(r'<.*?>', ' ', fields[textfield][:50])

        # Start from "no image" so an empty image list cannot leave the
        # variable unbound; the URL of the last image wins.
        image_url = []
        try:
            for image in fields['images']:
                image_url = image['url']
        except KeyError:
            image_url = []

        article = {
            'text': parts,
            'teaser': teaser,
            'title': fields['title'],
            'url': fields['url'],
            'image': image_url,
            'time': datetime.strptime(fields['date'], '%Y-%m-%dT%H:%M:%S'),
            'source': fields.get('publisher', "onbekende bron"),
        }

    form = rating()
    if request.method == 'POST' and form.validate():
        selected.starttime = session.pop('start_time', None)
        selected.endtime = datetime.utcnow()
        try:
            selected.time_spent = selected.endtime - selected.starttime
        except TypeError:
            # No usable start time in the session (e.g. it was missing).
            selected.time_spent = None
        if request.form['rating'] != '':
            selected.rating = request.form['rating']
        if request.form['rating2'] != '':
            selected.rating2 = request.form['rating2']
        db.session.commit()

        # Sum every row of today.  Keying a dict by date would keep only
        # one row per day, so the cap of 5 could never be reached.
        today = datetime.utcnow().date()
        earned = Points_ratings.query.filter_by(
            user_id=current_user.id).all()
        points_today = sum(row.points_ratings for row in earned
                           if row.timestamp.date() == today)
        awarded = 0 if points_today >= 5 else 0.5
        db.session.add(
            Points_ratings(points_ratings=awarded, user_id=current_user.id))
        db.session.commit()
        return redirect(url_for('decision'))

    if article is None:
        # Nothing was found in the index, so there is nothing to render.
        abort(404)

    session['start_time'] = datetime.utcnow()

    return render_template('detail.html', form=form, id=id, **article)
Ejemplo n.º 6
0
    def past_behavior(self):
        '''
        Recommend articles based on the stories the user has selected in the past.

        Similarity rows for the user's selected articles are read from the
        `similarities` table, filtered and ranked according to the user's
        diversity setting, and the top-ranked ids are fetched from
        Elasticsearch and marked `recommended = 1`. The list is then topped
        up to `self.num_select` with randomly sampled articles marked
        `recommended = 0`, and the combined list is returned in random order.

        Per the original author's note, the similarity coefficients
        (SoftCosineSimilarity) are expected to be in the SQL database already
        (filled by running the 'get_similarities' file on a regular basis);
        nothing is computed here.

        Returns the result of `self.random_selection()` when `dictionary`,
        `index` or `article_ids` is None, and the string "not enough stories"
        when the random top-up cannot be sampled.

        NOTE(review): several lines below look defective as written; each is
        flagged in place. Confirm against the rest of the module before
        relying on this method.
        '''
        # Fall back to a random selection when any of the similarity
        # resources (dictionary, index, article_ids) is None.
        if None in (dictionary, index, article_ids):
            final_list = self.random_selection()
            return (final_list)

        # Get the ids of all articles the user has selected and retrieve
        # their similarity rows.
        user = User.query.get(current_user.id)
        selected_articles = user.selected_news.all()
        selected_ids = [a.id for a in selected_articles]
        list_tuples = []
        # NOTE(review): the IN (...) list is built by string interpolation.
        # str.join only accepts strings, so this presumably fails if the ids
        # are integers - confirm, and prefer a parameterised query.
        cursor.execute(
            "select * from similarities where similarities.id_old in ('%s')" %
            "','".join(selected_ids))
        for item in cursor:
            list_tuples.append(item)

        # Build a dataframe of the similarity rows, keep a slice of each
        # 'id2' group depending on the diversity setting, and rank the 'url'
        # values by how often they remain.
        data = pd.DataFrame(list_tuples,
                            columns=['id', 'id2', 'url', 'similarity'])
        data['url'] = data['url'].str.decode('utf-8')
        # NOTE(review): after .str.decode the similarity values are text; the
        # numeric comparison on the next line presumably needs a float
        # conversion first - confirm.
        data['similarity'] = data['similarity'].str.decode('utf-8')
        data = data[data['similarity'] < 0.9]
        # NOTE(review): User.query.get() is given current_user.divers as the
        # lookup key, so `diversity` is whatever that lookup returns. The
        # integer comparisons below look like they expect the value of
        # current_user.divers itself - confirm.
        diversity = User.query.get(current_user.divers)
        # NOTE(review): there is no else branch, so `a` stays unbound when
        # diversity matches none of 1-5.
        # NOTE(review): branches 1 and 5 call .groupby('id2') twice in a row,
        # while branches 2-4 call it once per step; the doubled call looks
        # unintended - confirm.
        if diversity == 1:
            a = data.sort_values(
                by=['similarity'],
                ascending=False).groupby('id2').groupby('id2').apply(
                    lambda x: x.head(int(len(x) * 0.2))).reset_index(
                        drop=True).groupby('url').size().sort_values(
                            ascending=False)
        elif diversity == 2:
            a = data.sort_values(by=['similarity'], ascending=False).groupby(
                'id2').apply(lambda x: x.head(int(len(x) * 0.4))).reset_index(
                    drop=True).groupby('id2').apply(
                        lambda x: x.tail(int(len(x) * 0.5))).reset_index(
                            drop=True).groupby('url').size().sort_values(
                                ascending=False)
        elif diversity == 3:
            a = data.sort_values(by=['similarity'], ascending=False).groupby(
                'id2').apply(lambda x: x.head(int(len(x) * 0.6))).reset_index(
                    drop=True).groupby('id2').apply(
                        lambda x: x.tail(int(len(x) * 0.33))).reset_index(
                            drop=True).groupby('url').size().sort_values(
                                ascending=False)
        elif diversity == 4:
            a = data.sort_values(by=['similarity'], ascending=False).groupby(
                'id2').apply(lambda x: x.head(int(len(x) * 0.8))).reset_index(
                    drop=True).groupby('id2').apply(
                        lambda x: x.tail(int(len(x) * 0.25))).reset_index(
                            drop=True).groupby('url').size().sort_values(
                                ascending=False)
        elif diversity == 5:
            a = data.sort_values(
                by=['similarity'],
                ascending=False).groupby('id2').groupby('id2').apply(
                    lambda x: x.tail(int(len(x) * 0.2))).reset_index(
                        drop=True).groupby('url').size().sort_values(
                            ascending=False)
        # NOTE(review): as with `diversity`, the attribute value is used as a
        # key for User.query.get(). The handler assigns num_recommender to
        # itself, and the name has no earlier binding in this method, so it
        # cannot supply a fallback - confirm the intended default.
        try:
            num_recommender = User.query.get(current_user.num_recommended)
        except:
            num_recommender = num_recommender
        # NOTE(review): this indexes with the tuple (0, num_recommender); a
        # slice 0:num_recommender was presumably intended - confirm.
        recommender_ids = a.index[0, num_recommender]
        recommender_selection = es.search(index=indexName,
                                          body={
                                              "query": {
                                                  "terms": {
                                                      "_id": recommender_ids
                                                  }
                                              }
                                          }).get('hits', {}).get('hits', [""])
        # Possibly: weigh in the ratings for the past articles to determine
        # which ones get "preference".

        # Mark the selected articles as recommended, select random articles
        # from the non-recommended articles (and get more if not enough
        # unseen articles are available), put the two lists together,
        # randomize the ordering and return them.
        num_random = self.num_select - len(recommender_selection)
        # NOTE(review): new_articles is assigned further down in this method
        # (which makes it a local name) but is read here first, and query_ids
        # is not defined anywhere in this method - confirm where both should
        # come from.
        random_list = [
            a for a in new_articles
            if a["_id"] not in recommender_ids and a["_id"] not in query_ids
        ]
        try:
            random_selection = random.sample(random_list, num_random)
            for article in random_selection:
                article['recommended'] = 0
        except ValueError:
            # random.sample could not draw num_random items: retry with a
            # larger pool of self.num_more articles per source.
            try:
                newtry = self.num_more
                new_articles = [
                    self.doctype_last(s, num=newtry) for s in list_of_sources
                ]
                # NOTE(review): `articles` is not defined in this method; the
                # list built just above is named new_articles - confirm.
                new_articles = [a for b in articles for a in b]
                random_list = [
                    a for a in new_articles if a["_id"] not in recommender_ids
                ]
                random_selection = random.sample(random_list, num_random)
            except:
                # Any failure of the retry ends with a plain string result
                # instead of a list of articles.
                random_selection = "not enough stories"
                return (random_selection)
        for article in random_selection:
            article['recommended'] = 0
        for article in recommender_selection:
            article['recommended'] = 1
        final_list = recommender_selection + random_selection
        # Sampling the full length returns the combined list in random order.
        final_list = random.sample(final_list, len(final_list))
        return (final_list)