def __denorm_title_on_clusters(self, feed, attrs):
    """Propagate a feed title change onto the denormalized
    ``Cluster.main_feed_title`` column.

    Only acts when *attrs* carries a new ``title``; the update targets
    clusters whose main article belongs to *feed* (scoped to the current
    user when one is set).
    """
    if 'title' not in attrs:
        return
    filters = [Article.feed_id == feed.id,
               Article.id == Cluster.main_article_id]
    if self.user_id:
        filters.append(Cluster.user_id == self.user_id)
    # bulk UPDATE; synchronize_session=False skips in-session refresh
    session.query(Cluster).filter(*filters).update(
        {Cluster.main_feed_title: attrs['title']},
        synchronize_session=False)
def _count_by(self, group_on, **filters):
    """Return ``{group_on value: cluster count}`` for articles matching
    *filters* (restricted to the current user when one is set).

    The outer join keeps articles without a cluster in the grouping.
    """
    if self.user_id:
        filters['user_id'] = self.user_id
    counting_query = (
        session.query(group_on, func.count(Article.cluster_id))
        .outerjoin(Cluster, Article.cluster_id == Cluster.id)
        .filter(*self._to_filters(**filters))
        .group_by(group_on))
    return dict(counting_query.all())
def count_by_user_id(self, **filters):
    """Return ``{user_id: article count}`` for matching articles,
    restricted to active users seen within the last 30 days.
    """
    last_conn_min = utc_now() - timedelta(days=30)
    counting_query = (
        session.query(Article.user_id, func.count(Article.id))
        .filter(*self._to_filters(**filters))
        .join(User)
        .filter(User.is_active.__eq__(True),
                User.last_connection >= last_conn_min)
        .group_by(Article.user_id))
    return dict(counting_query.all())
def list_w_categ(self):
    """Yield the user's categories interleaved with their feeds.

    Emits an ``all-categ`` pseudo-entry first, then uncategorized feeds,
    then each category followed by its feeds. Feeds in ``to_delete`` or
    ``deleting`` status are excluded.
    """
    feeds_by_categ = defaultdict(list)
    active_feeds = (
        session.query(*LIST_W_CATEG_MAPPING.values())
        .filter(Feed.user_id == self.user_id,
                Feed.status != FeedStatus.to_delete,
                Feed.status != FeedStatus.deleting)
        .order_by(Feed.title))
    for row in active_feeds:
        feed = dict(zip(LIST_W_CATEG_MAPPING, row))
        feed['type'] = 'feed'
        feeds_by_categ[feed['category_id']].append(feed)
    # pseudo category regrouping everything
    yield {'id': None, 'str': None, 'type': 'all-categ'}
    # feeds without category come right after
    yield from feeds_by_categ.get(None, [])
    categories = (
        session.query(Category.id, Category.name)
        .filter(Category.user_id == self.user_id)
        .order_by(Category.name.nullsfirst()))
    for categ_id, categ_name in categories:
        yield {'id': categ_id, 'str': categ_name, 'type': 'categ'}
        yield from feeds_by_categ.get(categ_id, [])
def _light_no_filter_query(self, processed_filters, limit=JR_PAGE_LENGTH):
    """If there's no filter to shorten the query (eg we're just selecting
    all feed with no category) we make a request more adapted to the task.
    """
    # pre-select the clusters in a CTE, already ordered and filtered
    clusters_cte = (
        session.query(*JR_SQLA_FIELDS)
        .filter(*processed_filters)
        .order_by(Cluster.main_date.desc())
        .cte('clu'))
    # aggregate all feed / category ids of the clusters' articles
    feeds_agg = func.array_agg(Article.feed_id).label('feeds_id')
    categs_agg = func.array_agg(Article.category_id).label('categories_id')
    joined_query = (
        session.query(clusters_cte, feeds_agg, categs_agg)
        .join(Article, Article.cluster_id == clusters_cte.c.id)
        .filter(Article.user_id == self.user_id))
    final_query = (
        joined_query.group_by(*clusters_cte.c)
        .order_by(clusters_cte.c.main_date.desc())
        .limit(limit))
    yield from self._iter_on_query(final_query)
def _get(self, **filters):
    """Abstract get.

    Will add the current user id if that one is not none (in which case
    the decision has been made in the code that the query shouldn't be
    user dependant) and the user is not an admin and the filters doesn't
    already contains a filter for that user.
    """
    must_scope_to_user = (self._user_id_key is not None
                          and self.user_id
                          and filters.get(self._user_id_key) != self.user_id)
    if must_scope_to_user:
        filters[self._user_id_key] = self.user_id
    return session.query(self._db_cls).filter(*self._to_filters(**filters))
def get_unreads(self):
    """Return unread cluster counts keyed by ``"feed-<id>"`` and
    ``"categ-<id>"`` for the current user.

    Category counters accumulate across their feeds; feed counters are
    set directly (one row per (category, feed) pair from the grouping).
    """
    counters = defaultdict(int)
    unread_rows = (
        session.query(Article.category_id, Article.feed_id,
                      func.count(Cluster.id))
        .join(Article, and_(Article.cluster_id == Cluster.id,
                            Article.user_id == self.user_id))
        .filter(and_(Cluster.user_id == self.user_id,
                     Cluster.read.__eq__(False)))
        .group_by(Article.category_id, Article.feed_id))
    for categ_id, feed_id, unread_count in unread_rows:
        if categ_id:
            counters["categ-%d" % categ_id] += unread_count
        counters["feed-%d" % feed_id] = unread_count
    return counters
def join_read(self, feed_id=None, limit=JR_PAGE_LENGTH, **filters):
    """Yield clusters joined with their read state, aggregating the
    feed and category ids of all their articles.

    :param feed_id: restrict to clusters having an article in that feed
    :param limit: maximum number of clusters yielded
    :param filters: per-article filters (may include ``category_id``)
    """
    filter_on_cat = 'category_id' in filters
    cat_id = filters.pop('category_id', None)
    if self.user_id:
        filters['user_id'] = self.user_id
    self._preprocess_per_article_filters(filters)
    if 'id__in' in filters and not filters['id__in']:
        # filtering by article but did not find anything
        return
    processed_filters = self._to_filters(**filters)
    if feed_id is None and not filter_on_cat:
        # no filter with an interesting index to use, using another query
        # BUGFIX: forward the caller-supplied ``limit`` instead of always
        # using the JR_PAGE_LENGTH default, which silently ignored the
        # ``limit`` argument on this fast path
        yield from self._light_no_filter_query(processed_filters, limit)
        return

    art_feed_alias, art_cat_alias = aliased(Article), aliased(Article)
    # DESC of what's going on below :
    # base query with the above fields and the aggregations
    query = session.query(*self._get_selected(
        JR_FIELDS, art_feed_alias, art_cat_alias, filter_on_cat))

    # adding parent filter, but we can't just filter on one id, because
    # we'll miss all the other parent of the cluster
    if feed_id:
        query = self._join_on_exist(query, art_feed_alias,
                                    'feed_id', feed_id,
                                    processed_filters)
    else:
        query = query.join(
            art_feed_alias,
            and_(art_feed_alias.user_id == self.user_id,
                 art_feed_alias.cluster_id == Cluster.id,
                 *processed_filters))
    if filter_on_cat:
        # joining only if filtering on categories to lighten the query
        # as every article doesn't obligatorily have a category > outerjoin
        query = self._join_on_exist(query, art_cat_alias,
                                    'category_id', cat_id,
                                    processed_filters)

    # applying common filter (read / liked)
    # grouping all the fields so that agreg works on distant ids
    yield from self._iter_on_query(
        query.group_by(*JR_SQLA_FIELDS).filter(
            *processed_filters).order_by(
                Cluster.main_date.desc()).limit(limit))
def get_user_id_with_pending_articles():
    """Yield the id of every user owning at least one article that has
    not been clustered yet (``cluster_id`` is NULL)."""
    pending_users = (
        session.query(Article.user_id)
        .filter(Article.cluster_id.__eq__(None))
        .group_by(Article.user_id))
    for (user_id,) in pending_users:
        yield user_id
def count_by_feed(self, **filters):
    """Return ``{feed_id: article count}`` for articles matching
    *filters*, restricted to the current user when one is set.
    """
    if self.user_id:
        filters['user_id'] = self.user_id
    # CONSISTENCY FIX: count the actual column (as count_by_user_id does)
    # instead of func.count('id'), which counted the literal string 'id'
    # — same result (the PK is never NULL) but needlessly obscure
    return dict(
        session.query(Article.feed_id, func.count(Article.id)).filter(
            *self._to_filters(**filters)).group_by(Article.feed_id).all())
def _filter_unclustered(*fields):
    """Build a query selecting *fields* over unclustered articles,
    limited to active users seen within ``conf.feed.stop_fetch`` days."""
    last_conn_min = utc_now() - timedelta(days=conf.feed.stop_fetch)
    unclustered = (
        session.query(*fields)
        .filter(Article.cluster_id.__eq__(None))
        .join(User)
        .filter(User.id == Article.user_id,
                User.is_active.__eq__(True),
                User.last_connection >= last_conn_min))
    return unclustered