def remove_from_cluster(self, article):
    """Detach ``article`` from the cluster it currently belongs to.

    If the article is the only member of its cluster, the cluster is
    deleted (called with ``delete_articles=False``). Otherwise another
    member of the cluster is handed to ``Clusterizer.enrich_cluster`` with
    ``force_article_as_main=True``. In both cases the clustering columns of
    the article are then reset to ``None``.

    Nothing is done when the article has no ``cluster_id`` or when the
    cluster cannot be read through this user's controller.

    Always returns ``None``.
    """
    from jarr.controllers.cluster import ClusterController
    from jarr.controllers.article_clusterizer import Clusterizer
    if not article.cluster_id:
        return
    cluster_controller = ClusterController(self.user_id)
    cluster = cluster_controller.read(id=article.cluster_id).first()
    if not cluster:
        return

    # First member of the cluster that is not the article being removed.
    replacement = next(
        (member for member in cluster.articles if member.id != article.id),
        None)
    if replacement is None:
        # The article was alone in its cluster: drop the cluster itself.
        cluster_controller.delete(cluster.id, delete_articles=False)
    else:
        if cluster.main_article_id == article.id:
            cluster.main_article_id = None
        clusterizer = Clusterizer(article.user_id)
        clusterizer.enrich_cluster(cluster, replacement,
                                   cluster.read, cluster.liked,
                                   force_article_as_main=True)

    cleared_columns = {
        'cluster_id': None,
        'cluster_reason': None,
        'cluster_score': None,
        'cluster_tfidf_with': None,
        'cluster_tfidf_neighbor_size': None,
    }
    self.update({'id': article.id}, cleared_columns)
def test_adding_to_cluster_by_link(self):
    """A clone of an article joining a read cluster makes it unread again.

    The cluster is first marked as read, one of its articles is cloned into
    another feed, and after clusterizing the cluster must hold one more
    article and no longer be flagged as read.
    """
    cluster_ctrl = ClusterController()
    initial = cluster_ctrl.read().first()
    mark_as_read = {'read': True, 'read_reason': 'marked'}
    cluster_ctrl.update({'id': initial.id}, mark_as_read)
    read_cluster = cluster_ctrl.get(id=initial.id)
    self.assertTrue(read_cluster.read)

    source_article = read_cluster.articles[0]
    size_before = len(read_cluster.articles)
    feed_ctrl = FeedController(read_cluster.user_id)
    article_ctrl = ArticleController(read_cluster.user_id)
    feed_ctrl.update({'id': source_article.feed_id},
                     {'cluster_wake_up': True})
    # Any feed other than the one the source article comes from.
    other_feed = feed_ctrl.read(id__ne=source_article.feed_id).first()
    update_on_all_objs(articles=[source_article], feeds=[other_feed],
                       cluster_enabled=True)
    self._clone_article(article_ctrl, source_article, other_feed)
    cluster_ctrl.clusterize_pending_articles()

    refreshed = cluster_ctrl.get(id=read_cluster.id)
    self.assertEqual(size_before + 1, len(refreshed.articles))
    self.assertFalse(refreshed.read)
def test_no_mixup(self):
    """Clusters must never contain articles belonging to several users.

    Every article is duplicated once as-is and clusterized, then duplicated
    again under the user and feed of a single article; after each step every
    cluster must only hold articles of one user.
    """
    article_ctrl = ArticleController()
    cluster_ctrl = ClusterController()

    def owners(cluster):
        # Distinct user ids found among the articles of the cluster.
        return {member.user_id for member in cluster.articles}

    total_clusters = len(list(cluster_ctrl.read()))
    total_articles = len(list(article_ctrl.read()))
    for cluster in cluster_ctrl.read():
        self.assertEqual(1, len(cluster.articles))

    # First pass: duplicate each article into its own feed.
    for article in article_ctrl.read():
        article_ctrl.create(
            entry_id=article.entry_id,
            feed_id=article.feed_id,
            title=article.title,
            content=article.content,
            link=article.link)
    for user_id in ArticleController.get_user_id_with_pending_articles():
        ClusterController(user_id).clusterize_pending_articles()

    self.assertEqual(2 * total_articles, len(list(article_ctrl.read())))
    self.assertEqual(2 * total_clusters, len(list(cluster_ctrl.read())))
    for cluster in cluster_ctrl.read():
        self.assertEqual(1, len(cluster.articles))
        self.assertEqual(1, len(owners(cluster)))

    # Second pass: duplicate each article under one single user and feed.
    main_article = article_ctrl.read().first()
    for article in article_ctrl.read():
        article_ctrl.create(
            user_id=main_article.user_id,
            feed_id=main_article.feed_id,
            entry_id=article.entry_id,
            title=article.title,
            content=article.content,
            link=article.link)
    for cluster in cluster_ctrl.read():
        self.assertEqual(1, len(owners(cluster)))
def setUp(self):
    """Prepare ``self.article``: an article whose cluster has been deleted.

    Also clears the cache of ``content_generator.get_content_generator``.
    """
    super().setUp()
    self.actrl = ArticleController()
    first_article = self.actrl.read().first()
    cluster_ctrl = ClusterController()
    # Drop the cluster only; the article itself must survive.
    cluster_ctrl.delete(first_article.cluster_id, delete_articles=False)
    # Fetch the article again now that its cluster is gone.
    self.article = self.actrl.get(id=first_article.id)
    content_generator.get_content_generator.cache_clear()
def _test_unread_on_cluster(self, read_reason):
    """Mark a cluster as read, then add a cloned article to it.

    The cluster is flagged as read with ``read_reason``; clustering and
    wake up are enabled on its feeds and on another feed of the same user;
    the main article is cloned into that other feed and pending articles
    are clusterized. The clone must end up in the original cluster.

    Returns the cluster freshly fetched through the controller so that the
    caller can check its final state.
    """
    cluster_ctrl = ClusterController()
    feed_ctrl = FeedController()
    cluster = cluster_ctrl.read().first()

    config_reader = Clusterizer()
    self.assertFalse(config_reader.get_config(cluster, 'cluster_enabled'))
    self.assertTrue(config_reader.get_config(cluster, 'cluster_wake_up'))

    read_flags = {'read': True, 'read_reason': read_reason}
    cluster_ctrl.update({'id': cluster.id}, read_flags)

    target_feed = feed_ctrl.read(
        id__ne=cluster.main_article.feed_id,
        user_id=cluster.user_id).first()
    config_reader = Clusterizer()
    self.assertFalse(
        config_reader.get_config(target_feed, 'cluster_enabled'))

    # Enable clustering on every feed involved in the scenario.
    involved_feed_ids = [f.id for f in cluster.feeds] + [target_feed.id]
    feed_ctrl.update(
        {'id__in': involved_feed_ids},
        {'cluster_wake_up': True, 'cluster_enabled': True})
    config_reader = Clusterizer()
    self.assertTrue(config_reader.get_config(cluster, 'cluster_enabled'))

    target_feed = feed_ctrl.read(
        id__ne=cluster.main_article.feed_id,
        user_id=cluster.user_id).first()
    article = self._clone_article(
        ArticleController(), cluster.main_article, target_feed)
    config_reader = Clusterizer()
    self.assertTrue(config_reader.get_config(article, 'cluster_wake_up'))

    ClusterController(cluster.user_id).clusterize_pending_articles()
    self.assertEqual(2, len(article.cluster.articles))
    self.assertInCluster(article, cluster)
    return cluster_ctrl.get(id=cluster.id)
def test_article_get_unread(self):
    """Check the per-feed unread counts returned for users 2 and 3."""
    # Each listed feed id is expected to map to a count of 3.
    expected_for_user_2 = dict.fromkeys((1, 2, 3, 7, 8, 9), 3)
    self.assertEqual(expected_for_user_2,
                     ClusterController(2).count_by_feed(read=False))

    expected_for_user_3 = dict.fromkeys((4, 5, 6, 10, 11, 12), 3)
    self.assertEqual(expected_for_user_3,
                     ClusterController(3).count_by_feed(read=False))
def test_similarity_clustering(self):
    """Exercise ``_get_cluster_by_similarity`` against mocked articles.

    With a small pool of candidates no cluster is returned; with a large
    pool the article sharing most of its vector with the clustered one is
    matched to that cluster, while an unrelated article matches nothing.
    """
    conf = {'tfidf_min_score': 0.6, 'tfidf_min_sample_size': 10}
    user = Mock(cluster_conf=conf)
    category = Mock(cluster_conf=conf)
    feed = Mock(cluster_conf=conf, user=user, category=category)
    cluster = Mock()

    def gen_articles(factor):
        # One clustered article followed by ``factor`` repetitions of four
        # unclustered ones.
        clustered = Mock(
            simple_vector={'Sarkozy': 1, 'garb': 1, 'justice': 1},
            feed=feed, cluster=cluster)
        unclustered = [
            Mock(feed=feed,
                 simple_vector={'Sark': 1, 'garbge': 1, 'vote': 1}),
            Mock(feed=feed,
                 simple_vector={'Sark': 1, 'garbae': 1, 'debat': 1}),
            Mock(feed=feed,
                 simple_vector={'Sark': 1, 'garbag': 1, 'blague': 1}),
            Mock(feed=feed,
                 simple_vector={'Sark': 1, 'garage': 1, 'chans': 1}),
        ]
        return [clustered] + unclustered * factor

    cluster_ctrl = ClusterController()

    # Small pool of candidates: no cluster is expected.
    cluster_ctrl._get_query_for_clustering = Mock(
        return_value=gen_articles(2))
    matching_article = Mock(
        simple_vector={'Morano': 1, 'garb': 1, 'justice': 1},
        date=utc_now(), lang='fr', feed=feed)
    self.assertIsNone(
        cluster_ctrl._get_cluster_by_similarity(matching_article))

    # Large pool of candidates: the matching article finds the cluster.
    cluster_ctrl._get_query_for_clustering = Mock(
        return_value=gen_articles(100))
    self.assertEqual(
        cluster_ctrl._get_cluster_by_similarity(matching_article), cluster)

    solo_article = Mock(
        simple_vector={'Sark': 1, 'fleur': 1},
        date=utc_now(), lang='fr', feed=feed)
    self.assertNotEqual(
        cluster, cluster_ctrl._get_cluster_by_similarity(solo_article))
    self.assertIsNone(
        cluster_ctrl._get_cluster_by_similarity(solo_article))
def delete(self, obj_id, commit=True):
    """Delete the feed ``obj_id`` together with its articles.

    The statements below run in this order:

    1. clusters of the feed's user whose main article belongs to the feed
       get their ``main_article_id`` set to ``None``;
    2. every article of the feed is deleted;
    3. clusters of that user left without a main article are given the
       title, id and feed title selected from an article still attached to
       them (ordered by ascending ``Article.date``, first row);
    4. clusters of that user still without a main article are deleted;
    5. the feed itself is deleted through ``super().delete``.

    NOTE(review): ``commit`` is accepted but never read in this body and is
    not forwarded to ``super().delete`` -- confirm whether this is intended.

    Returns whatever ``super().delete(obj_id)`` returns.
    """
    # Imported here rather than at module level -- presumably to avoid a
    # circular import between controllers; TODO confirm.
    from jarr.controllers.cluster import ClusterController
    feed = self.get(id=obj_id)
    logger.debug('DELETE %r - Found feed', feed)
    clu_ctrl = ClusterController(self.user_id)
    logger.info('DELETE %r - removing back ref from cluster to article', feed)
    # Step 1: unset the main article on clusters pointing at an article of
    # this feed, before those articles are deleted.
    clu_ctrl.update(
        {
            'user_id': feed.user_id,
            'main_article_id__in': self.__actrl.read(feed_id=obj_id).with_entities('id')
        },
        {'main_article_id': None})

    def select_art(col):
        # Sub-select of ``col`` from the user's articles attached to the
        # cluster, ordered by ascending date and limited to one row.
        return select([col]).where(and_(Cluster.id == Article.cluster_id, Article.user_id == feed.user_id))\
            .order_by(Article.date.asc()).limit(1)

    logger.info('DELETE %r - removing articles', feed)
    # Step 2: here ``delete`` is the module-level name (the statement
    # builder applied to ``Article``), not this method.
    session.execute(
        delete(Article).where(
            and_(Article.feed_id == feed.id, Article.user_id == feed.user_id)))
    logger.info('DELETE %r - fixing cluster without main article', feed)
    # Step 3: give a new main article to the clusters unset in step 1 that
    # still have articles attached.
    clu_ctrl.update({
        'user_id': feed.user_id,
        'main_article_id': None
    }, {
        'main_title': select_art(Article.title),
        'main_article_id': select_art(Article.id),
        'main_feed_title': select([Feed.title]).where(
            and_(Cluster.id == Article.cluster_id, Article.user_id == feed.user_id, Feed.id == Article.feed_id, Feed.user_id == feed.user_id)).order_by(Article.date.asc()).limit(1)
    })
    logger.info('DELETE %r - removing clusters without main article', feed)
    # Step 4: clusters of this user whose main article is still unset are
    # removed.
    session.execute(
        delete(Cluster).where(
            and_(Cluster.user_id == feed.user_id, Cluster.main_article_id.__eq__(None))))
    # Step 5: finally delete the feed row itself.
    return super().delete(obj_id)
def put():
    """Mark every cluster selected by the request filters as read.

    When the ``only_singles`` argument is set, only clusters whose
    ``feeds_id`` holds exactly one entry are marked. Responds with the
    unread counts of the current user and a 200 status code.
    """
    attrs = mark_as_read_parser.parse_args()
    filters = _get_filters(attrs)
    clu_ctrl = ClusterController(current_identity.id)

    selected = []
    for clu in clu_ctrl.join_read(limit=None, **filters):
        if attrs.get("only_singles") and len(clu["feeds_id"]) != 1:
            continue
        selected.append(clu)

    if selected:
        selected_ids = [clu['id'] for clu in selected]
        read_values = {'read': True, 'read_reason': ReadReason.mass_marked}
        clu_ctrl.update({'id__in': selected_ids}, read_values)
        READ.labels(ReadReason.mass_marked.value).inc(len(selected))

    unreads = ClusterController(current_identity.id).get_unreads()
    return unreads, 200
def get():
    """Respond with the unread counts of the current user and a 200 status."""
    controller = ClusterController(current_identity.id)
    unreads = controller.get_unreads()
    return unreads, 200
def get():
    """List the cluster extracts matching the request filters.

    The result is meant for the middle panel.
    """
    attrs = filter_parser.parse_args()
    clu_ctrl = ClusterController(current_identity.id)
    filters = _get_filters(attrs)
    extracts = clu_ctrl.join_read(**filters)
    return list(extracts)
def test_delete(self):
    """Deleting every cluster leaves user 2 with no cluster and no article."""
    cluster_ctrl = ClusterController()
    for cluster in cluster_ctrl.read():
        cluster_ctrl.delete(cluster.id)

    remaining_clusters = ClusterController(2).read().count()
    self.assertEqual(0, remaining_clusters)
    remaining_articles = ArticleController(2).read().count()
    self.assertEqual(0, remaining_articles)