Example #1
0
    def remove_from_cluster(self, article):
        """Detach ``article`` from the cluster it currently belongs to.

        Nothing happens when the article has no ``cluster_id`` or when the
        cluster cannot be read. Otherwise:

        * if no other article is in the cluster, the cluster is deleted
          (with ``delete_articles=False``);
        * else, if the article was the cluster's main article, the cluster
          is re-enriched with another of its articles forced as main.

        The article's cluster related fields are then all reset to ``None``.
        This method always returns ``None``.
        """
        from jarr.controllers.cluster import ClusterController
        from jarr.controllers.article_clusterizer import Clusterizer
        if not article.cluster_id:
            return
        cluster_ctrl = ClusterController(self.user_id)
        cluster = cluster_ctrl.read(id=article.cluster_id).first()
        if not cluster:
            return

        # first article of the cluster which isn't the one being removed
        replacement = next((member for member in cluster.articles
                            if member.id != article.id), None)
        if replacement is None:
            # only one article in cluster, deleting cluster
            cluster_ctrl.delete(cluster.id, delete_articles=False)
        elif cluster.main_article_id == article.id:
            cluster.main_article_id = None
            clusterizer = Clusterizer(article.user_id)
            clusterizer.enrich_cluster(cluster, replacement,
                                       cluster.read, cluster.liked,
                                       force_article_as_main=True)

        cleared_fields = {'cluster_id': None,
                          'cluster_reason': None,
                          'cluster_score': None,
                          'cluster_tfidf_with': None,
                          'cluster_tfidf_neighbor_size': None}
        self.update({'id': article.id}, cleared_fields)
Example #2
0
    def test_adding_to_cluster_by_link(self):
        """A cloned article joins the read cluster and marks it unread.

        The first cluster is marked as read, one of its articles is cloned
        into another feed, and after clusterizing the cluster must hold one
        more article and no longer be read.
        """
        cluster_ctrl = ClusterController()

        cluster = cluster_ctrl.read().first()
        read_marker = {'read': True, 'read_reason': 'marked'}
        cluster_ctrl.update({'id': cluster.id}, read_marker)
        cluster = cluster_ctrl.get(id=cluster.id)
        self.assertTrue(cluster.read)
        source_article = cluster.articles[0]
        initial_count = len(cluster.articles)

        feed_ctrl = FeedController(cluster.user_id)
        article_ctrl = ArticleController(cluster.user_id)
        feed_ctrl.update({'id': source_article.feed_id},
                         {'cluster_wake_up': True})
        # any feed but the one the source article comes from
        other_feed = feed_ctrl.read(id__ne=source_article.feed_id).first()
        update_on_all_objs(articles=[source_article],
                           feeds=[other_feed],
                           cluster_enabled=True)

        self._clone_article(article_ctrl, source_article, other_feed)
        cluster_ctrl.clusterize_pending_articles()

        cluster = cluster_ctrl.get(id=cluster.id)
        self.assertEqual(initial_count + 1, len(cluster.articles))
        self.assertFalse(cluster.read)
Example #3
0
    def test_no_mixup(self):
        """Clusters must never hold articles from more than one user.

        Every article is duplicated twice: first with its own feed, then
        attached to the user and feed of a single article. After each
        round every cluster must only reference one user.
        """
        article_ctrl = ArticleController()
        cluster_ctrl = ClusterController()
        cluster_total = len(list(cluster_ctrl.read()))
        article_total = len(list(article_ctrl.read()))
        for cluster in cluster_ctrl.read():
            self.assertEqual(1, len(cluster.articles))

        # first round: duplicating every article inside its own feed
        for original in article_ctrl.read():
            duplicate = {'entry_id': original.entry_id,
                         'feed_id': original.feed_id,
                         'title': original.title,
                         'content': original.content,
                         'link': original.link}
            article_ctrl.create(**duplicate)

        pending_users = ArticleController.get_user_id_with_pending_articles()
        for user_id in pending_users:
            ClusterController(user_id).clusterize_pending_articles()
        self.assertEqual(2 * article_total, len(list(article_ctrl.read())))
        self.assertEqual(2 * cluster_total, len(list(cluster_ctrl.read())))

        for cluster in cluster_ctrl.read():
            self.assertEqual(1, len(cluster.articles))
            owners = {member.user_id for member in cluster.articles}
            self.assertEqual(1, len(owners))

        # second round: duplicating every article under one user and feed
        reference = article_ctrl.read().first()
        for original in article_ctrl.read():
            duplicate = {'user_id': reference.user_id,
                         'feed_id': reference.feed_id,
                         'entry_id': original.entry_id,
                         'title': original.title,
                         'content': original.content,
                         'link': original.link}
            article_ctrl.create(**duplicate)

        for cluster in cluster_ctrl.read():
            owners = {member.user_id for member in cluster.articles}
            self.assertEqual(1, len(owners))
Example #4
0
 def setUp(self):
     """Prepare ``self.article``: the first article, with its cluster deleted.

     Also clears the cache of ``content_generator.get_content_generator``.
     """
     super().setUp()
     self.actrl = ArticleController()
     first_article = self.actrl.read().first()
     cluster_ctrl = ClusterController()
     # dropping the cluster only, the article itself is kept
     cluster_ctrl.delete(first_article.cluster_id, delete_articles=False)
     # reading the article again once its cluster has been deleted
     self.article = self.actrl.get(id=first_article.id)
     content_generator.get_content_generator.cache_clear()
Example #5
0
 def _test_unread_on_cluster(self, read_reason):
     """Shared scenario: a clone of the main article joins a read cluster.

     The first cluster is marked as read with ``read_reason``; clustering
     and wake up are then enabled on the cluster's feeds and on another
     feed of the same user, into which the main article is cloned before
     pending articles get clusterized.

     Returns the cluster as read again through the controller, so callers
     can assert on its final state.
     """
     cluster_ctrl = ClusterController()
     feed_ctrl = FeedController()
     cluster = cluster_ctrl.read().first()
     config_reader = Clusterizer()
     self.assertFalse(config_reader.get_config(cluster, 'cluster_enabled'))
     self.assertTrue(config_reader.get_config(cluster, 'cluster_wake_up'))
     read_marker = {'read': True, 'read_reason': read_reason}
     cluster_ctrl.update({'id': cluster.id}, read_marker)
     other_feed = feed_ctrl.read(id__ne=cluster.main_article.feed_id,
                                 user_id=cluster.user_id).first()
     # a new Clusterizer is instantiated for each check, as in every step
     self.assertFalse(
         Clusterizer().get_config(other_feed, 'cluster_enabled'))
     involved_feed_ids = [feed.id for feed in cluster.feeds] \
         + [other_feed.id]
     feed_ctrl.update({'id__in': involved_feed_ids},
                      {'cluster_wake_up': True, 'cluster_enabled': True})
     self.assertTrue(Clusterizer().get_config(cluster, 'cluster_enabled'))
     # reading the target feed again now that it has been updated
     other_feed = feed_ctrl.read(id__ne=cluster.main_article.feed_id,
                                 user_id=cluster.user_id).first()
     clone = self._clone_article(ArticleController(),
                                 cluster.main_article, other_feed)
     self.assertTrue(Clusterizer().get_config(clone, 'cluster_wake_up'))
     ClusterController(cluster.user_id).clusterize_pending_articles()
     self.assertEqual(2, len(clone.cluster.articles))
     self.assertInCluster(clone, cluster)
     return cluster_ctrl.get(id=cluster.id)
Example #6
0
 def test_article_get_unread(self):
     """Check ``count_by_feed(read=False)`` for users 2 and 3."""
     # (user id, expected mapping returned by count_by_feed)
     expectations = (
         (2, {1: 3, 2: 3, 3: 3, 7: 3, 8: 3, 9: 3}),
         (3, {4: 3, 5: 3, 6: 3, 10: 3, 11: 3, 12: 3}),
     )
     for user_id, expected_counts in expectations:
         self.assertEqual(
             expected_counts,
             ClusterController(user_id).count_by_feed(read=False))
Example #7
0
    def test_similarity_clustering(self):
        """Exercise ``_get_cluster_by_similarity`` against mocked articles.

        The clustering query is replaced by mocks: one article bound to
        ``cluster`` plus four other articles repeated ``factor`` times.
        """
        cluster_conf = {'tfidf_min_score': 0.6, 'tfidf_min_sample_size': 10}
        user = Mock(cluster_conf=cluster_conf)
        category = Mock(cluster_conf=cluster_conf)
        feed = Mock(cluster_conf=cluster_conf, user=user, category=category)
        cluster = Mock()

        def gen_articles(factor):
            # the only article explicitly attached to the expected cluster
            clustered = Mock(
                simple_vector={'Sarkozy': 1, 'garb': 1, 'justice': 1},
                feed=feed, cluster=cluster)
            others = [Mock(feed=feed, simple_vector=vector)
                      for vector in ({'Sark': 1, 'garbge': 1, 'vote': 1},
                                     {'Sark': 1, 'garbae': 1, 'debat': 1},
                                     {'Sark': 1, 'garbag': 1, 'blague': 1},
                                     {'Sark': 1, 'garage': 1, 'chans': 1})]
            return [clustered] + others * factor

        cluster_ctrl = ClusterController()
        cluster_ctrl._get_query_for_clustering = Mock(
            return_value=gen_articles(2))

        shared_attrs = {'date': utc_now(), 'lang': 'fr', 'feed': feed}
        matching_article = Mock(
            simple_vector={'Morano': 1, 'garb': 1, 'justice': 1},
            **shared_attrs)

        # with 9 articles no cluster is found -- presumably because that's
        # under tfidf_min_sample_size, to be confirmed
        self.assertIsNone(
            cluster_ctrl._get_cluster_by_similarity(matching_article))
        cluster_ctrl._get_query_for_clustering = Mock(
            return_value=gen_articles(100))
        self.assertEqual(
            cluster_ctrl._get_cluster_by_similarity(matching_article),
            cluster)

        shared_attrs = {'date': utc_now(), 'lang': 'fr', 'feed': feed}
        solo_article = Mock(simple_vector={'Sark': 1, 'fleur': 1},
                            **shared_attrs)
        self.assertNotEqual(
            cluster, cluster_ctrl._get_cluster_by_similarity(solo_article))
        self.assertIsNone(
            cluster_ctrl._get_cluster_by_similarity(solo_article))
Example #8
0
    def delete(self, obj_id, commit=True):
        """Delete the feed ``obj_id`` along with its articles.

        Steps, in order:

        1. clusters of the feed's user whose main article belongs to the
           feed get their ``main_article_id`` set to ``None``;
        2. the articles of the feed are deleted;
        3. clusters of the user left without main article get a main
           title, main article id and main feed title from sub-selects
           ordered by ascending article date;
        4. clusters of the user still without main article are deleted;
        5. the feed itself is deleted through the parent implementation,
           whose result is returned.

        NOTE(review): ``commit`` is accepted but never used in this method,
        nor is it forwarded to ``super().delete``.
        """
        from jarr.controllers.cluster import ClusterController
        feed = self.get(id=obj_id)
        logger.debug('DELETE %r - Found feed', feed)
        cluster_ctrl = ClusterController(self.user_id)

        logger.info('DELETE %r - removing back ref from cluster to article',
                    feed)
        cluster_ctrl.update(
            {'user_id': feed.user_id,
             'main_article_id__in':
                 self.__actrl.read(feed_id=obj_id).with_entities('id')},
            {'main_article_id': None})

        logger.info('DELETE %r - removing articles', feed)
        owned_by_feed = and_(Article.feed_id == feed.id,
                             Article.user_id == feed.user_id)
        session.execute(delete(Article).where(owned_by_feed))

        def first_by_date(col, *extra_clauses):
            # sub-select of ``col`` for the user's article of the cluster
            # coming first by ascending date
            return select([col]).where(and_(Cluster.id == Article.cluster_id,
                                            Article.user_id == feed.user_id,
                                            *extra_clauses))\
                                .order_by(Article.date.asc()).limit(1)

        logger.info('DELETE %r - fixing cluster without main article', feed)
        cluster_ctrl.update(
            {'user_id': feed.user_id, 'main_article_id': None},
            {'main_title': first_by_date(Article.title),
             'main_article_id': first_by_date(Article.id),
             'main_feed_title': first_by_date(
                 Feed.title,
                 Feed.id == Article.feed_id,
                 Feed.user_id == feed.user_id)})

        logger.info('DELETE %r - removing clusters without main article', feed)
        orphan_clusters = and_(Cluster.user_id == feed.user_id,
                               Cluster.main_article_id.__eq__(None))
        session.execute(delete(Cluster).where(orphan_clusters))
        return super().delete(obj_id)
Example #9
0
 def put():
     """Mark every cluster selected by the request filters as read.

     When the parsed ``only_singles`` argument is truthy, only clusters
     listing exactly one feed id are kept. The ``READ`` metric is
     incremented by the number of clusters kept, even when there are none.

     Returns the unread counts of the current identity with a 200 status.
     """
     attrs = mark_as_read_parser.parse_args()
     filters = _get_filters(attrs)
     clu_ctrl = ClusterController(current_identity.id)
     selected = []
     for clu in clu_ctrl.join_read(limit=None, **filters):
         if attrs.get("only_singles") and len(clu["feeds_id"]) != 1:
             continue  # spans several feeds while only singles are wanted
         selected.append(clu)
     if selected:
         selected_ids = [clu['id'] for clu in selected]
         clu_ctrl.update({'id__in': selected_ids},
                         {'read': True,
                          'read_reason': ReadReason.mass_marked})
     READ.labels(ReadReason.mass_marked.value).inc(len(selected))
     return ClusterController(current_identity.id).get_unreads(), 200
Example #10
0
 def get():
     """Return the unread counts of the current identity, with status 200."""
     clu_ctrl = ClusterController(current_identity.id)
     return clu_ctrl.get_unreads(), 200
Example #11
0
 def get():
     """List the clusters matching the request filters.

     Returns everything yielded by ``join_read`` for the current identity,
     as a list.
     """
     attrs = filter_parser.parse_args()
     clu_ctrl = ClusterController(current_identity.id)
     filters = _get_filters(attrs)
     return [*clu_ctrl.join_read(**filters)]
Example #12
0
 def test_delete(self):
     """Deleting every cluster leaves user 2 without cluster nor article."""
     cluster_ctrl = ClusterController()
     for cluster in cluster_ctrl.read():
         cluster_ctrl.delete(cluster.id)
     # clusters are checked first, then articles
     for ctrl_cls in (ClusterController, ArticleController):
         self.assertEqual(0, ctrl_cls(2).read().count())