def test_cluster_same_feed(self):
    # Toggles 'cluster_same_feed' on the feeds, the category and the user
    # tied to an existing cluster and checks, after each change, whether an
    # article created from the cluster's main feed ends up in that cluster.
    seed_article = ArticleController().read(category_id__ne=None).first()
    cluster = seed_article.cluster

    def clone_from_main_feed():
        # the main article (and its feed) is looked up again on every call
        return self.create_article_from(cluster, cluster.main_article.feed)

    def cluster_feeds_filter():
        return {'id__in': [art.feed_id for art in cluster.articles]}

    def main_category_filter():
        return {'id': cluster.main_article.category.id}

    # everything is enabled: the new article joins the cluster
    update_on_all_objs(articles=cluster.articles, cluster_enabled=True,
                       cluster_same_feed=True)
    clone = clone_from_main_feed()
    self.assertInCluster(clone, cluster)

    # disabled on the feeds: won't cluster
    FeedController().update(cluster_feeds_filter(),
                            {'cluster_same_feed': False})
    clone = clone_from_main_feed()
    self.assertNotInCluster(clone, cluster)

    # feeds set back to None, disabled on the category: won't cluster
    # NOTE(review): None presumably means "defer to the next level" -- confirm
    FeedController().update(cluster_feeds_filter(),
                            {'cluster_same_feed': None})
    CategoryController().update(main_category_filter(),
                                {'cluster_same_feed': False})
    clone = clone_from_main_feed()
    self.assertNotInCluster(clone, cluster)

    # category set back to None, disabled on the user: won't cluster
    CategoryController().update(main_category_filter(),
                                {'cluster_same_feed': None})
    UserController().update({'id': cluster.user_id},
                            {'cluster_same_feed': False})
    clone = clone_from_main_feed()
    self.assertNotInCluster(clone, cluster)

    # re-enabled on the user: clusters again
    UserController().update({'id': cluster.user_id},
                            {'cluster_same_feed': True})
    clone = clone_from_main_feed()
    self.assertInCluster(clone, cluster)
def test_delete_main_cluster_handling(self):
    # Builds a two-article cluster spread over two feeds, deletes the feed of
    # the main article, then checks the cluster is left with one article and
    # a different main article id, main title and main feed title.
    suffix = 'suffix'
    cluster_id = 10  # hard-coded id; assumed to exist in fixtures -- confirm
    cluster = ClusterController().get(id=cluster_id)
    article_ctrl = ArticleController(cluster.user_id)
    feed_ctrl = FeedController(cluster.user_id)
    former_title = cluster.main_title
    former_feed_title = cluster.main_feed_title
    former_main_id = cluster.main_article_id

    # remove every other article sharing the main article's link
    same_link_articles = article_ctrl.read(link=cluster.main_article.link,
                                           id__ne=cluster.main_article.id)
    for duplicate in same_link_articles:
        article_ctrl.delete(duplicate.id)

    other_feed = feed_ctrl.read(id__ne=cluster.main_article.feed_id).first()
    update_on_all_objs(articles=[cluster.main_article], feeds=[other_feed],
                       cluster_enabled=True)

    # new article in the other feed: same link, suffixed fields, one day later
    main = cluster.main_article
    one_day = timedelta(days=1)
    article_ctrl.create(
        feed_id=other_feed.id,
        entry_id=main.entry_id + suffix,
        link=main.link,
        title=main.title + suffix,
        content=main.content + suffix,
        date=main.date + one_day,
        retrieved_date=main.retrieved_date + one_day,
    )
    ClusterController(cluster.user_id).clusterize_pending_articles()

    cluster = ClusterController().get(id=cluster_id)
    self.assertEqual(2, len(cluster.articles))

    # deleting the main article's feed must hand the cluster over
    feed_ctrl.delete(cluster.main_article.feed_id)
    remaining = ClusterController(cluster.user_id).get(id=cluster.id)
    self.assertEqual(1, len(remaining.articles))
    self.assertNotEqual(former_title, remaining.main_title)
    self.assertNotEqual(former_feed_title, remaining.main_feed_title)
    self.assertNotEqual(former_main_id, remaining.main_article_id)
def test_adding_to_cluster_by_link(self):
    # Marks a cluster as read, sets 'cluster_wake_up' on the feed of its
    # first article, clones that article into another feed and checks that,
    # after clusterizing, the cluster has one more article and is unread.
    cluster_ctrl = ClusterController()
    cluster = cluster_ctrl.read().first()
    by_id = {'id': cluster.id}
    cluster_ctrl.update(by_id, {'read': True, 'read_reason': 'marked'})
    cluster = cluster_ctrl.get(id=cluster.id)
    self.assertTrue(cluster.read)

    first_article = cluster.articles[0]
    size_before = len(cluster.articles)
    feed_ctrl = FeedController(cluster.user_id)
    article_ctrl = ArticleController(cluster.user_id)
    feed_ctrl.update({'id': first_article.feed_id},
                     {'cluster_wake_up': True})
    other_feed = feed_ctrl.read(id__ne=first_article.feed_id).first()
    update_on_all_objs(articles=[first_article], feeds=[other_feed],
                       cluster_enabled=True)

    # NOTE(review): _clone_article presumably keeps the link -- confirm
    self._clone_article(article_ctrl, first_article, other_feed)
    cluster_ctrl.clusterize_pending_articles()

    cluster = cluster_ctrl.get(id=cluster.id)
    self.assertEqual(size_before + 1, len(cluster.articles))
    self.assertFalse(cluster.read)
def test_cluster_enabled(self):
    # Flips 'cluster_enabled' at user, feed and category level and checks,
    # after each change, whether an article created from a feed outside the
    # cluster ends up in that cluster.
    cluster_ctrl = ClusterController()
    cluster = cluster_ctrl.read().first()
    cluster_feed_ids = [art.feed_id for art in cluster.articles]
    feed = FeedController(cluster.user_id).read(
        category_id__ne=None, id__nin=cluster_feed_ids).first()
    category = feed.category

    def set_everywhere(value):
        # cluster.articles is read again on every call
        update_on_all_objs(articles=cluster.articles, feeds=[feed],
                           cluster_enabled=value)

    def clone_from_feed():
        return self.create_article_from(cluster, feed)

    # clustering works when everything is true
    set_everywhere(True)
    clone = clone_from_feed()
    self.assertInCluster(clone, cluster)

    # disabling on the user deactivates all clustering by default
    set_everywhere(None)
    UserController().update({'id': cluster.user_id},
                            {'cluster_enabled': False})
    clone = clone_from_feed()
    self.assertNotInCluster(clone, cluster)

    # disabling on the article's feed prevents clustering
    set_everywhere(True)
    FeedController().update({'id': feed.id}, {'cluster_enabled': False})
    clone = clone_from_feed()
    self.assertNotInCluster(clone, cluster)

    # disabling on the feeds of the cluster's articles prevents clustering
    set_everywhere(True)
    FeedController().update(
        {'id__in': [art.feed_id for art in cluster.articles]},
        {'cluster_enabled': False})
    clone = clone_from_feed()
    self.assertNotInCluster(clone, cluster)

    # disabling on the article's category prevents clustering
    CategoryController(cluster.user_id).update({'id': category.id},
                                               {'cluster_enabled': False})
    clone = clone_from_feed()
    self.assertNotInCluster(clone, cluster)

    # everything back to true: clustering works again
    set_everywhere(True)
    clone = clone_from_feed()
    self.assertInCluster(clone, cluster)
def test_MarkClustersAsRead_put_only_singles(self):
    # Adds an article sharing the link of an existing one, clusterizes, then
    # marks clusters as read with 'only_singles' and checks the unread
    # cluster count before (18) and after (1).
    user_id = self.user.id

    def unread_filter():
        return {'filter': 'unread'}

    feed = FeedController(user_id).read()[0]
    update_on_all_objs(feeds=[feed], cluster_same_feed=True,
                       cluster_enabled=True)

    # creating a new article that will cluster
    new_article = {
        'entry_id': 'new entry_id',
        'title': 'new title',
        'content': 'new content',
        'feed_id': feed.id,
        'link': feed.articles[0].link,
    }
    ArticleController(user_id).create(**new_article)
    ClusterController(user_id).clusterize_pending_articles()

    # original note at this point: "one per feed"
    self.assertClusterCount(18, unread_filter())
    # NOTE(review): the meaning of the 2 depends on _mark_as_read -- confirm
    self._mark_as_read(2, {'only_singles': True, 'filter': 'unread'})
    self.assertClusterCount(1, unread_filter())
def test_cluster_tfidf_control(self):
    # Reduces the data to one cluster holding one article, then creates two
    # feeds (TF-IDF enabled on the first, disabled on the second) and checks
    # that an article with a non-matching link clusters for reason tf_idf
    # only through the first feed.
    seed_article = ArticleController().read(category_id__ne=None).first()
    cluster = seed_article.cluster

    # leaving one cluster with one article
    other_cluster_ids = []
    for other in ClusterController().read(id__ne=cluster.id):
        other_cluster_ids.append(other.id)
    other_article_ids = []
    for other in ArticleController().read(id__ne=cluster.main_article_id):
        other_article_ids.append(other.id)
    # detach the articles first, then delete clusters, then the articles
    ArticleController().update({'id__in': other_article_ids},
                               {'cluster_id': None})
    for cluster_id in other_cluster_ids:
        ClusterController().delete(cluster_id)
    for article_id in other_article_ids:
        ArticleController().delete(article_id)
    self.assertEqual(1, ClusterController().read().count())
    self.assertEqual(1, ArticleController().read().count())

    def tfidf_conf():
        # fresh dict on every call, same values for both feeds
        return {'tfidf_min_score': -1, 'tfidf_min_sample_size': 1}

    tfidf_feed = FeedController(cluster.user_id).create(
        title='new feed', cluster_conf=tfidf_conf())
    update_on_all_objs(articles=cluster.articles, feeds=[tfidf_feed],
                       cluster_tfidf_enabled=True, cluster_enabled=True)
    no_tfidf_feed = FeedController(cluster.user_id).create(
        cluster_enabled=True, cluster_tfidf_enabled=False,
        title='new feed', cluster_conf=tfidf_conf())

    # links differ on purpose, so link matching cannot explain the result
    unmatched_link = cluster.main_article.link + 'do not match link'
    clone = self.create_article_from(cluster, tfidf_feed,
                                     link=unmatched_link)
    self.assertInCluster(clone, cluster, ClusterReason.tf_idf)

    unmatched_link = cluster.main_article.link + 'do not match link either'
    clone = self.create_article_from(cluster, no_tfidf_feed,
                                     link=unmatched_link)
    self.assertNotInCluster(clone, cluster)
def test_no_cluster_same_category_on_original_category(self):
    # With 'cluster_same_category' left to None on articles and feed, sets
    # it on the category to False then True and checks that an article
    # created from a new feed of that category is first kept out of the
    # cluster and then accepted into it.
    seed_article = ArticleController().read(category_id__ne=None).first()
    category_id = seed_article.category_id
    category_ctrl = CategoryController(seed_article.user_id)
    cluster = seed_article.cluster
    new_feed = FeedController(cluster.user_id).create(
        title='new feed', category_id=category_id)
    update_on_all_objs(articles=cluster.articles, feeds=[new_feed],
                       cluster_same_category=None, cluster_enabled=True)

    steps = (
        (False, self.assertNotInCluster),
        (True, self.assertInCluster),
    )
    for same_category, check in steps:
        category_ctrl.update({'id': category_id},
                             {'cluster_same_category': same_category})
        clone = self.create_article_from(cluster, new_feed)
        check(clone, cluster)
def test_cluster_disabled_on_original_category(self):
    # Sets 'cluster_enabled' to None on the user and to False on the
    # categories reached by an empty filter, checks that articles from a
    # categorized and from a category-less feed stay out of the cluster,
    # then re-enables the original category and checks both get in.
    seed_article = ArticleController().read(category_id__ne=None).first()
    category_id = seed_article.category_id
    category_ctrl = CategoryController(seed_article.user_id)
    cluster = seed_article.cluster
    feed_ctrl = FeedController(cluster.user_id)
    categorized_feed = feed_ctrl.create(title='new feed',
                                        category_id=category_id)
    uncategorized_feed = feed_ctrl.create(title='category-less')
    update_on_all_objs(users=[cluster.user], cluster_enabled=None)
    # NOTE(review): the empty filter presumably targets every category of
    # this controller's user -- confirm
    category_ctrl.update({}, {'cluster_enabled': False})

    def clone_and_check_size(source_feed, expected_size):
        # create the article, then check the size of the cluster it got
        clone = self.create_article_from(cluster, source_feed)
        self.assertEqual(expected_size, len(clone.cluster.articles))
        return clone

    # disabled: each new article sits alone in its own cluster
    clone = clone_and_check_size(categorized_feed, 1)
    self.assertNotInCluster(clone, cluster)
    clone = clone_and_check_size(uncategorized_feed, 1)
    self.assertNotInCluster(clone, cluster)

    # re-enabled on the original category: both feeds cluster again
    category_ctrl.update({'id': category_id}, {'cluster_enabled': True})
    clone = clone_and_check_size(uncategorized_feed, 2)
    self.assertInCluster(clone, cluster)
    clone = clone_and_check_size(categorized_feed, 3)
    self.assertInCluster(clone, cluster)