def test_delete_main_cluster_handling(self):
    """Deleting the feed of a cluster's main article changes its main.

    Cluster 10 is first brought to two articles by creating, in another
    feed, an article sharing the main article's link. The feed of the
    main article is then deleted; the cluster must be left with a single
    article and a different main title, main feed title and main
    article id.
    """
    suffix = 'suffix'
    cluster = ClusterController().get(id=10)
    article_ctrl = ArticleController(cluster.user_id)
    feed_ctrl = FeedController(cluster.user_id)

    # values the cluster exposes before anything is modified
    title_before = cluster.main_title
    feed_title_before = cluster.main_feed_title
    main_id_before = cluster.main_article_id

    # dropping every other article already sharing the main link
    main = cluster.main_article
    for duplicate in article_ctrl.read(link=main.link, id__ne=main.id):
        article_ctrl.delete(duplicate.id)

    other_feed = feed_ctrl.read(
        id__ne=cluster.main_article.feed_id).first()
    update_on_all_objs(articles=[cluster.main_article],
                       feeds=[other_feed],
                       cluster_enabled=True)

    main = cluster.main_article
    one_day = timedelta(1)
    article_ctrl.create(
        feed_id=other_feed.id,
        entry_id=main.entry_id + suffix,
        link=main.link,
        title=main.title + suffix,
        content=main.content + suffix,
        date=main.date + one_day,
        retrieved_date=main.retrieved_date + one_day,
    )
    ClusterController(cluster.user_id).clusterize_pending_articles()

    cluster = ClusterController().get(id=10)
    self.assertEqual(2, len(cluster.articles))

    feed_ctrl.delete(cluster.main_article.feed_id)
    reloaded = ClusterController(cluster.user_id).get(id=cluster.id)
    self.assertEqual(1, len(reloaded.articles))
    self.assertNotEqual(title_before, reloaded.main_title)
    self.assertNotEqual(feed_title_before, reloaded.main_feed_title)
    self.assertNotEqual(main_id_before, reloaded.main_article_id)
def test_articles_with_enclosure_and_fetched_content(self, truncated_cnt,
                                                     get_vector):
    """An entry with an enclosure and fetched content yields one cluster.

    ``truncated_cnt`` and ``get_vector`` are mock objects whose return
    values are configured here (presumably injected by patch decorators
    that are not part of this block -- confirm against the class).
    The two articles built from the entry must share a cluster whose
    content holds exactly one entry, of type ``'fetched'``.
    """
    self._clean_objs()
    get_vector.return_value = None
    truncated_cnt.return_value = {
        'type': 'fetched',
        'title': 'holy grail',
        'content': 'blue, no read, aaah',
        'link': 'https://monthy.python/brian',
    }

    feed = FeedController().read().first()
    FeedController().update(
        {'id': feed.id},
        {'truncated_content': True, 'cluster_enabled': True})
    UserController().update({'id': feed.user_id},
                            {'cluster_enabled': True})

    builder = ClassicArticleBuilder(feed, self.entry_w_enclosure, {})
    self.assertIsNone(builder.article.get('article_type'))

    raw_articles = list(builder.enhance())
    self.assertEqual(2, len(raw_articles))
    self.assertEqual('audio', raw_articles[1]['article_type'].value)

    created = [ArticleController(feed.user_id).create(**raw)
               for raw in raw_articles]
    ClusterController(feed.user_id).clusterize_pending_articles()

    first = ArticleController().get(id=created[0].id)
    second = ArticleController().get(id=created[1].id)
    self.assertEqual(first.cluster_id, second.cluster_id)

    cluster = ClusterController().get(id=first.cluster_id)
    self.assertEqual(2, cluster.content['v'])
    self.assertEqual(1, len(cluster.content['contents']))
    self.assertEqual('fetched', cluster.content['contents'][0]['type'])
def delete(cluster_id):
    """Delete the cluster ``cluster_id`` on behalf of the current identity.

    :param cluster_id: identifier of the cluster to delete.
    :return: ``(None, 204)`` once the deletion went through.
    :raises Forbidden: the scoped deletion raised ``NotFound`` but an
        unscoped lookup finds the cluster under another user id.
    :raises NotFound: re-raised when the cluster found by the unscoped
        lookup carries the current identity's id (or raised by that
        lookup itself).
    """
    try:
        ClusterController(current_identity.id).delete(cluster_id)
    except NotFound:
        # telling apart "someone else's cluster" from a genuine miss
        owner_id = ClusterController().get(id=cluster_id).user_id
        if owner_id == current_identity.id:
            raise
        raise Forbidden()
    else:
        return None, 204
def test_delete_cluster_handling(self):
    """Cluster 10 reads the same through a user-scoped controller.

    The cluster is fetched once without user scope and once through a
    controller bound to its owner; both must expose one article and the
    same main title, main feed title and main article id.
    """
    cluster = ClusterController().get(id=10)
    expected_title = cluster.main_title
    expected_feed_title = cluster.main_feed_title
    expected_main_id = cluster.main_article_id
    self.assertEqual(1, len(cluster.articles))

    reloaded = ClusterController(cluster.user_id).get(id=cluster.id)
    self.assertEqual(1, len(reloaded.articles))
    self.assertEqual(expected_title, reloaded.main_title)
    self.assertEqual(expected_feed_title, reloaded.main_feed_title)
    self.assertEqual(expected_main_id, reloaded.main_article_id)
def test_ClusterResource_delete(self):
    """DELETE on a cluster answers 401, 403 then 204 for its owner.

    After the successful call neither the cluster nor any article
    attached to it may remain.
    """
    target = ClusterController().read().first()
    owner = UserController().get(id=target.user_id)

    # no credentials, a login expected to be refused, then the owner
    attempts = (
        ({}, 401),
        ({'user': '******'}, 403),
        ({'user': owner.login}, 204),
    )
    for auth, expected_status in attempts:
        resp = self.jarr_client('delete', 'cluster', target.id, **auth)
        self.assertStatusCode(expected_status, resp)

    remaining_clusters = ClusterController().read(id=target.id).count()
    self.assertEqual(0, remaining_clusters)
    remaining_articles = ArticleController().read(
        cluster_id=target.id).count()
    self.assertEqual(0, remaining_articles)
def remove_from_cluster(self, article):
    """Detach ``article`` from the cluster it belongs to.

    Nothing happens when the article has no ``cluster_id`` or when the
    cluster cannot be read through a controller scoped to
    ``self.user_id``. Otherwise:

    * if the cluster holds no other article, the cluster is deleted
      (its articles are kept, ``delete_articles=False``);
    * if the article was the cluster's main article, the main article
      is reset and the cluster is enriched with another of its
      articles, forced as the new main one.

    In both cases every ``cluster_*`` field of the article is then
    reset to ``None``.

    :param article: the article to take out of its cluster.
    :return: always ``None``.
    """
    from jarr.controllers import ClusterController
    if not article.cluster_id:
        return
    clu_ctrl = ClusterController(self.user_id)
    cluster = clu_ctrl.read(id=article.cluster_id).first()
    if not cluster:
        return

    # first article of the cluster that is not the one being removed
    replacement = next(
        (candidate for candidate in cluster.articles
         if candidate.id != article.id),
        None)
    if replacement is None:
        # only one article in cluster, deleting cluster
        clu_ctrl.delete(cluster.id, delete_articles=False)
    elif cluster.main_article_id == article.id:
        cluster.main_article_id = None
        clu_ctrl.enrich_cluster(cluster, replacement,
                                cluster.read, cluster.liked,
                                force_article_as_main=True)

    cleared_fields = {
        'cluster_id': None,
        'cluster_reason': None,
        'cluster_score': None,
        'cluster_tfidf_with': None,
        'cluster_tfidf_neighbor_size': None,
    }
    self.update({'id': article.id}, cleared_fields)
def test_cluster_tfidf_control(self):
    """TF-IDF clustering only applies to feeds that enable it.

    The database is reduced to one cluster holding one article. An
    article from a feed with ``cluster_tfidf_enabled`` set must join the
    cluster with reason ``ClusterReason.tf_idf`` although its link does
    not match; the same from a feed with TF-IDF disabled must not join.
    """
    def permissive_conf():
        # a fresh dict per feed, as each feed received its own literal
        return {'tfidf_min_score': -1, 'tfidf_min_sample_size': 1}

    seed = ArticleController().read(category_id__ne=None).first()
    cluster = seed.cluster

    # leaving one cluster with one article
    other_cluster_ids = [
        clu.id for clu in ClusterController().read(id__ne=cluster.id)]
    other_article_ids = [
        art.id
        for art in ArticleController().read(
            id__ne=cluster.main_article_id)]
    ArticleController().update({'id__in': other_article_ids},
                               {'cluster_id': None})
    for cluster_id in other_cluster_ids:
        ClusterController().delete(cluster_id)
    for article_id in other_article_ids:
        ArticleController().delete(article_id)
    self.assertEqual(1, ClusterController().read().count())
    self.assertEqual(1, ArticleController().read().count())

    tfidf_feed = FeedController(cluster.user_id).create(
        title='new feed', cluster_conf=permissive_conf())
    update_on_all_objs(articles=cluster.articles,
                       feeds=[tfidf_feed],
                       cluster_tfidf_enabled=True,
                       cluster_enabled=True)
    plain_feed = FeedController(cluster.user_id).create(
        cluster_enabled=True,
        cluster_tfidf_enabled=False,
        title='new feed',
        cluster_conf=permissive_conf())

    matching = self.create_article_from(
        cluster, tfidf_feed,
        link=cluster.main_article.link + 'do not match link')
    self.assertInCluster(matching, cluster, ClusterReason.tf_idf)

    stray = self.create_article_from(
        cluster, plain_feed,
        link=cluster.main_article.link + 'do not match link either')
    self.assertNotInCluster(stray, cluster)
def populate_db():
    """Fill the database with fixture users, categories, feeds, articles.

    Creates one admin/API user plus ``user1`` and ``user2``. For each of
    two iterations and each of those two users, three feeds are created
    (the first without category, the other two each in a new category),
    every feed receiving three articles with two tags each. The session
    is then committed and flushed, and pending articles are clusterized.
    """
    feed_ctrl = FeedController()
    category_ctrl = CategoryController()

    UserController().create(is_admin=True,
                            is_api=True,
                            cluster_enabled=False,
                            login='******',
                            password='******')

    users = []
    for name in ["user1", "user2"]:
        # NOTE(review): as shown here this literal has no % placeholder;
        # confirm the real address template.
        users.append(UserController().create(
            login=name,
            cluster_enabled=False,
            email="*****@*****.**" % name,
            password=name))
    user1, user2 = users

    for iteration in range(2):
        # restarts at every iteration, so links repeat across iterations
        article_total = 0
        for user in (user1, user2):
            for iter_cat in range(3):
                if iter_cat:
                    category = category_ctrl.create(
                        user_id=user.id,
                        name=to_name(user, iteration, iter_cat))
                    cat_id = category.id
                else:
                    # first feed of each user stays uncategorized
                    cat_id = None
                feed = feed_ctrl.create(
                    link="feed%d%d" % (iteration, iter_cat),
                    user_id=user.id,
                    category_id=cat_id,
                    title=to_name(user, iteration, iter_cat, iter_cat))
                feed_id = feed.id
                for iter_art in range(3):
                    entry = to_name(user, iteration, iter_cat, iter_cat,
                                    iter_art)
                    tags = [
                        to_name(user, iteration, iter_cat, iter_cat,
                                iter_art, str(tag_idx))
                        for tag_idx in range(2)
                    ]
                    article_total += 1
                    ArticleController().create(
                        entry_id=entry,
                        link='http://test.te/%d' % article_total,
                        feed_id=feed_id,
                        user_id=user.id,
                        tags=tags,
                        category_id=cat_id,
                        title=entry,
                        date=utc_now() + timedelta(seconds=iteration),
                        content="content %d" % article_total)

    session.commit()
    session.flush()
    ClusterController().clusterize_pending_articles()
def test_admin_update_cluster_on_change_title(self):
    """Retitling a feed is reflected in its clusters' main feed title.

    The feed is the one of the main article of the first cluster read
    through a controller scoped to user 2; it is retitled through an
    unscoped ``FeedController``.
    """
    def assert_clusters_follow(checked_feed):
        for cluster in checked_feed.clusters:
            self.assertEqual(checked_feed.title, cluster.main_feed_title)

    first_cluster = ClusterController(2).read()[0]
    feed = first_cluster.main_article.feed
    assert_clusters_follow(feed)

    FeedController().update({'id': feed.id}, {'title': 'updated title'})

    feed = FeedController().get(id=feed.id)
    self.assertEqual('updated title', feed.title)
    assert_clusters_follow(feed)
def test_cluster_enabled(self):
    """``cluster_enabled`` is honoured on user, feed and category.

    Starting from the first cluster and a categorized feed that none of
    its articles belong to, the flag is toggled at each level in turn
    and a look-alike article is created after each change to check
    whether it joins the cluster.
    """
    def set_everywhere(value):
        # (re)applies the flag on the cluster's articles and on the feed
        update_on_all_objs(articles=cluster.articles,
                           feeds=[feed],
                           cluster_enabled=value)

    def new_article():
        return self.create_article_from(cluster, feed)

    ccontr = ClusterController()
    cluster = ccontr.read().first()
    used_feed_ids = [art.feed_id for art in cluster.articles]
    feed = FeedController(cluster.user_id).read(
        category_id__ne=None, id__nin=used_feed_ids).first()
    category = feed.category

    # clustering works when all is true
    set_everywhere(True)
    self.assertInCluster(new_article(), cluster)

    # disabling on user deactivates all clustering by default
    set_everywhere(None)
    UserController().update({'id': cluster.user_id},
                            {'cluster_enabled': False})
    self.assertNotInCluster(new_article(), cluster)

    # disabling on article's feed prevents from clustering
    set_everywhere(True)
    FeedController().update({'id': feed.id}, {'cluster_enabled': False})
    self.assertNotInCluster(new_article(), cluster)

    # disabling on feed from cluster's articles prevents from clustering
    set_everywhere(True)
    FeedController().update(
        {'id__in': [art.feed_id for art in cluster.articles]},
        {'cluster_enabled': False})
    self.assertNotInCluster(new_article(), cluster)

    # disabling on article's category prevents from clustering
    CategoryController(cluster.user_id).update(
        {'id': category.id}, {'cluster_enabled': False})
    self.assertNotInCluster(new_article(), cluster)

    # re-enabling everywhere restores clustering
    set_everywhere(True)
    self.assertInCluster(new_article(), cluster)
def test_ClusterResource_get(self):
    """GET on a cluster: 401, 403, then 226 once and 200 afterwards.

    The owner's first successful read answers 226 with one article in
    the payload; reading the same cluster again answers 200.
    """
    def fetch(**auth):
        return self.jarr_client('get', 'cluster', cluster.id, **auth)

    user = UserController().get(login='******')
    cluster = ClusterController(user.id).read().first()

    self.assertStatusCode(401, fetch())
    self.assertStatusCode(403, fetch(user='******'))

    first_read = fetch(user=user.login)
    self.assertStatusCode(226, first_read)
    self.assertEqual(1, len(first_read.json['articles']))

    second_read = fetch(user=user.login)
    self.assertStatusCode(200, second_read)
def test_articles_with_enclosure(self):
    """An entry with an enclosure yields two articles in one cluster.

    The second raw article built from the entry is of type ``'audio'``;
    once clusterized both articles share a cluster whose content is at
    version 2 with an empty ``contents`` list.
    """
    self._clean_objs()
    feed = FeedController().read().first()
    UserController().update({'id': feed.user_id},
                            {'cluster_enabled': True})

    builder = ClassicArticleBuilder(feed, self.entry_w_enclosure, {})
    self.assertIsNone(builder.article.get('article_type'))

    raw_articles = list(builder.enhance())
    self.assertEqual(2, len(raw_articles))
    self.assertEqual('audio', raw_articles[1]['article_type'].value)

    created = [ArticleController(feed.user_id).create(**raw)
               for raw in raw_articles]
    ClusterController(feed.user_id).clusterize_pending_articles()

    first = ArticleController().get(id=created[0].id)
    second = ArticleController().get(id=created[1].id)
    cluster = ClusterController().get(id=first.cluster_id)

    self.assertEqual(first.cluster_id, second.cluster_id)
    self.assertEqual(2, cluster.content['v'])
    self.assertEqual(0, len(cluster.content['contents']))
def test_opml_dump_and_restore(self):
    """Export the feeds as OPML, wipe them, then import the file twice.

    The first import must answer 201 with nothing reported as existing
    or failed; importing the very same file again must answer 200 with
    nothing created or failed.
    """
    def upload_dump():
        # a new BytesIO per call: each upload gets a fresh stream
        return self.jarr_client(
            'post', 'opml',
            to_json=False,
            data={'opml_file': (BytesIO(resp.data), 'opml.xml')},
            headers=None,
            user=self.user.login)

    # downloading OPML export file
    resp = self.jarr_client('get', '/opml', user=self.user.login)
    self.assertStatusCode(200, resp)
    opml_dump = resp.data.decode()
    self.assertTrue(
        opml_dump.startswith('<?xml version="1.0" encoding="utf-8"'))
    self.assertTrue(opml_dump.endswith('</opml>'))

    # cleaning db
    actrl = ArticleController(self.user.id)
    for article in actrl.read():
        actrl.delete(article.id)
    self.assertEqual(0, ClusterController(self.user.id).read().count())
    self.assertEqual(0, ArticleController(self.user.id).read().count())

    # remembering feed titles (by category name) while deleting feeds
    no_category_feed = []
    existing_feeds = {}
    for feed in self.fctrl.read():
        if feed.category:
            existing_feeds.setdefault(feed.category.name,
                                      []).append(feed.title)
        else:
            no_category_feed.append(feed.title)
        self.fctrl.delete(feed.id)
    for category in self.cctrl.read():
        self.cctrl.delete(category.id)

    # re-importing OPML
    import_resp = upload_dump()
    self.assertStatusCode(201, import_resp)
    self.assertEqual(0, import_resp.json['existing'])
    self.assertEqual(0, import_resp.json['failed'])
    self._check_opml_imported(existing_feeds, no_category_feed)

    # importing the same file a second time
    import_resp = upload_dump()
    self.assertStatusCode(200, import_resp)
    self.assertEqual(0, import_resp.json['created'])
    self.assertEqual(0, import_resp.json['failed'])
def create_article_from(self, cluster, feed, link=None):
    """Create in ``feed`` a look-alike of ``cluster``'s main article.

    Entry id, title and content are those of the main article with a
    random numeric suffix appended, the date is one day later and the
    retrieved date is copied as is. Pending articles of the cluster's
    user are clusterized before the article is read back.

    :param cluster: cluster whose main article serves as template.
    :param feed: feed receiving the article; must share the cluster's
        ``user_id`` (asserted).
    :param link: link of the new article; when falsy the main
        article's link is used.
    :return: the new article as re-read after clustering.
    """
    self.assertEqual(cluster.user_id, feed.user_id)
    suffix = str(randint(0, 9999))
    article_ctrl = ArticleController(cluster.user_id)
    template = cluster.main_article
    created = article_ctrl.create(
        feed_id=feed.id,
        entry_id=template.entry_id + suffix,
        link=template.link if not link else link,
        title=template.title + suffix,
        content=template.content + suffix,
        date=template.date + timedelta(1),
        retrieved_date=template.retrieved_date)
    ClusterController(cluster.user_id).clusterize_pending_articles()
    return article_ctrl.read(id=created.id).first()
def put(cluster_id):
    """Update the cluster ``cluster_id`` with the parsed request params.

    A ``read_reason`` supplied by the caller is never overridden.
    Without one, setting ``read`` to a truthy value records
    ``ReadReason.marked`` (and increments the ``READ`` counter for that
    reason), while a falsy ``read`` resets the reason to ``None``.

    :param cluster_id: identifier of the cluster to update.
    :return: ``(None, 204)``.
    """
    cctrl = ClusterController(current_identity.id)
    attrs = parse_meaningful_params(cluster_parser)

    if 'read_reason' not in attrs and 'read' in attrs:
        if attrs.get('read'):
            attrs['read_reason'] = ReadReason.marked
            READ.labels(reason=ReadReason.marked.value).inc()
        else:
            attrs['read_reason'] = None

    if not cctrl.update({'id': cluster_id}, attrs):
        # nothing was updated: let the controller check access rights
        cctrl.assert_right_ok(cluster_id)
    return None, 204
def get(cluster_id):
    """Return the cluster ``cluster_id``, marking it read when needed.

    :param cluster_id: identifier of the cluster to fetch.
    :return: ``(cluster, 200)`` when the cluster was already read,
        ``(cluster, 226)`` when this call just marked it as read with
        ``ReadReason.read``.
    :raises Forbidden: the cluster's ``user_id`` is not the current
        identity's id.
    """
    cluc = ClusterController()
    cluster = cluc.get(id=cluster_id)
    if cluster.user_id != current_identity.id:
        raise Forbidden()
    if cluster.read:
        return cluster, 200

    cluc.update({'id': cluster_id},
                {'read': True, 'read_reason': ReadReason.read})
    READ.labels(reason=ReadReason.read.value).inc()
    # mirroring the update on the object about to be returned
    cluster.read = True
    cluster.read_reason = ReadReason.read
    return cluster, 226
def test_MarkClustersAsRead_put_only_singles(self):
    """Marking as read with ``only_singles`` leaves one unread cluster.

    A new article reusing the link of an existing article of the same
    feed is created (with ``cluster_same_feed`` enabled) before marking.
    """
    user_id = self.user.id
    feed = FeedController(user_id).read()[0]
    update_on_all_objs(feeds=[feed],
                       cluster_same_feed=True,
                       cluster_enabled=True)

    # creating a new article that will cluster
    new_article = {
        'entry_id': 'new entry_id',
        'title': 'new title',
        'content': 'new content',
        'feed_id': feed.id,
        'link': feed.articles[0].link,
    }
    ArticleController(user_id).create(**new_article)
    ClusterController(user_id).clusterize_pending_articles()

    self.assertClusterCount(18, {'filter': 'unread'})  # one per feed
    self._mark_as_read(2, {'only_singles': True, 'filter': 'unread'})
    self.assertClusterCount(1, {'filter': 'unread'})
def test_ClusterResource_put(self):
    """PUT on a cluster: access control, read / liked flags and reason."""
    def send(payload, **auth):
        return self.jarr_client('put', 'cluster', cluster.id,
                                data=payload, **auth)

    def reload():
        return ClusterController().get(id=cluster.id)

    cluster = ClusterController().read().first()
    user = UserController().get(id=cluster.user_id)

    self.assertStatusCode(401, send({'read': True}))
    self.assertStatusCode(403, send({'read': True}, user='******'))

    # marking as read
    resp = send({'read': True}, user=user.login)
    self.assertStatusCode(204, resp)
    cluster = reload()
    self.assertTrue(cluster.read)
    self.assertFalse(cluster.liked)
    self.assertEqual('marked', cluster.read_reason.value)

    # marking as read / consulted
    resp = send({'read_reason': 'consulted', 'read': True},
                user=user.login)
    self.assertStatusCode(204, resp)
    cluster = reload()
    self.assertTrue(cluster.read)
    self.assertFalse(cluster.liked)
    self.assertEqual('consulted', cluster.read_reason.value)

    # marking as liked
    resp = send({'liked': True}, user=user.login)
    self.assertStatusCode(204, resp)
    self.assertTrue(reload().read)
    self.assertTrue(reload().liked)

    # unsetting both flags also clears the read reason
    resp = send({'liked': False, 'read': False}, user=user.login)
    self.assertStatusCode(204, resp)
    self.assertFalse(reload().read)
    self.assertFalse(reload().liked)
    self.assertIsNone(reload().read_reason)
def _test_create_using_filters(self):
    """Check read / liked flags of clusters built from filtered feeds.

    Three feeds of ``USER_ID`` receive filter configurations, ten
    articles are created across them, pending articles are clusterized
    and the ``read`` / ``liked`` flags of the resulting clusters are
    asserted. The leading underscore in the name keeps the method out
    of test discovery (see the FIXME below).
    """
    # FIXME wait redo filters
    feed_ctr = FeedController(USER_ID)
    acontr = ArticleController(USER_ID)
    feed1, feed2, feed3 = [f for f in feed_ctr.read()][0:3]
    # feed3: clustering on, favorite when the regex does NOT match,
    # read when "pattern3" matches
    feed_ctr.update({'id': feed3.id}, {
        'cluster_enabled': True,
        'filters': [{
            "type": "regex",
            "pattern": ".*(pattern1|pattern2).*",
            "action on": "no match",
            "action": "mark as favorite"
        }, {
            "type": "simple match",
            "pattern": "pattern3",
            "action on": "match",
            "action": "mark as read"
        }]
    })
    # feed1: read when "pattern3" matches
    feed_ctr.update({'id': feed1.id}, {
        'filters': [{
            "type": "simple match",
            "pattern": "pattern3",
            "action on": "match",
            "action": "mark as read"
        }]
    })
    # feed2: two tag-based filters whose action is "skipped"
    feed_ctr.update({'id': feed2.id}, {
        'filters': [{
            "type": "tag match",
            "pattern": "pattern4",
            "action on": "match",
            "action": "skipped"
        }, {
            "type": "tag contains",
            "pattern": "pattern5",
            "action on": "match",
            "action": "skipped"
        }]
    })
    # art1 and art5 share the link "cluster1"
    art1 = acontr.create(entry_id="will be read and faved 1",
                         feed_id=feed1.id,
                         title="garbage pattern1 pattern3 garbage",
                         content="doesn't matter",
                         link="cluster1")
    art2 = acontr.create(entry_id="will be ignored 2",
                         feed_id=feed1.id,
                         title="garbage see pattern garbage",
                         content="doesn't matter2",
                         link="is ignored 2")
    art3 = acontr.create(entry_id="will be read 3",
                         user_id=2,
                         feed_id=feed2.id,
                         title="garbage pattern3 garbage",
                         content="doesn't matter",
                         link="doesn't matter either3")
    art4 = acontr.create(entry_id="will be ignored 4",
                         user_id=2,
                         feed_id=feed2.id,
                         title="garbage see pattern garbage",
                         content="doesn't matter2",
                         link="doesn't matter either4")
    art5 = acontr.create(entry_id="will be faved 5",
                         feed_id=feed3.id,
                         title="garbage anti-attern3 garbage",
                         content="doesn't matter",
                         link="cluster1")
    art6 = acontr.create(entry_id="will be faved 6",
                         feed_id=feed3.id,
                         title="garbage pattern1 garbage",
                         content="doesn't matter2",
                         link="doesn't matter 6")
    art7 = acontr.create(entry_id="will be read 7",
                         feed_id=feed3.id,
                         title="garbage pattern3 garbage",
                         content="doesn't matter3",
                         link="doesn't matter either7")
    art8 = acontr.create(entry_id="will be ignored",
                         feed_id=feed3.id,
                         title="garbage pattern4 garbage",
                         content="doesn't matter4-matter4_matter4",
                         lang='fa_ke',
                         link="doesn't matter either8")
    # art9 / art10 carry tags targeted by feed2's "skipped" filters and
    # are asserted to be None further down
    art9 = acontr.create(entry_id="unique9",
                         feed_id=feed2.id,
                         title="garbage",
                         tags=['garbage', 'pattern4'],
                         content="doesn't matterç",
                         link="doesn't matter either9")
    art10 = acontr.create(entry_id="will be ignored",
                          feed_id=feed2.id,
                          title="garbage",
                          tags=['pattern5 garbage', 'garbage'],
                          content="doesn't matter10",
                          link="doesn't matter either10")

    ClusterController(USER_ID).clusterize_pending_articles()

    # art1..art4 are re-read through the controller, art5..art8 are
    # checked on the objects returned by create()
    self.assertTrue(acontr.get(id=art1.id).cluster.read)
    self.assertFalse(acontr.get(id=art1.id).cluster.liked)
    self.assertFalse(acontr.get(id=art2.id).cluster.read)
    self.assertFalse(acontr.get(id=art2.id).cluster.liked)
    self.assertFalse(acontr.get(id=art3.id).cluster.read)
    self.assertFalse(acontr.get(id=art3.id).cluster.liked)
    self.assertFalse(acontr.get(id=art4.id).cluster.read)
    self.assertFalse(acontr.get(id=art4.id).cluster.liked)
    self.assertTrue(art5.cluster.read,
                    "should be read because it clustered")
    self.assertTrue(art5.cluster.liked)
    self.assertFalse(art6.cluster.read)
    self.assertFalse(art6.cluster.liked)
    self.assertTrue(art7.cluster.read)
    self.assertTrue(art7.cluster.liked)
    self.assertFalse(art8.cluster.read)
    self.assertTrue(art8.cluster.liked)
    self.assertIsNone(art9)
    self.assertEqual(0, acontr.read(entry_id='unique9').count())
    self.assertIsNone(art10)
    # NOTE(review): art10 was created with entry_id "will be ignored",
    # not 'unique10', so this count does not depend on art10 -- confirm
    # the intended entry_id when the filters are reworked.
    self.assertEqual(0, acontr.read(entry_id='unique10').count())
def delete(self, obj_id, commit=True):
    """Delete the object ``obj_id`` after detaching what points at it.

    Before delegating to the parent ``delete``, every cluster matching
    ``user_id == obj_id`` gets its ``main_article_id`` reset and every
    article matching the same filter gets its ``cluster_id`` reset.

    :param obj_id: identifier of the object to delete; also used as the
        ``user_id`` filter of the two clean-up updates.
    :param commit: handed over to the parent ``delete`` (it used to be
        accepted here but silently dropped).
    :return: whatever the parent ``delete`` returns.
    """
    from jarr.controllers import ArticleController, ClusterController
    fltr = {"user_id": obj_id}
    ClusterController(self.user_id).update(fltr, {"main_article_id": None})
    ArticleController(self.user_id).update(fltr, {"cluster_id": None})
    # NOTE(review): assumes the parent ``delete`` takes the same
    # ``commit`` keyword as this override -- confirm on the base class.
    return super().delete(obj_id, commit=commit)
def clusterizer(user_id):
    """Clusterize the pending articles of ``user_id``.

    A warning-level log line is emitted before the work starts.

    :param user_id: id the ``ClusterController`` is scoped to.
    """
    logger.warning("Gonna clusterize pending articles")
    controller = ClusterController(user_id)
    controller.clusterize_pending_articles()
def test_delete(self):
    """Deleting every feed leaves user 2 without cluster nor article."""
    feeds = FeedController()
    for feed in feeds.read():
        feeds.delete(feed.id)

    for controller_cls in (ClusterController, ArticleController):
        self.assertEqual(0, controller_cls(2).read().count())
def clusterizer(user_id):
    """Clusterize ``user_id``'s pending articles, then drop its Redis key.

    A warning-level log line is emitted first. Once clustering returns,
    the key ``JARR_CLUSTERIZER_KEY % user_id`` is deleted from Redis.

    :param user_id: id the ``ClusterController`` is scoped to, also
        interpolated into the Redis key.
    """
    logger.warning("Gonna clusterize pending articles")
    controller = ClusterController(user_id)
    controller.clusterize_pending_articles()
    # NOTE(review): the key is left in place if clustering raises --
    # confirm that this is intended.
    redis_key = JARR_CLUSTERIZER_KEY % user_id
    REDIS_CONN.delete(redis_key)
def _clean_objs(self):
    """Delete every article then every cluster, and check none is left."""
    controller_classes = (ArticleController, ClusterController)

    for controller_cls in controller_classes:
        for stored in controller_cls().read():
            controller_cls().delete(stored.id)

    for controller_cls in controller_classes:
        self.assertEqual(0, controller_cls().read().count())