def test_articles_with_enclosure_and_fetched_content(self, truncated_cnt, get_vector):
    """An entry with an enclosure on a truncated feed yields two articles
    (main + audio) that cluster together, and the cluster content exposes
    the fetched version of the article.
    """
    self._clean_objs()
    # mocked helpers: no vector (tf-idf gets nothing) and a fake fetched
    # content payload returned by the truncated-content fetcher
    get_vector.return_value = None
    truncated_cnt.return_value = {'type': 'fetched',
                                  'title': 'holy grail',
                                  'content': 'blue, no read, aaah',
                                  'link': 'https://monthy.python/brian'}
    feed = FeedController().read().first()
    FeedController().update({'id': feed.id},
                            {'truncated_content': True,
                             'cluster_enabled': True})
    UserController().update({'id': feed.user_id}, {'cluster_enabled': True})
    builder = ClassicArticleBuilder(feed, self.entry_w_enclosure, {})
    self.assertIsNone(builder.article.get('article_type'))
    # enhance() yields one raw article per content: main + enclosure
    raw_articles = list(builder.enhance())
    self.assertEqual(2, len(raw_articles))
    self.assertEqual('audio', raw_articles[1]['article_type'].value)
    articles = []
    for raw_article in raw_articles:
        articles.append(
            ArticleController(feed.user_id).create(**raw_article))
    ClusterController(feed.user_id).clusterize_pending_articles()
    a1 = ArticleController().get(id=articles[0].id)
    a2 = ArticleController().get(id=articles[1].id)
    # both articles ended up in the same cluster
    self.assertEqual(a1.cluster_id, a2.cluster_id)
    cluster = ClusterController().get(id=a1.cluster_id)
    self.assertEqual(2, cluster.content['v'])
    self.assertEqual(1, len(cluster.content['contents']))
    # the stored content is the fetched one from the mock above
    self.assertEqual('fetched', cluster.content['contents'][0]['type'])
def test_no_add_feed_skip(self):
    """Crawling while every feed filter skips or marks articles must not
    add any article to the database.
    """
    self.resp_status_code = 304
    self.assertEqual(BASE_COUNT, ArticleController().read().count())
    crawler()
    skip_filters = [
        {"type": "tag contains", "action on": "match",
         "pattern": "pattern5", "action": "skipped"},
        {"type": "simple match", "action on": "match",
         "pattern": "pattern5", "action": "mark as read"},
        {"type": "regex", "action on": "match",
         "pattern": "pattern5", "action": "skipped"},
    ]
    FeedController().update({}, {'filters': skip_filters})
    crawler()
    self.assertEqual(BASE_COUNT, ArticleController().read().count())
def create_article_from(self, cluster, feed, link=None):
    """Clone the cluster's main article into *feed*, run the clusterizer,
    and return the freshly created article.
    """
    self.assertEqual(cluster.user_id, feed.user_id)
    main = cluster.main_article
    rand_suffix = str(randint(0, 9999))
    controller = ArticleController(cluster.user_id)
    created = controller.create(
        feed_id=feed.id,
        entry_id=main.entry_id + rand_suffix,
        link=link or main.link,
        title=main.title + rand_suffix,
        content=main.content + rand_suffix,
        date=main.date + timedelta(1),
        retrieved_date=main.retrieved_date)
    ClusterController(cluster.user_id).clusterize_pending_articles()
    return controller.read(id=created.id).first()
def test_adding_to_cluster_by_link(self):
    """A new article sharing a link with a read cluster should join it
    and, with cluster_wake_up enabled, flip the cluster back to unread.
    """
    ccontr = ClusterController()
    cluster = ccontr.read().first()
    # mark the cluster as read so the wake-up can be observed
    ccontr.update({'id': cluster.id}, {
        'read': True,
        'read_reason': 'marked'
    })
    cluster = ccontr.get(id=cluster.id)
    self.assertTrue(cluster.read)
    article = cluster.articles[0]
    articles_count = len(cluster.articles)
    fcontr = FeedController(cluster.user_id)
    acontr = ArticleController(cluster.user_id)
    fcontr.update({'id': article.feed_id}, {'cluster_wake_up': True})
    # clone the article into a different feed (same link => clusters)
    feed = fcontr.read(id__ne=article.feed_id).first()
    update_on_all_objs(articles=[article], feeds=[feed],
                       cluster_enabled=True)
    self._clone_article(acontr, article, feed)
    ccontr.clusterize_pending_articles()
    cluster = ccontr.get(id=cluster.id)
    self.assertEqual(articles_count + 1, len(cluster.articles))
    # the new member woke the cluster up
    self.assertFalse(cluster.read)
def _test_unread_on_cluster(self, read_reason):
    """Shared scenario: mark a cluster read with *read_reason*, enable
    clustering + wake-up on the involved feeds, cluster a clone into it,
    and return the refreshed cluster for read-state assertions.
    """
    ccontr = ClusterController()
    fcontr = FeedController()
    cluster = ccontr.read().first()
    clusterizer = Clusterizer()
    # defaults: clustering disabled, wake-up enabled
    self.assertFalse(clusterizer.get_config(cluster, 'cluster_enabled'))
    self.assertTrue(clusterizer.get_config(cluster, 'cluster_wake_up'))
    ccontr.update({'id': cluster.id}, {
        'read': True,
        'read_reason': read_reason
    })
    target_feed = fcontr.read(id__ne=cluster.main_article.feed_id,
                              user_id=cluster.user_id).first()
    # a fresh Clusterizer is built after each config change — presumably
    # because it caches config lookups; TODO confirm
    clusterizer = Clusterizer()
    self.assertFalse(clusterizer.get_config(target_feed, 'cluster_enabled'))
    # enable clustering and wake-up on every feed of the cluster plus
    # the feed that will host the clone
    fcontr.update(
        {'id__in': [f.id for f in cluster.feeds] + [target_feed.id]}, {
            'cluster_wake_up': True,
            'cluster_enabled': True
        })
    clusterizer = Clusterizer()
    self.assertTrue(clusterizer.get_config(cluster, 'cluster_enabled'))
    target_feed = fcontr.read(id__ne=cluster.main_article.feed_id,
                              user_id=cluster.user_id).first()
    article = self._clone_article(ArticleController(),
                                  cluster.main_article, target_feed)
    clusterizer = Clusterizer()
    self.assertTrue(clusterizer.get_config(article, 'cluster_wake_up'))
    ClusterController(cluster.user_id).clusterize_pending_articles()
    self.assertEqual(2, len(article.cluster.articles))
    self.assertInCluster(article, cluster)
    return ccontr.get(id=cluster.id)
def main(self, article, filter_result=None):
    """Will add given article to a fitting cluster or create a cluster
    fitting that article.

    *filter_result* may carry the outcome of feed filters: whether
    clustering is allowed and whether the article starts read/liked.
    """
    filter_result = filter_result or {}
    allow_clustering = filter_result.get('clustering', True)
    filter_read = filter_result.get('read', False)
    filter_liked = filter_result.get('liked', False)
    logger.info('%r - processed filter: %r', article, filter_result)
    cluster_config = self.get_config(article.feed, 'cluster_enabled')
    # fetching article so that vector comparison is made on full content
    ArticleController(article.user_id).enhance(article)
    if not allow_clustering:
        cluster_event(context='clustering', result='filter forbid')
    elif not cluster_config:
        cluster_event(context='clustering', result='config forbid')
    else:
        # try the cheap link match first, then fall back to tf-idf
        # similarity unless config or article type forbids it
        cluster = self._get_cluster_by_link(article)
        if not cluster:
            if not self.get_config(article.feed, 'cluster_tfidf_enabled'):
                cluster_event(context='tfidf', result='config forbid')
            elif article.article_type in NO_CLUSTER_TYPE:
                cluster_event(context='tfidf', result='wrong article type')
            else:
                cluster = self._get_cluster_by_similarity(article)
        if cluster:
            return self.enrich_cluster(cluster, article,
                                       filter_read, filter_liked)
    # no fitting cluster (or clustering forbidden): create a new one
    return self._create_from_article(article, filter_read, filter_liked)
def test_feed_and_article_deletion(self):
    """Deleting a category must cascade to its feeds and articles."""
    cctrl = CategoryController(2)
    category = cctrl.read().first()
    cctrl.delete(category.id)
    remaining_articles = ArticleController().read(category_id=category.id)
    self.assertEqual(0, remaining_articles.count())
    remaining_feeds = FeedController().read(category_id=category.id)
    self.assertEqual(0, remaining_feeds.count())
def scheduler():
    """Periodic master task: enqueue feed fetches, at most one feed
    deletion, and per-user clusterizers, then reschedule itself.
    """
    logger.warning("Running scheduler")
    start = datetime.now()
    fctrl = FeedController()
    # browsing feeds to fetch
    feeds = list(fctrl.list_fetchable(conf.crawler.batch_size))
    WORKER_BATCH.labels(worker_type='fetch-feed').observe(len(feeds))
    logger.info('%d to enqueue', len(feeds))
    for feed in feeds:
        logger.debug("%r: scheduling to be fetched", feed)
        process_feed.apply_async(args=[feed.id])
    # browsing feeds to delete
    feeds_to_delete = list(fctrl.read(status=FeedStatus.to_delete))
    # redis lock so concurrent scheduler runs don't both trigger deletion
    if feeds_to_delete and REDIS_CONN.setnx(JARR_FEED_DEL_KEY, 'true'):
        REDIS_CONN.expire(JARR_FEED_DEL_KEY, LOCK_EXPIRE)
        logger.info('%d to delete, deleting one', len(feeds_to_delete))
        for feed in feeds_to_delete:
            logger.debug("%r: scheduling to be delete", feed)
            feed_cleaner.apply_async(args=[feed.id])
            break  # only one at a time
    # applying clusterizer
    for user_id in ArticleController.get_user_id_with_pending_articles():
        # skip users that are not effectively active
        if not UserController().get(id=user_id).effectivly_active:
            continue
        # per-user redis lock rate-limits clusterizer scheduling
        if REDIS_CONN.setnx(JARR_CLUSTERIZER_KEY % user_id, 'true'):
            REDIS_CONN.expire(JARR_CLUSTERIZER_KEY % user_id,
                              conf.crawler.clusterizer_delay)
            clusterizer.apply_async(args=[user_id])
    # re-enqueue self after the idle delay
    scheduler.apply_async(countdown=conf.crawler.idle_delay)
    WORKER.labels(method='scheduler').observe(
        (datetime.now() - start).total_seconds())
    update_slow_metrics.apply_async()
def test_http_crawler_add_articles(self):
    """Crawling should create new articles with cleaned-up content, and a
    subsequent 304 response should add nothing more.
    """
    self.assertEqual(BASE_COUNT, ArticleController().read().count())
    crawler()
    articles = list(ArticleController().read())
    new_count = len(articles)
    # assertLess both implies inequality and reports values on failure
    # (replaces the redundant assertNotEqual + assertTrue(a < b) pair)
    self.assertLess(BASE_COUNT, new_count)
    for art in articles:
        # content cleaning: no responsive srcset, no root-relative src
        self.assertNotIn('srcset=', art.content)
        self.assertNotIn('src="/', art.content)
    # a 304 answer must not create anything new
    self.resp_status_code = 304
    crawler()
    self.assertEqual(new_count, ArticleController().read().count())
def test_cluster_same_feed(self):
    """cluster_same_feed can be toggled at feed, category and user level;
    the most specific level with a non-None value decides.
    """
    article = ArticleController().read(category_id__ne=None).first()
    cluster = article.cluster
    # all is enabled, article in cluster
    update_on_all_objs(articles=cluster.articles, cluster_enabled=True,
                       cluster_same_feed=True)
    article = self.create_article_from(cluster, cluster.main_article.feed)
    self.assertInCluster(article, cluster)
    # feed's disabled, won't cluster
    FeedController().update(
        {'id__in': [a.feed_id for a in cluster.articles]},
        {'cluster_same_feed': False})
    article = self.create_article_from(cluster, cluster.main_article.feed)
    self.assertNotInCluster(article, cluster)
    # category's disabled, won't cluster (feed reset to None = inherit)
    FeedController().update(
        {'id__in': [a.feed_id for a in cluster.articles]},
        {'cluster_same_feed': None})
    CategoryController().update({'id': cluster.main_article.category.id},
                                {'cluster_same_feed': False})
    article = self.create_article_from(cluster, cluster.main_article.feed)
    self.assertNotInCluster(article, cluster)
    # user's disable, won't cluster (category reset to None = inherit)
    CategoryController().update({'id': cluster.main_article.category.id},
                                {'cluster_same_feed': None})
    UserController().update({'id': cluster.user_id},
                            {'cluster_same_feed': False})
    article = self.create_article_from(cluster, cluster.main_article.feed)
    self.assertNotInCluster(article, cluster)
    # reenabling user, will cluster
    UserController().update({'id': cluster.user_id},
                            {'cluster_same_feed': True})
    article = self.create_article_from(cluster, cluster.main_article.feed)
    self.assertInCluster(article, cluster)
def test_cluster_tfidf_control(self):
    """tf-idf clustering only applies to feeds with
    cluster_tfidf_enabled; distinct links bypass link-based matching so
    only the similarity path is exercised.
    """
    article = ArticleController().read(category_id__ne=None).first()
    cluster = article.cluster
    # leaving one cluster with one article
    clu_ids = [c.id for c in ClusterController().read(id__ne=cluster.id)]
    art_ids = [
        a.id
        for a in ArticleController().read(id__ne=cluster.main_article_id)
    ]
    # detach before deleting to avoid dangling cluster references
    ArticleController().update({'id__in': art_ids}, {'cluster_id': None})
    for clu_id in clu_ids:
        ClusterController().delete(clu_id)
    for art_id in art_ids:
        ArticleController().delete(art_id)
    self.assertEqual(1, ClusterController().read().count())
    self.assertEqual(1, ArticleController().read().count())
    # permissive tf-idf thresholds so any similarity is accepted
    feed1 = FeedController(cluster.user_id).create(
        title='new feed',
        cluster_conf={
            'tfidf_min_score': -1,
            'tfidf_min_sample_size': 1
        })
    update_on_all_objs(articles=cluster.articles, feeds=[feed1],
                       cluster_tfidf_enabled=True, cluster_enabled=True)
    # feed2 has tf-idf explicitly disabled
    feed2 = FeedController(cluster.user_id).create(
        cluster_enabled=True,
        cluster_tfidf_enabled=False,
        title='new feed',
        cluster_conf={
            'tfidf_min_score': -1,
            'tfidf_min_sample_size': 1
        })
    # differing links force the tf-idf path instead of link matching
    article = self.create_article_from(
        cluster, feed1,
        link=cluster.main_article.link + 'do not match link')
    self.assertInCluster(article, cluster, ClusterReason.tf_idf)
    article = self.create_article_from(
        cluster, feed2,
        link=cluster.main_article.link + 'do not match link either')
    self.assertNotInCluster(article, cluster)
def test_delete_main_cluster_handling(self):
    """When the feed carrying a cluster's main article is deleted, the
    cluster must elect a new main article, title and feed title.
    """
    suffix = 'suffix'
    clu = ClusterController().get(id=10)
    acontr = ArticleController(clu.user_id)
    fcontr = FeedController(clu.user_id)
    old_title = clu.main_title
    old_feed_title, old_art_id = clu.main_feed_title, clu.main_article_id
    # remove same-link siblings so the clone below is the only companion
    for art_to_del in acontr.read(link=clu.main_article.link,
                                  id__ne=clu.main_article.id):
        acontr.delete(art_to_del.id)
    other_feed = fcontr.read(id__ne=clu.main_article.feed_id).first()
    update_on_all_objs(articles=[clu.main_article], feeds=[other_feed],
                       cluster_enabled=True)
    # clone the main article into another feed (same link => clusters)
    acontr.create(
        feed_id=other_feed.id,
        entry_id=clu.main_article.entry_id + suffix,
        link=clu.main_article.link,
        title=clu.main_article.title + suffix,
        content=clu.main_article.content + suffix,
        date=clu.main_article.date + timedelta(1),
        retrieved_date=clu.main_article.retrieved_date + timedelta(1),
    )
    ClusterController(clu.user_id).clusterize_pending_articles()
    clu = ClusterController().get(id=10)
    self.assertEqual(2, len(clu.articles))
    # deleting the main article's feed triggers main-article re-election
    fcontr.delete(clu.main_article.feed_id)
    new_cluster = ClusterController(clu.user_id).get(id=clu.id)
    self.assertEqual(1, len(new_cluster.articles))
    self.assertNotEqual(old_title, new_cluster.main_title)
    self.assertNotEqual(old_feed_title, new_cluster.main_feed_title)
    self.assertNotEqual(old_art_id, new_cluster.main_article_id)
def populate_db():
    """Seed the test database: one admin, two regular users, and for each
    user two iterations of categories/feeds/articles, then clusterize.
    """
    fcontr = FeedController()
    ccontr = CategoryController()
    UserController().create(
        **{
            'is_admin': True,
            'is_api': True,
            'cluster_enabled': False,
            'login': '******',
            'password': '******'
        })
    user1, user2 = [
        UserController().create(login=name,
                                cluster_enabled=False,
                                email="*****@*****.**" % name,
                                password=name)
        for name in ["user1", "user2"]
    ]
    for iteration in range(2):
        article_total = 0
        for user in (user1, user2):
            for iter_cat in range(3):
                cat_id = None
                # first slot (iter_cat == 0) gets a category-less feed
                if iter_cat:
                    cat_id = ccontr.create(user_id=user.id,
                                           name=to_name(user, iteration,
                                                        iter_cat)).id
                feed_id = fcontr.create(
                    link="feed%d%d" % (iteration, iter_cat),
                    user_id=user.id,
                    category_id=cat_id,
                    title=to_name(user, iteration, iter_cat, iter_cat)).id
                for iter_art in range(3):
                    entry = to_name(user, iteration, iter_cat, iter_cat,
                                    iter_art)
                    tags = [
                        to_name(user, iteration, iter_cat, iter_cat,
                                iter_art, str(i)) for i in range(2)
                    ]
                    article_total += 1
                    ArticleController().create(
                        entry_id=entry,
                        link='http://test.te/%d' % article_total,
                        feed_id=feed_id,
                        user_id=user.id,
                        tags=tags,
                        category_id=cat_id,
                        title=entry,
                        date=utc_now() + timedelta(seconds=iteration),
                        content="content %d" % article_total)
    session.commit()
    # NOTE(review): flush() right after commit() looks like a no-op —
    # confirm whether it can be dropped
    session.flush()
    ClusterController().clusterize_pending_articles()
def test_matching_etag(self):
    """Feeds whose cached etag matches the response must not be
    refetched; a mismatching etag must trigger a fetch.
    """
    # exact etag match: no new articles
    self._reset_feeds_freshness(etag='fake etag')
    self.resp_headers = {'etag': 'fake etag'}
    self.assertEqual(BASE_COUNT, ArticleController().read().count())
    crawler()
    self.assertEqual(BASE_COUNT, ArticleController().read().count())
    # jarr-formatted etag built from the content hash: still no fetch
    jarr_etag = 'jarr/"%s"' % to_hash(self._content)
    self._reset_feeds_freshness(etag=jarr_etag)
    self.resp_headers = {'etag': jarr_etag}
    crawler()
    self.assertEqual(BASE_COUNT, ArticleController().read().count())
    # mismatching etag: the feed is fetched and articles are created
    self._reset_feeds_freshness(etag='jarr/fake etag')
    self.resp_headers = {'etag': '########################'}
    crawler()
    self.assertNotEqual(BASE_COUNT, ArticleController().read().count())
def test_articles_with_enclosure(self):
    """An entry with an enclosure yields two articles (main + audio)
    which cluster together, with no contents stored on the cluster.
    """
    self._clean_objs()
    feed = FeedController().read().first()
    UserController().update({'id': feed.user_id}, {'cluster_enabled': True})
    builder = ClassicArticleBuilder(feed, self.entry_w_enclosure, {})
    self.assertIsNone(builder.article.get('article_type'))
    # enhance() yields one raw article per content: main + enclosure
    raw_articles = list(builder.enhance())
    self.assertEqual(2, len(raw_articles))
    # the enclosure-derived article is typed as audio
    self.assertEqual('audio', raw_articles[1]['article_type'].value)
    articles = []
    for raw_article in raw_articles:
        articles.append(
            ArticleController(feed.user_id).create(**raw_article))
    ClusterController(feed.user_id).clusterize_pending_articles()
    a1 = ArticleController().get(id=articles[0].id)
    a2 = ArticleController().get(id=articles[1].id)
    cluster = ClusterController().get(id=a1.cluster_id)
    self.assertEqual(a1.cluster_id, a2.cluster_id)
    self.assertEqual(2, cluster.content['v'])
    # unlike the truncated-content case, nothing is stored here
    self.assertEqual(0, len(cluster.content['contents']))
def create_missing_article(self, response):
    """Parse *response*, challenge its entries against the DB, and create
    articles for the entries not already present.

    Entries flagged by the builder as skipped (feed filters) are never
    created; when everything matched in base nothing is added.
    """
    logger.info('%r: cache validation failed, challenging entries',
                self.feed)
    parsed = self.parse_feed_response(response)
    if parsed is None:
        return
    ids, entries, skipped_list = [], {}, []
    for entry in parsed['entries']:
        if not entry:
            continue
        builder = self.article_builder(self.feed, entry)
        if builder.do_skip_creation:
            skipped_list.append(builder.entry_ids)
            logger.debug('%r: skipping article', self.feed)
            continue
        entry_ids = builder.entry_ids
        # index builders by their sorted id items for later retrieval
        entries[tuple(sorted(entry_ids.items()))] = builder
        ids.append(entry_ids)
    if not ids and skipped_list:
        logger.debug('%r: nothing to add (skipped %r) %r',
                     self.feed, skipped_list, parsed)
        return
    logger.debug("%r: found %d entries %r", self.feed, len(ids), ids)
    article_created = False
    actrl = ArticleController(self.feed.user_id)
    new_entries_ids = list(actrl.challenge(ids=ids))
    logger.debug("%r: %d entries wern't matched and will be created",
                 self.feed, len(new_entries_ids))
    for id_to_create in new_entries_ids:
        article_created = True
        builder = entries[tuple(sorted(id_to_create.items()))]
        # FIX: enhance() yields one raw-article dict per content (the
        # main article plus one per enclosure) — iterate it instead of
        # splatting the generator itself into create()
        for new_article in builder.enhance():
            article = actrl.create(**new_article)
            logger.info('%r: created %r', self.feed, article)
    if not article_created:
        logger.info('%r: all article matched in db, adding nothing',
                    self.feed)
def test_ClusterResource_delete(self):
    """DELETE on a cluster: 401 anonymous, 403 for another user, 204 for
    the owner; afterwards the cluster and its articles are gone.
    """
    cluster = ClusterController().read().first()
    owner = UserController().get(id=cluster.user_id)
    resp = self.jarr_client('delete', 'cluster', cluster.id)
    self.assertStatusCode(401, resp)
    resp = self.jarr_client('delete', 'cluster', cluster.id, user='******')
    self.assertStatusCode(403, resp)
    resp = self.jarr_client('delete', 'cluster', cluster.id,
                            user=owner.login)
    self.assertStatusCode(204, resp)
    self.assertEqual(0, ClusterController().read(id=cluster.id).count())
    self.assertEqual(0,
                     ArticleController().read(cluster_id=cluster.id).count())
def test_model_relations(self):
    """Walk the ORM relations between article, cluster, feed, category."""
    article = ArticleController().read(category_id__ne=None).first()
    cluster = article.cluster
    feed = article.feed
    category = article.category
    # article relations
    self.assertIsNotNone(cluster)
    self.assertIsNotNone(category)
    self.assertIsNotNone(feed)
    # feed parent relation
    self.assertEqual(category, feed.category)
    # cluster back-references
    self.assertIn(cluster, feed.clusters)
    self.assertIn(cluster, category.clusters)
    self.assertIn(feed, cluster.feeds)
    self.assertIn(category, cluster.categories)
    self.assertIn(cluster.main_article, cluster.articles)
def test_MarkClustersAsRead_put_only_singles(self):
    """Marking only singles as read must spare multi-article clusters."""
    feed = FeedController(self.user.id).read()[0]
    update_on_all_objs(feeds=[feed], cluster_same_feed=True,
                       cluster_enabled=True)
    # creating a new article that will cluster
    ArticleController(self.user.id).create(entry_id='new entry_id',
                                           title='new title',
                                           content='new content',
                                           feed_id=feed.id,
                                           link=feed.articles[0].link)
    ClusterController(self.user.id).clusterize_pending_articles()
    self.assertClusterCount(18, {'filter': 'unread'})
    # one per feed
    self._mark_as_read(2, {'only_singles': True, 'filter': 'unread'})
    # only the multi-article cluster remains unread
    self.assertClusterCount(1, {'filter': 'unread'})
def test_no_cluster_same_category_on_original_category(self):
    """cluster_same_category=False on the original category must block
    clustering; re-enabling it must allow clustering again.
    """
    article = ArticleController().read(category_id__ne=None).first()
    category_id = article.category_id
    cat_ctrl = CategoryController(article.user_id)
    cluster = article.cluster
    feed = FeedController(cluster.user_id).create(title='new feed',
                                                  category_id=category_id)
    update_on_all_objs(articles=cluster.articles, feeds=[feed],
                       cluster_same_category=None, cluster_enabled=True)
    # disabled on the category: the clone must not join the cluster
    cat_ctrl.update({'id': category_id}, {'cluster_same_category': False})
    article = self.create_article_from(cluster, feed)
    self.assertNotInCluster(article, cluster)
    # re-enabled: the clone clusters again
    cat_ctrl.update({'id': category_id}, {'cluster_same_category': True})
    article = self.create_article_from(cluster, feed)
    self.assertInCluster(article, cluster)
def test_opml_dump_and_restore(self):
    """Round trip: export OPML, wipe feeds/categories/articles, import
    the dump (201, everything created), then import again (200,
    everything already existing).
    """
    # downloading OPML export file
    resp = self.jarr_client('get', '/opml', user=self.user.login)
    self.assertStatusCode(200, resp)
    opml_dump = resp.data.decode()
    self.assertTrue(
        opml_dump.startswith('<?xml version="1.0" encoding="utf-8"'))
    self.assertTrue(opml_dump.endswith('</opml>'))
    # cleaning db
    actrl = ArticleController(self.user.id)
    for item in actrl.read():
        actrl.delete(item.id)
    self.assertEqual(0, ClusterController(self.user.id).read().count())
    self.assertEqual(0, ArticleController(self.user.id).read().count())
    # snapshot the feed/category layout before deleting everything,
    # so the import can be checked against it
    no_category_feed = []
    existing_feeds = {}
    for feed in self.fctrl.read():
        if feed.category:
            if feed.category.name in existing_feeds:
                existing_feeds[feed.category.name].append(feed.title)
            else:
                existing_feeds[feed.category.name] = [feed.title]
        else:
            no_category_feed.append(feed.title)
        self.fctrl.delete(feed.id)
    for category in self.cctrl.read():
        self.cctrl.delete(category.id)
    # re-importing OPML
    import_resp = self.jarr_client(
        'post', 'opml', to_json=False,
        data={'opml_file': (BytesIO(resp.data), 'opml.xml')},
        headers=None, user=self.user.login)
    self.assertStatusCode(201, import_resp)
    self.assertEqual(0, import_resp.json['existing'])
    self.assertEqual(0, import_resp.json['failed'])
    self._check_opml_imported(existing_feeds, no_category_feed)
    # importing a second time: everything already exists
    import_resp = self.jarr_client(
        'post', 'opml', to_json=False,
        data={'opml_file': (BytesIO(resp.data), 'opml.xml')},
        headers=None, user=self.user.login)
    self.assertStatusCode(200, import_resp)
    self.assertEqual(0, import_resp.json['created'])
    self.assertEqual(0, import_resp.json['failed'])
def _get_query_for_clustering(self, article, filters, filter_tfidf=False):
    """Yield candidate articles for clustering with *article*.

    Candidates belong to the same user, are already clustered, and fall
    within the configured time window on either publication or retrieval
    date; same-feed / same-category candidates are excluded when the
    corresponding config forbids them.
    """
    time_delta = timedelta(days=conf.clustering.time_delta)
    date_cond = {
        'date__lt': article.date + time_delta,
        'date__gt': article.date - time_delta
    }
    retr_cond = {
        'retrieved_date__lt': article.retrieved_date + time_delta,
        'retrieved_date__gt': article.retrieved_date - time_delta
    }
    filters.update({
        'cluster_id__ne': None,
        'user_id': article.user_id,
        'id__ne': article.id,
        '__or__': [date_cond, retr_cond]
    })
    if article.category_id \
            and not self.get_config(article, 'cluster_same_category'):
        filters['category_id__ne'] = article.category_id
    if not self.get_config(article, 'cluster_same_feed'):
        filters['feed_id__ne'] = article.feed_id
    # NULL means "inherit", hence the explicit True-or-None disjunction
    feed_join = [
        Feed.id == Article.feed_id,
        or_(Feed.cluster_enabled.__eq__(True),
            Feed.cluster_enabled.__eq__(None))
    ]
    if filter_tfidf:
        feed_join.append(
            or_(Feed.cluster_tfidf_enabled.__eq__(True),
                Feed.cluster_tfidf_enabled.__eq__(None)))
    query = ArticleController(article.user_id).read(**filters)\
        .join(Feed, and_(*feed_join))
    # operations involving categories are complicated, handling in software
    for candidate in query:
        if not self.get_config(candidate, "cluster_enabled"):
            continue
        if filter_tfidf and \
                not self.get_config(candidate, "cluster_tfidf_enabled"):
            continue
        yield candidate
def test_cluster_disabled_on_original_category(self):
    """cluster_enabled=False on categories blocks clustering (even via a
    category-less feed); re-enabling the original category restores it.
    """
    article = ArticleController().read(category_id__ne=None).first()
    art_cat_id = article.category_id
    cat_ctrl = CategoryController(article.user_id)
    cluster = article.cluster
    fctrl = FeedController(cluster.user_id)
    feed = fctrl.create(title='new feed', category_id=art_cat_id)
    fno_cat = fctrl.create(title='category-less')
    # user-level config left unset so the category config decides
    update_on_all_objs(users=[cluster.user], cluster_enabled=None)
    cat_ctrl.update({}, {'cluster_enabled': False})
    # both clones land alone in fresh clusters
    article = self.create_article_from(cluster, feed)
    self.assertEqual(1, len(article.cluster.articles))
    self.assertNotInCluster(article, cluster)
    article = self.create_article_from(cluster, fno_cat)
    self.assertEqual(1, len(article.cluster.articles))
    self.assertNotInCluster(article, cluster)
    # re-enable on the original category: clustering works again
    cat_ctrl.update({'id': art_cat_id}, {'cluster_enabled': True})
    article = self.create_article_from(cluster, fno_cat)
    self.assertEqual(2, len(article.cluster.articles))
    self.assertInCluster(article, cluster)
    article = self.create_article_from(cluster, feed)
    self.assertEqual(3, len(article.cluster.articles))
    self.assertInCluster(article, cluster)
def scheduler():
    """Periodic master task (queue-aware variant): enqueue feed fetches,
    feed deletions, per-user clusterizers and metrics tasks, then
    reschedule itself.
    """
    logger.warning("Running scheduler")
    start = datetime.now()
    fctrl = FeedController()
    # browsing feeds to fetch
    queue = Queues.CRAWLING if conf.crawler.use_queues else Queues.DEFAULT
    feeds = list(fctrl.list_fetchable(conf.crawler.batch_size))
    WORKER_BATCH.labels(worker_type='fetch-feed').observe(len(feeds))
    logger.info('%d to enqueue', len(feeds))
    for feed in feeds:
        logger.debug("%r: scheduling to be fetched on queue:%r", feed,
                     queue.value)
        process_feed.apply_async(args=[feed.id], queue=queue.value)
    # browsing feeds to delete
    feeds_to_delete = list(fctrl.read(status=FeedStatus.to_delete))
    # redis lock so concurrent scheduler runs don't both trigger deletion
    if feeds_to_delete and REDIS_CONN.setnx(JARR_FEED_DEL_KEY, 'true'):
        REDIS_CONN.expire(JARR_FEED_DEL_KEY, LOCK_EXPIRE)
        logger.info('%d to delete, deleting one', len(feeds_to_delete))
        # NOTE(review): the log says "deleting one" but this loop has no
        # break, so every feed is scheduled — confirm which is intended
        for feed in feeds_to_delete:
            logger.debug("%r: scheduling to be delete", feed)
            feed_cleaner.apply_async(args=[feed.id])
    # applying clusterizer
    queue = Queues.CLUSTERING if conf.crawler.use_queues else Queues.DEFAULT
    for user_id in ArticleController.get_user_id_with_pending_articles():
        # per-user redis lock rate-limits clusterizer scheduling
        if REDIS_CONN.setnx(JARR_CLUSTERIZER_KEY % user_id, 'true'):
            REDIS_CONN.expire(JARR_CLUSTERIZER_KEY % user_id,
                              conf.crawler.clusterizer_delay)
            logger.debug('Scheduling clusterizer for User(%d) on queue:%r',
                         user_id, queue.value)
            clusterizer.apply_async(args=[user_id], queue=queue.value)
    # re-enqueue self after the idle delay, then fire metrics tasks
    scheduler.apply_async(countdown=conf.crawler.idle_delay)
    metrics_users_any.apply_async()
    metrics_users_active.apply_async()
    metrics_users_long_term.apply_async()
    metrics_articles_unclustered.apply_async()
    observe_worker_result_since(start, 'scheduler', 'ok')
def test_feed_rights(self):
    """A feed and its 3 articles are only manageable by the owner."""
    feed = FeedController(2).read()[0]
    feed_articles = ArticleController().read(feed_id=feed.id)
    self.assertEqual(3, feed_articles.count())
    owner = UserController().get(id=feed.user_id)
    self._test_controller_rights(feed, owner)
def metrics_articles_unclustered():
    """Export the number of articles without a cluster as a gauge."""
    logger.debug('Counting unclustered articles')
    count = ArticleController().count_unclustered()
    ARTICLES.labels(status='unclustered').set(count)
def test_feed_rights(self):
    """A category holding 3 articles and 1 feed is only manageable by
    its owner.
    """
    category = CategoryController(2).read().first()
    self.assertEqual(
        3, ArticleController().read(category_id=category.id).count())
    self.assertEqual(
        1, FeedController().read(category_id=category.id).count())
    self._test_controller_rights(
        category, UserController().get(id=category.user_id))
def _test_create_using_filters(self):
    """End-to-end check of feed filters at article creation: matching
    articles are marked read / liked, skipped ones are never created.
    """
    # FIXME wait redo filters
    feed_ctr = FeedController(USER_ID)
    acontr = ArticleController(USER_ID)
    feed1, feed2, feed3 = [f for f in feed_ctr.read()][0:3]
    # feed3: favorite anything NOT matching pattern1/pattern2,
    # and mark pattern3 matches as read
    feed_ctr.update({'id': feed3.id}, {
        'cluster_enabled': True,
        'filters': [{
            "type": "regex",
            "pattern": ".*(pattern1|pattern2).*",
            "action on": "no match",
            "action": "mark as favorite"
        }, {
            "type": "simple match",
            "pattern": "pattern3",
            "action on": "match",
            "action": "mark as read"
        }]
    })
    # feed1: mark pattern3 matches as read
    feed_ctr.update({'id': feed1.id}, {
        'filters': [{
            "type": "simple match",
            "pattern": "pattern3",
            "action on": "match",
            "action": "mark as read"
        }]
    })
    # feed2: skip articles tagged pattern4 or whose tags contain pattern5
    feed_ctr.update({'id': feed2.id}, {
        'filters': [{
            "type": "tag match",
            "pattern": "pattern4",
            "action on": "match",
            "action": "skipped"
        }, {
            "type": "tag contains",
            "pattern": "pattern5",
            "action on": "match",
            "action": "skipped"
        }]
    })
    art1 = acontr.create(entry_id="will be read and faved 1",
                         feed_id=feed1.id,
                         title="garbage pattern1 pattern3 garbage",
                         content="doesn't matter",
                         link="cluster1")
    art2 = acontr.create(entry_id="will be ignored 2",
                         feed_id=feed1.id,
                         title="garbage see pattern garbage",
                         content="doesn't matter2",
                         link="is ignored 2")
    art3 = acontr.create(entry_id="will be read 3",
                         user_id=2,
                         feed_id=feed2.id,
                         title="garbage pattern3 garbage",
                         content="doesn't matter",
                         link="doesn't matter either3")
    art4 = acontr.create(entry_id="will be ignored 4",
                         user_id=2,
                         feed_id=feed2.id,
                         title="garbage see pattern garbage",
                         content="doesn't matter2",
                         link="doesn't matter either4")
    art5 = acontr.create(entry_id="will be faved 5",
                         feed_id=feed3.id,
                         title="garbage anti-attern3 garbage",
                         content="doesn't matter",
                         link="cluster1")
    art6 = acontr.create(entry_id="will be faved 6",
                         feed_id=feed3.id,
                         title="garbage pattern1 garbage",
                         content="doesn't matter2",
                         link="doesn't matter 6")
    art7 = acontr.create(entry_id="will be read 7",
                         feed_id=feed3.id,
                         title="garbage pattern3 garbage",
                         content="doesn't matter3",
                         link="doesn't matter either7")
    art8 = acontr.create(entry_id="will be ignored",
                         feed_id=feed3.id,
                         title="garbage pattern4 garbage",
                         content="doesn't matter4-matter4_matter4",
                         lang='fa_ke',
                         link="doesn't matter either8")
    art9 = acontr.create(entry_id="unique9",
                         feed_id=feed2.id,
                         title="garbage",
                         tags=['garbage', 'pattern4'],
                         content="doesn't matterç",
                         link="doesn't matter either9")
    art10 = acontr.create(entry_id="will be ignored",
                          feed_id=feed2.id,
                          title="garbage",
                          tags=['pattern5 garbage', 'garbage'],
                          content="doesn't matter10",
                          link="doesn't matter either10")
    ClusterController(USER_ID).clusterize_pending_articles()
    self.assertTrue(acontr.get(id=art1.id).cluster.read)
    self.assertFalse(acontr.get(id=art1.id).cluster.liked)
    self.assertFalse(acontr.get(id=art2.id).cluster.read)
    self.assertFalse(acontr.get(id=art2.id).cluster.liked)
    self.assertFalse(acontr.get(id=art3.id).cluster.read)
    self.assertFalse(acontr.get(id=art3.id).cluster.liked)
    self.assertFalse(acontr.get(id=art4.id).cluster.read)
    self.assertFalse(acontr.get(id=art4.id).cluster.liked)
    self.assertTrue(art5.cluster.read,
                    "should be read because it clustered")
    self.assertTrue(art5.cluster.liked)
    self.assertFalse(art6.cluster.read)
    self.assertFalse(art6.cluster.liked)
    self.assertTrue(art7.cluster.read)
    self.assertTrue(art7.cluster.liked)
    self.assertFalse(art8.cluster.read)
    self.assertTrue(art8.cluster.liked)
    # skipped articles: create() returned None and nothing is in base
    self.assertIsNone(art9)
    self.assertEqual(0, acontr.read(entry_id='unique9').count())
    self.assertIsNone(art10)
    self.assertEqual(0, acontr.read(entry_id='unique10').count())
def test_article_rights(self):
    """An article is only manageable by its owner."""
    article = ArticleController(USER_ID).read().first()
    owner = UserController().get(id=article.user_id)
    self._test_controller_rights(article, owner)
def delete(self, obj_id, commit=True):
    """Delete user *obj_id* after detaching dependent rows.

    Clusters lose their main-article reference and articles their
    cluster reference before the row itself is removed.
    """
    # local import to avoid a circular dependency between controllers
    from jarr.controllers import ArticleController, ClusterController
    fltr = {"user_id": obj_id}
    ClusterController(self.user_id).update(fltr, {"main_article_id": None})
    ArticleController(self.user_id).update(fltr, {"cluster_id": None})
    # NOTE(review): `commit` is accepted but not forwarded to
    # super().delete() — confirm whether the parent honours it
    return super().delete(obj_id)