def article_content_error_types():
    """ Classify articles by the value of their ``content_error`` attribute.

    Selects the articles whose ``content_error`` differs from the empty
    string, hands them to a ``ContentErrorClassifier`` together with the
    attribute name, and returns whatever its ``classify()`` method returns.
    """

    articles_in_error = Article.objects(content_error__ne='').no_cache()

    classifier = ContentErrorClassifier(articles_in_error, 'content_error')

    return classifier.classify()
def synchronize_mongodb_statsd_articles_gauges(full=False):
    """ Send the article counters to the statsd server as gauges.

    The total, markdown, html, empty, content-error and url-error counts
    are always sent. The orphaned, absolutes and duplicates counts are
    sent as well when ``full`` is true.
    """

    with benchmark('synchronize statsd gauges for Article.*'):

        # Build every query set first; each one is counted further down,
        # right before its gauge is sent.
        empty_qs = Article.objects(content_type=0).no_cache()

        # Finer-grained breakdowns of the empty articles, currently unused:
        #   empty.filter(content_error='', url_error='')
        #   empty.filter(content_error__ne='')
        #   empty.filter(url_error__ne='')

        parsed_qs = Article.objects(content_type__ne=CONTENT_TYPES.NONE)
        html_qs = parsed_qs.filter(content_type=CONTENT_TYPES.HTML)
        markdown_qs = parsed_qs.filter(content_type=CONTENT_TYPES.MARKDOWN)

        absolutes_qs = Article.objects(url_absolute=True).no_cache()
        duplicates_qs = Article.objects(duplicate_of__ne=None).no_cache()
        orphaned_qs = Article.objects(orphaned=True).no_cache()
        content_errors_qs = Article.objects(content_error__ne='').no_cache()
        url_errors_qs = Article.objects(url_error__ne='').no_cache()

        # (gauge name, object to call ``.count()`` on), in emission order.
        always_sent = (
            ('mongo.articles.counts.total', Article._get_collection()),
            ('mongo.articles.counts.markdown', markdown_qs),
            ('mongo.articles.counts.html', html_qs),
            ('mongo.articles.counts.empty', empty_qs),
            ('mongo.articles.counts.content_errors', content_errors_qs),
            ('mongo.articles.counts.url_errors', url_errors_qs),
        )

        full_only = (
            ('mongo.articles.counts.orphaned', orphaned_qs),
            ('mongo.articles.counts.absolutes', absolutes_qs),
            ('mongo.articles.counts.duplicates', duplicates_qs),
        )

        for gauge_name, countable in always_sent:
            statsd.gauge(gauge_name, countable.count())

        if full:
            for gauge_name, countable in full_only:
                statsd.gauge(gauge_name, countable.count())
def synchronize_mongodb_statsd_articles_gauges(full=False):
    """ Refresh the ``mongo.articles.counts.*`` gauges on statsd.

    Six gauges (total, markdown, html, empty, content_errors, url_errors)
    are sent on every call; three more (orphaned, absolutes, duplicates)
    are sent only when ``full`` is true.
    """

    def publish(gauge_name, countable):
        """ Count ``countable`` and send the result as ``gauge_name``. """

        statsd.gauge(gauge_name, countable.count())

    with benchmark('synchronize statsd gauges for Article.*'):

        without_content = Article.objects(content_type=0).no_cache()

        # Possible sub-categories of the articles without content, kept
        # here for reference but not computed:
        #   empty_pending       = empty.filter(content_error='', url_error='')
        #   empty_content_error = empty.filter(content_error__ne='')
        #   empty_url_error     = empty.filter(url_error__ne='')

        with_content = Article.objects(content_type__ne=CONTENT_TYPES.NONE)
        as_html = with_content.filter(content_type=CONTENT_TYPES.HTML)
        as_markdown = with_content.filter(content_type=CONTENT_TYPES.MARKDOWN)

        with_absolute_url = Article.objects(url_absolute=True).no_cache()
        with_original = Article.objects(duplicate_of__ne=None).no_cache()
        without_parent = Article.objects(orphaned=True).no_cache()
        with_content_error = Article.objects(content_error__ne='').no_cache()
        with_url_error = Article.objects(url_error__ne='').no_cache()

        # Gauges sent on every run.
        publish('mongo.articles.counts.total', Article._get_collection())
        publish('mongo.articles.counts.markdown', as_markdown)
        publish('mongo.articles.counts.html', as_html)
        publish('mongo.articles.counts.empty', without_content)
        publish('mongo.articles.counts.content_errors', with_content_error)
        publish('mongo.articles.counts.url_errors', with_url_error)

        if not full:
            return

        # Extra gauges, sent only on a full synchronization.
        publish('mongo.articles.counts.orphaned', without_parent)
        publish('mongo.articles.counts.absolutes', with_absolute_url)
        publish('mongo.articles.counts.duplicates', with_original)
def article_url_error_types():
    """ Classify articles by the value of their ``url_error`` attribute.

    Selects the articles whose ``url_error`` differs from the empty string,
    hands them to a ``UrlErrorClassifier`` together with the attribute
    name, and returns whatever its ``classify()`` method returns.
    """

    # Next to investigate (the figures are presumably occurrence counts):
    #   - list index out of range: 758
    #   - 'NoneType' object has no attribute 'findAll': 137

    articles_in_error = Article.objects(url_error__ne='').no_cache()

    classifier = UrlErrorClassifier(articles_in_error, 'url_error')

    return classifier.classify()
def article_url_error_types():
    """ Run the URL error classifier over the articles having a URL error.

    An article is taken into account when its ``url_error`` attribute is
    not the empty string. The value returned is the result of the
    classifier's ``classify()`` call.
    """

    # Next to investigate:
    #   list index out of range: 758
    #   'NoneType' object has no attribute 'findAll': 137

    attribute_name = 'url_error'
    failing = Article.objects(url_error__ne='')
    failing = failing.no_cache()

    return UrlErrorClassifier(failing, attribute_name).classify()
def article_content_error_types():
    """ Run the content error classifier over the articles in error.

    An article is taken into account when its ``content_error`` attribute
    is not the empty string. The value returned is the result of the
    classifier's ``classify()`` call.
    """

    attribute_name = 'content_error'
    failing = Article.objects(content_error__ne='')
    failing = failing.no_cache()

    return ContentErrorClassifier(failing, attribute_name).classify()