Code Example #1
File: server_dashboard.py Project: tfgg/ppsay
def dashboard_classifier():
    docs = db_articles.find()

    if 'stats' not in cache:
        stats = defaultdict(Counter)

        for doc in docs:
            if 'machine' in doc.get('analysis', {}):
                timestamp = time.mktime(doc['time_added'].date().timetuple())

                stats[timestamp]['remove'] += len(
                    doc['analysis']['machine']['candidates']['remove'])
                stats[timestamp]['confirm'] += len(
                    doc['analysis']['machine']['candidates']['confirm'])

        stats_json = []
        for day, day_stats in sorted(stats.items()):
            day_stats = dict(day_stats)
            day_stats['day'] = day
            stats_json.append(day_stats)

        cache['stats'] = stats_json

    # Serialize only the cached per-day stats list for the template.
    return render_template('dashboard_classifier.html',
                           stats_json=json.dumps(cache['stats']))
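
For reference, the structure the handler above builds, caches under cache['stats'], and serializes as stats_json is a list of per-day counters keyed by an epoch timestamp; the values below are purely illustrative:

# Illustrative only: the shape of cache['stats'] (timestamps and counts invented).
[
    {'day': 1428192000.0, 'remove': 12, 'confirm': 3},
    {'day': 1428278400.0, 'remove': 7, 'confirm': 5},
]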
Code Example #2
File: server_dashboard.py Project: tfgg/ppsay
def dashboard_article(query_id):
    query = dashboard_query_index[query_id]
    docs = list(db_articles.find(query['query']))

    for doc in docs:
        doc['page'] = Page.get(doc['pages'][0])

    return render_template('dashboard_query.html', query=query, articles=docs)
Code Example #3
File: server_dashboard.py Project: tfgg/ppsay
def moderation_queue():
    articles = list(db_articles.find({'state': 'moderated'})
                    .sort([('time_added', -1)])[:50])

    for doc in articles:
        if 'pages' in doc:
            doc['page'] = Page.get(doc['pages'][0])

    return render_template('moderation_queue.html', articles=articles)
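
The three dashboard handlers above are shown without their module-level context. A minimal sketch of the setup they appear to assume, with Flask providing routing and render_template, the ppsay collections and Page helper, and a plain dict acting as cache, might look like this; the route paths and the empty dashboard_query_index are assumptions, not taken from the project:

# Minimal sketch of the assumed module setup for server_dashboard.py.
import json
import time
from collections import defaultdict, Counter

from flask import Flask, render_template

from ppsay.db import db_articles
from ppsay.page import Page

app = Flask(__name__)
cache = {}                   # simple in-process cache used by dashboard_classifier
dashboard_query_index = {}   # assumed: query_id -> {'query': ...}, used by dashboard_article

# The handlers would then be registered as routes, for example:
# app.add_url_rule('/dashboard/classifier', view_func=dashboard_classifier)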
Code Example #4
File: remove_stream.py Project: tfgg/ppsay
# Refresh the stream entry for every article that has no remaining
# (non-removed) final candidates or constituencies.
from ppsay.db import db_articles
from ppsay.article import Article

docs = db_articles.find()

for doc in docs:
    article = Article(doc)

    print article.id

    num_final_candidates = len([x for x in article.analysis['final']['candidates']
                                if x['state'] not in ['removed', 'removed_ml']])
    num_final_constituencies = len([x for x in article.analysis['final']['constituencies']
                                    if x['state'] not in ['removed', 'removed_ml']])

    if num_final_candidates == 0 and num_final_constituencies == 0:
        article.update_stream()
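
The same "not removed" filter appears twice in the loop above; purely as an illustrative refactoring (not part of ppsay), it could be pulled into a small helper:

# Illustrative refactoring only, not taken from the project.
REMOVED_STATES = ('removed', 'removed_ml')

def count_active(entries):
    # Count entries whose state is not one of the removed states.
    return len([x for x in entries if x['state'] not in REMOVED_STATES])

# e.g. num_final_candidates = count_active(article.analysis['final']['candidates'])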

Code Example #5
File: migrate_pages.py Project: tfgg/ppsay
# One-off migration: create a separate Page document for each URL stored in an
# article's embedded 'page' data, collecting the resulting page ids.
from ppsay.page import Page
from ppsay.db import db_articles
from urlparse import urlparse

for doc in db_articles.find():
    if 'page' not in doc or doc['page'] is None:
        print "Skipping"
        continue

    page_ids = []

    for url, final_url in zip(doc['page']['urls'], doc['page']['final_urls']):
        print url, final_url

        d = doc['page']

        d['url'] = url
        d['final_url'] = final_url
        d['source'] = doc['source']
        d['domain'] = urlparse(final_url).netloc

        page = Page.get_url(d['url'])

        if page is None:
            print "Creating {}".format(d['url'])
            page = Page(d)
            page.save()
        else:
            print "Exists"

        page_ids.append(page._id)
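
The listing ends with page_ids collected but never written back; presumably the migration then attaches those ids to the article document under the 'pages' key, which is what the articles_pages.py check below looks for. A hedged sketch of that missing step, assuming db_articles is a plain pymongo collection:

# Hypothetical final step, not shown in the listing above.
db_articles.update({'_id': doc['_id']}, {'$set': {'pages': page_ids}})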
Code Example #6
File: articles_pages.py Project: tfgg/ppsay
"""
    Check that every article's pages exist.
"""

from ppsay.db import db_articles, db_pages

for article_doc in db_articles.find():
    if 'pages' not in article_doc:
        print article_doc['_id'], "MISSING PAGES"
        continue

    for page_id in article_doc['pages']:
        page_doc = db_pages.find_one({'_id': page_id})

        if page_doc is None:
            print page_id, "PAGE MISSING"


Code Example #7
File: find_domains.py Project: tfgg/ppsay
# Ensure a domain record exists for every approved article's domain,
# creating one where get_domain() finds nothing.
from ppsay.domains import get_domain, new_domain
from ppsay.db import db_articles

for article in db_articles.find({'state': 'approved'}):
    article_domain = article.get('domain')

    if article_domain is not None:
        domain = get_domain(article_domain)

        if domain is None:
            # Use a separate name so the imported new_domain function stays
            # callable on later iterations of the loop.
            created = new_domain(article_domain)
            print created

Code Example #8
File: build_stream.py Project: tfgg/ppsay
# Rebuild the stream by calling update_stream() on every article that has pages.
from ppsay.db import db_articles
from ppsay.article import Article

docs = db_articles.find()

for doc in docs:
    if 'pages' not in doc:
        print "MISSING PAGES"
        continue

    article = Article(doc)
    print article.id
    article.update_stream()