Example #1
0
 def setUp(self):
     """Fixture hook: call ``initialize`` with the ``':memory:'`` target.

     NOTE(review): ':memory:' is presumably an in-memory database name;
     confirm against the definition of ``initialize``.
     """
     database_name = ':memory:'
     initialize(database_name)
Example #2
0
 def setUp(self):
     """Fixture hook: run ``initialize`` against ``':memory:'``.

     NOTE(review): presumably this selects a throwaway in-memory database;
     verify against the definition of ``initialize``.
     """
     target = ':memory:'
     initialize(target)
Example #3
0
from flask import Flask
from app import app
from app import models

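# The app object is imported from the app package above; this script only
# initializes the models and runs it.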

def _serve():
    """Initialize the models, then run the imported app with debug enabled."""
    models.initialize()
    # debug=True is handed straight to app.run; Flask documents this mode as
    # enabling the interactive debugger and reloader (development use only).
    app.run(debug=True)


if __name__ == '__main__':
    _serve()
Example #4
0
from flask import Flask
from app import app
from app import models

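# The app object is imported from the app package above; this script only
# initializes the models and runs it.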

def _main():
    """Script entry point: set up the models and start the app in debug mode."""
    models.initialize()
    # debug=True is passed unchanged to app.run; per the Flask documentation
    # it turns on the debugger and reloader, so this is a development runner.
    app.run(debug=True)


if __name__ == '__main__':
    _main()
Example #5
0
from app.models import initialize, Page, Link


def sizeof_fmt(num, suffix='B'):
    """Return *num* as a human-readable size string using binary prefixes.

    Adapted from http://stackoverflow.com/a/1094933

    The value is divided by 1024 until it fits under 1024 and is then
    formatted with one decimal place, followed by the unit prefix
    ('', 'Ki', 'Mi', ... 'Yi') and *suffix*.

    Args:
        num: Size to format (int or float, may be negative).
        suffix: Text appended after the unit prefix; defaults to 'B'.

    Returns:
        The formatted string, e.g. ``sizeof_fmt(1536) == '1.5KiB'``.
    """
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        # Test the value as it will be displayed (one decimal place).
        # Checking the unrounded value lets e.g. 1023.999 through, which
        # then renders as "1024.0Ki" instead of moving up to "1.0Mi".
        if abs(round(num, 1)) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    # Anything still >= 1024 Zi is reported in Yi, the largest unit handled.
    return "%.1f%s%s" % (num, 'Yi', suffix)


if __name__ == "__main__":
    # os.stat is used below, but the import section of this script does not
    # bring `os` into scope; import it here so the script does not fail with
    # a NameError.
    import os

    db_path = 'corpus.db'
    initialize(db_path)

    # Total number of Page rows.
    page_count = Page.select().count()

    # Pages fetched with status 200 whose content type is HTML or plain text.
    is_text_page = ((Page.content_type == 'text/html') |
                    (Page.content_type == 'text/plain'))
    crawled_count = (
        Page.select()
        .where((Page.status_code == 200) & is_text_page)
        .count()
    )

    # Only status 301 is counted as a redirect here.
    redirect_count = Page.select().where(Page.status_code == 301).count()
    # Pages with status_code 0 are counted as still to be crawled
    # (presumably 0 marks "not fetched yet" -- confirm against the crawler).
    to_crawl_count = Page.select().where(Page.status_code == 0).count()
    # Everything not covered by the three categories above.
    other_count = page_count - crawled_count - redirect_count - to_crawl_count

    link_count = Link.select().count()

    # On-disk size of the database file, formatted for display.
    corpus_size = sizeof_fmt(os.stat(db_path).st_size)
Example #6
0
from app.models import initialize
from app.crawl import go


if __name__ == "__main__":
    # Point the models at the on-disk corpus file, then hand control to the
    # crawl module's go().
    corpus_path = 'corpus.db'
    initialize(corpus_path)
    go()