Exemple #1
0
def run_command(name):
    """Set crawl status 0 for the VOA archive pages, then run the worker.

    Resolves the worker object at ``easycrawl.<name>.app.worker``, calls
    ``update_crawl_status`` with a status of 0 for every archive URL listed
    below, and then drives the worker through two ``run`` passes followed
    by ``finished``.

    Args:
        name: Site name; used for the ``CrawlSite`` lookup and to build the
            dotted import path of the worker.
    """
    # Imports are kept function-local, as in the original command.
    from dojang.util import import_object
    from easycrawl.models import CrawlSite
    from easycrawl.helper import create_crawl_class, update_crawl_status
    from easycrawl.database import crawldb

    # NOTE(review): `site` is never used below; the lookup is kept so the
    # query is still issued -- confirm whether it can be removed.
    site = CrawlSite.query.filter_by(name=name).first()

    worker_name = "easycrawl.%s.app.worker" % name
    # Parenthesised form prints the same on Python 2 and is valid Python 3.
    print(worker_name)
    worker = import_object(worker_name)

    # NOTE(review): the crawl class is hard-coded to 'voa_news' regardless
    # of `name` -- confirm this is intended.
    clz = create_crawl_class('voa_news')
    urls = [
        'http://learningenglish.voanews.com/archive/as-it-is/latest/3521/3521.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-american-mosaic/latest/986/986.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-in-the-news/latest/978/1577.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-making-of-a-nation/latest/978/979.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-science-in-the-news/latest/978/1579.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-this-is-america/latest/978/1580.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-words-stories/latest/978/987.html',
    ]

    for url in urls:
        # presumably status 0 marks the URL as not yet crawled -- TODO confirm
        update_crawl_status(clz, crawldb, url, 0)

    # NOTE(review): run() is invoked twice in a row; presumably the second
    # pass handles URLs discovered by the first -- confirm before changing.
    worker.run(callback=None)
    worker.run(callback=None)
    worker.finished()
Exemple #2
0
def run_command(name):
    """Set crawl status 0 for the imax.im movie list, then run the worker.

    Resolves the worker object at ``easycrawl.<name>.app.worker``, calls
    ``update_crawl_status`` with a status of 0 for the first movie-list
    page, and then drives the worker through two ``run`` passes followed
    by ``finished``.

    Args:
        name: Site name; used for the ``CrawlSite`` lookup and to build the
            dotted import path of the worker.
    """
    # Imports are kept function-local, as in the original command.
    from dojang.util import import_object
    from easycrawl.models import CrawlSite
    from easycrawl.helper import create_crawl_class, update_crawl_status
    from easycrawl.database import crawldb

    # NOTE(review): `site` is never used below; the lookup is kept so the
    # query is still issued -- confirm whether it can be removed.
    site = CrawlSite.query.filter_by(name=name).first()

    worker_name = "easycrawl.%s.app.worker" % name
    # Parenthesised form prints the same on Python 2 and is valid Python 3.
    print(worker_name)
    worker = import_object(worker_name)

    # NOTE(review): the crawl class is hard-coded to 'imax_im' regardless
    # of `name` -- confirm this is intended.
    clz = create_crawl_class('imax_im')
    url = 'http://imax.im/movies?page=1'
    # presumably status 0 marks the URL as not yet crawled -- TODO confirm
    update_crawl_status(clz, crawldb, url, 0)

    # NOTE(review): run() is invoked twice in a row; presumably the second
    # pass handles URLs discovered by the first -- confirm before changing.
    worker.run(callback=None)
    worker.run(callback=None)
    worker.finished()
Exemple #3
0
def run_command(name):
    """Set crawl status 0 for the ESLPod index page, then run the worker.

    Resolves the worker object at ``easycrawl.<name>.app.worker``, calls
    ``update_crawl_status`` with a status of 0 for the show-all index page,
    and then drives the worker through two ``run`` passes followed by
    ``finished``.

    Args:
        name: Site name; used for the ``CrawlSite`` lookup and to build the
            dotted import path of the worker.
    """
    # Imports are kept function-local, as in the original command.
    from dojang.util import import_object
    from easycrawl.models import CrawlSite
    from easycrawl.helper import create_crawl_class, update_crawl_status
    from easycrawl.database import crawldb

    # NOTE(review): `site` is never used below; the lookup is kept so the
    # query is still issued -- confirm whether it can be removed.
    site = CrawlSite.query.filter_by(name=name).first()

    worker_name = "easycrawl.%s.app.worker" % name
    # Parenthesised form prints the same on Python 2 and is valid Python 3.
    print(worker_name)
    worker = import_object(worker_name)

    # NOTE(review): the crawl class is hard-coded to 'esl_pod' regardless
    # of `name` -- confirm this is intended.
    clz = create_crawl_class('esl_pod')
    url = 'http://www.eslpod.com/website/show_all.php?cat_id=-59456'
    # presumably status 0 marks the URL as not yet crawled -- TODO confirm
    update_crawl_status(clz, crawldb, url, 0)

    # NOTE(review): run() is invoked twice in a row; presumably the second
    # pass handles URLs discovered by the first -- confirm before changing.
    worker.run(callback=None)
    worker.run(callback=None)
    worker.finished()