def run_command(name):
    """Re-queue the VOA Learning English archive index pages and run the
    crawl worker for *name* until it reports finished.

    :param name: crawl-site name; also selects the worker module
                 ``easycrawl.<name>.app.worker``.

    NOTE(review): this file defines ``run_command`` multiple times; only the
    last definition survives import -- confirm which variant is intended.
    """
    from dojang.util import import_object
    from easycrawl.models import CrawlSite
    from easycrawl.helper import create_crawl_class, update_crawl_status
    from easycrawl.database import crawldb

    # Result is unused, but the query itself runs against the DB; kept in
    # case callers rely on it failing fast for an unknown site name.
    site = CrawlSite.query.filter_by(name=name).first()

    worker_name = "easycrawl.%s.app.worker" % name
    print(worker_name)  # parenthesized form works on both Python 2 and 3
    worker = import_object(worker_name)

    clz = create_crawl_class('voa_news')
    urls = [
        'http://learningenglish.voanews.com/archive/as-it-is/latest/3521/3521.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-american-mosaic/latest/986/986.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-in-the-news/latest/978/1577.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-making-of-a-nation/latest/978/979.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-science-in-the-news/latest/978/1579.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-this-is-america/latest/978/1580.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-words-stories/latest/978/987.html',
    ]
    # Status 0 marks each archive index page as un-crawled so the worker
    # revisits it on this run.
    for url in urls:
        update_crawl_status(clz, crawldb, url, 0)

    # Run twice: presumably the second pass picks up URLs the first pass
    # enqueued -- TODO confirm against the worker implementation.
    worker.run(callback=None)
    worker.run(callback=None)
    worker.finished()
def run_command(name):
    """Re-queue the imax.im movie listing page and run the crawl worker
    for *name* until it reports finished.

    :param name: crawl-site name; also selects the worker module
                 ``easycrawl.<name>.app.worker``.

    NOTE(review): this file defines ``run_command`` multiple times; only the
    last definition survives import -- confirm which variant is intended.
    """
    from dojang.util import import_object
    from easycrawl.models import CrawlSite
    from easycrawl.helper import create_crawl_class, update_crawl_status
    from easycrawl.database import crawldb

    # Result is unused, but the query itself runs against the DB; kept in
    # case callers rely on it failing fast for an unknown site name.
    site = CrawlSite.query.filter_by(name=name).first()

    worker_name = "easycrawl.%s.app.worker" % name
    print(worker_name)  # parenthesized form works on both Python 2 and 3
    worker = import_object(worker_name)

    clz = create_crawl_class('imax_im')
    # Status 0 marks the listing page as un-crawled so the worker revisits it.
    url = 'http://imax.im/movies?page=1'
    update_crawl_status(clz, crawldb, url, 0)

    # Run twice: presumably the second pass picks up URLs the first pass
    # enqueued -- TODO confirm against the worker implementation.
    worker.run(callback=None)
    worker.run(callback=None)
    worker.finished()
def run_command(name):
    """Re-queue the ESLPod "show all" index page and run the crawl worker
    for *name* until it reports finished.

    :param name: crawl-site name; also selects the worker module
                 ``easycrawl.<name>.app.worker``.

    NOTE(review): this file defines ``run_command`` multiple times; only the
    last definition survives import -- confirm which variant is intended.
    """
    from dojang.util import import_object
    from easycrawl.models import CrawlSite
    from easycrawl.helper import create_crawl_class, update_crawl_status
    from easycrawl.database import crawldb

    # Result is unused, but the query itself runs against the DB; kept in
    # case callers rely on it failing fast for an unknown site name.
    site = CrawlSite.query.filter_by(name=name).first()

    worker_name = "easycrawl.%s.app.worker" % name
    print(worker_name)  # parenthesized form works on both Python 2 and 3
    worker = import_object(worker_name)

    clz = create_crawl_class('esl_pod')
    # Status 0 marks the index page as un-crawled so the worker revisits it.
    url = 'http://www.eslpod.com/website/show_all.php?cat_id=-59456'
    update_crawl_status(clz, crawldb, url, 0)

    # Run twice: presumably the second pass picks up URLs the first pass
    # enqueued -- TODO confirm against the worker implementation.
    worker.run(callback=None)
    worker.run(callback=None)
    worker.finished()