コード例 #1
0
ファイル: base.py プロジェクト: fernflower/scrapeme
def create_vk_spider(name, module, boards=None, owner_id=None, url=None,
                     access_token=''):
    """Build a VK spider class and attach it to the given module name.

    Exactly one of ``owner_id`` / ``url`` has to be supplied, and at the
    moment only ``owner_id`` is actually supported.

    :param name: forwarded to ``gen_vk_spider_class`` as ``name``.
    :param module: value assigned to the generated class' ``__module__``.
    :param boards: forwarded to ``gen_vk_spider_class`` as ``boards``.
    :param owner_id: forwarded to ``gen_vk_spider_class`` as ``owner_id``.
    :param url: rejected for now; passing it always raises.
    :param access_token: token to use; when empty, one is fetched via
        ``utils.get_access_token()``.
    :returns: the class produced by ``gen_vk_spider_class``.
    :raises exc.SpiderException: if neither or both of ``owner_id`` and
        ``url`` are given, or if only ``url`` is given.
    """
    if owner_id:
        if url:
            raise exc.SpiderException(
                "Both owner_id and url given, choose one")
    elif url:
        raise exc.SpiderException("Url passing not supported yet")
    else:
        raise exc.SpiderException(
            "Either owner_id or url must be specified!")

    # XXX the utils.get_access_token() fallback is kept only so that plain
    # "scrapy crawl spider-name" invocations keep working.
    # FIXME switch to being driven from control.py one day
    if not access_token:
        access_token = utils.get_access_token()

    spider_cls = gen_vk_spider_class(
        name=name,
        owner_id=owner_id,
        boards=boards,
        access_token=access_token,
    )
    # Nasty hack: overriding __module__ makes the generated class
    # discoverable by scrapy.
    spider_cls.__module__ = module
    return spider_cls
コード例 #2
0
ファイル: control.py プロジェクト: fernflower/scrapeme
def crawl_all(token=None):
    """Schedule every discovered spider and run the reactor.

    Each class returned by ``spider_utils.find_spiders()`` gets the access
    token injected, is recorded in ``RUNNING_CRAWLERS`` and is scheduled on
    a ``CrawlerRunner``. Once all crawls have finished (successfully or
    not) ``send_mail()`` is invoked.

    :param token: access token to inject into the spiders; when falsy, one
        is acquired through ``utils.get_access_token()``.
    """
    if not token:
        LOG.warning("No token passed, "
                    "acquiring one using login data from settings")
        token = utils.get_access_token()
    # NOTE(review): this writes the access token to the log in plain text;
    # kept as-is to preserve behaviour, but consider masking it.
    LOG.info("Access token: %s", token)
    runner = crawler.CrawlerRunner(project.get_project_settings())

    dispatcher.connect(on_close, signal=signals.spider_closed)
    for spider_cls in spider_utils.find_spiders():
        # FIXME encapsulation violation
        # inject access_token to a VK spider
        spider_cls.access_token = token
        RUNNING_CRAWLERS.append(spider_cls)
        runner.crawl(spider_cls)
    d = runner.join()
    # addBoth: the mail is sent whether the joined crawls succeed or fail.
    d.addBoth(lambda _: send_mail())

    internet.reactor.run()