def create_vk_spider(name, module, boards=None, owner_id=None, url=None,
                     access_token=''):
    """Build a VK spider class for the given group wall and boards.

    Exactly one of owner_id or url must be given; url support is not
    implemented yet.
    """
    if not owner_id and not url:
        raise exc.SpiderException("Either owner_id or url must be specified!")
    if owner_id and url:
        raise exc.SpiderException("Both owner_id and url given, choose one")
    if url:
        raise exc.SpiderException("Url passing not supported yet")

    # XXX call to utils.get_access_token left only for convenient
    # `scrapy crawl spider-name` calls.
    # FIXME change to calls from control.py one day
    access_token = access_token or utils.get_access_token()
    generated = gen_vk_spider_class(
        name=name,
        owner_id=owner_id,
        boards=boards,
        access_token=access_token)
    # a nasty hack to make the generated class discoverable by Scrapy
    generated.__module__ = module
    return generated
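
# A hypothetical usage sketch (not part of the module): wiring a generated
# spider into a spiders module so that `scrapy crawl` can find it. The
# spider name, group id and board ids below are made-up values.
#
#     group_spider = create_vk_spider(
#         name='some-vk-group',
#         module=__name__,  # the generated class becomes discoverable here
#         owner_id=-12345,  # a negative owner_id denotes a VK community
#         boards=[1, 2],
#     )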
def crawl_all(token=None):
    """Run every discovered VK spider, then call send_mail() when all
    of them have finished."""
    if not token:
        LOG.warning("No token passed, "
                    "acquiring one using login data from settings")
        token = utils.get_access_token()
    LOG.info("Access token: %s", token)

    runner = crawler.CrawlerRunner(project.get_project_settings())
    dispatcher.connect(on_close, signal=signals.spider_closed)
    for spider_cls in spider_utils.find_spiders():
        # FIXME encapsulation violation
        # inject the access token into each VK spider class
        spider_cls.access_token = token
        RUNNING_CRAWLERS.append(spider_cls)
        runner.crawl(spider_cls)

    d = runner.join()
    d.addBoth(lambda _: send_mail())
    internet.reactor.run()
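
# A minimal, hypothetical entry point for manual runs; per the FIXME in
# create_vk_spider, this is eventually meant to be driven from control.py.
# With no token argument, crawl_all falls back to utils.get_access_token().
if __name__ == '__main__':
    crawl_all()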