def get_exposed_crawlers():
    """Yield instances of crawlers that can be run via the web UI.

    A crawler is considered exposed when it is a ``DocumentCrawler``
    subclass bound to a collection (``COLLECTION_ID`` is not ``None``).
    """
    for _key, crawler_cls in get_crawlers().items():
        if issubclass(crawler_cls, DocumentCrawler) \
                and crawler_cls.COLLECTION_ID is not None:
            yield crawler_cls()
def crawl(name):
    """Execute the crawler registered under *name*.

    If *name* is not in the registry, only a log message is emitted.
    Otherwise the crawler class is instantiated, executed, and the
    database session is committed.

    NOTE(review): this definition is shadowed by the later ``crawl``
    below (which adds ``incremental``/``param`` support) and is
    therefore dead code — consider removing it.
    """
    log.info('Crawling %r...', name)
    crawlers = get_crawlers()
    if name not in crawlers:
        # A missing crawler is reported, not raised.
        log.info('No such crawler: %r', name)
    else:
        # Membership was just checked, so index directly instead of
        # the redundant crawlers.get(name).
        crawler = crawlers[name]()
        crawler.execute()
        db.session.commit()
def crawl(name, incremental=False, param=None):
    """Execute the crawler registered under *name*.

    :param name: registry key of the crawler to run.
    :param incremental: forwarded to ``crawler.execute``; presumably
        selects an incremental rather than full crawl — TODO confirm
        against the crawler implementations.
    :param param: optional extra argument, forwarded to ``execute`` as
        the keyword ``param`` only when truthy (a falsy value such as
        ``''`` is dropped, matching the original behavior).
    """
    log.info('Crawling %r...', name)
    crawlers = get_crawlers()
    if name not in crawlers:
        # A missing crawler is reported, not raised.
        log.info('No such crawler: %r', name)
    else:
        # Membership was just checked, so index directly instead of
        # the redundant crawlers.get(name).
        crawler = crawlers[name]()
        # Only forward ``param`` when given so crawlers whose execute()
        # does not accept it keep working.
        params = {'param': param} if param else {}
        crawler.execute(incremental=incremental, **params)
        db.session.commit()
def name(self):
    """Return the registry key under which this crawler is registered.

    Falls through (returning ``None``) when no registered class matches
    this instance.
    """
    for registered_name, crawler_cls in get_crawlers().items():
        if isinstance(self, crawler_cls):
            return registered_name