def test_manager(self):
    """End-to-end check of CrawlerManager: starts empty, loads the test
    config directory, and exposes crawlers via dict, get(), indexing,
    and iteration."""
    # Resolve the testdata/config dir relative to this test file.
    here = os.path.realpath(__file__)
    config_dir = os.path.normpath(os.path.join(here, "../testdata/config"))
    mgr = CrawlerManager()
    # Fresh manager holds nothing until a path is loaded.
    assert len(mgr) == 0
    mgr.load_path(config_dir)
    assert isinstance(mgr.crawlers, dict)
    # Iterating the manager yields Crawler instances.
    assert all(isinstance(item, Crawler) for item in mgr)
    # The fixture directory ships exactly three crawler configs.
    assert len(mgr) == 3
    assert isinstance(mgr.get("book_scraper"), Crawler)
    assert isinstance(mgr["book_scraper"], Crawler)
    assert isinstance(mgr, Iterable)
def load_manager():
    """Return the process-wide CrawlerManager, creating it on first call.

    The instance is cached on ``settings._manager``; when a
    ``CONFIG_PATH`` is configured, crawler configs are loaded from it
    during creation.
    """
    # Guard clause: reuse the cached manager if one already exists.
    if hasattr(settings, "_manager"):
        return settings._manager
    # Imported lazily to avoid a circular import at module load time.
    from memorious.logic.manager import CrawlerManager

    settings._manager = CrawlerManager()
    if settings.CONFIG_PATH:
        settings._manager.load_path(settings.CONFIG_PATH)
    return settings._manager
def run_file(
    crawler_config,
    src=False,
    threads=None,
    continue_on_error=False,
    flush=False,
    flushall=False,
):
    """Run a crawler from a YAML config and optionally a source directory"""
    # Install a fresh manager for this run and load the single config.
    manager = CrawlerManager()
    settings._manager = manager
    config_path = Path(crawler_config)
    crawler = manager.load_crawler(config_path)
    if not crawler:
        log.warning("Could not load the crawler. Exiting.")
        return
    if src:
        # Make the sibling "src" directory importable for crawler code.
        sys.path.insert(0, str(config_path.parent / "src"))
    run_crawler(crawler, threads, continue_on_error, flush, flushall)
def manager():
    """Build a CrawlerManager populated from the test crawler directory."""
    mgr = CrawlerManager()
    mgr.load_path(crawler_dir())
    return mgr
def get_manager():
    """Create a CrawlerManager from the test crawler directory.

    The new instance is also cached on ``settings._manager`` before
    being returned.
    """
    mgr = CrawlerManager()
    mgr.load_path(get_crawler_dir())
    settings._manager = mgr
    return mgr