Example 1
import os
from collections.abc import Iterable

# Import paths assumed from the memorious project layout.
from memorious.logic.crawler import Crawler
from memorious.logic.manager import CrawlerManager


# Method of a test class; it checks the dict-like and iterable behaviour
# of CrawlerManager after loading a directory of crawler configs.
def test_manager(self):
    file_path = os.path.realpath(__file__)
    crawler_dir = os.path.normpath(
        os.path.join(file_path, "../testdata/config"))
    manager = CrawlerManager()
    assert len(manager) == 0
    manager.load_path(crawler_dir)
    assert isinstance(manager.crawlers, dict)
    assert all(isinstance(crawler, Crawler) for crawler in manager)
    assert len(manager) == 3
    assert isinstance(manager.get("book_scraper"), Crawler)
    assert isinstance(manager["book_scraper"], Crawler)
    assert isinstance(manager, Iterable)
Example 2
from memorious import settings


def load_manager():
    """Lazily create a single CrawlerManager and cache it on the settings module."""
    if not hasattr(settings, '_manager'):
        from memorious.logic.manager import CrawlerManager
        settings._manager = CrawlerManager()
        if settings.CONFIG_PATH:
            settings._manager.load_path(settings.CONFIG_PATH)
    return settings._manager
Example 3
import sys
import logging
from pathlib import Path

# memorious imports as in the other examples (module paths assumed).
from memorious import settings
from memorious.logic.manager import CrawlerManager

log = logging.getLogger(__name__)


def run_file(
    crawler_config,
    src=False,
    threads=None,
    continue_on_error=False,
    flush=False,
    flushall=False,
):
    """Run a crawler from a YAML config and optionally a source directory"""
    settings._manager = CrawlerManager()
    crawler_config = Path(crawler_config)
    crawler = settings._manager.load_crawler(crawler_config)
    if not crawler:
        log.warning("Could not load the crawler. Exiting.")
        return
    if src:
        # Make the crawler's "src" directory importable for custom operations.
        src_path = crawler_config.parent / "src"
        sys.path.insert(0, str(src_path))
    # run_crawler() is a helper defined alongside this function in the CLI module.
    run_crawler(crawler, threads, continue_on_error, flush, flushall)
Example 4
# Test fixture/helper: builds a manager from the test configuration directory.
# crawler_dir() is assumed to return the path to a directory of crawler YAML files.
def manager():
    manager = CrawlerManager()
    manager.load_path(crawler_dir())
    return manager
Example 5
# Builds a manager from the crawler config directory and caches it on settings.
# get_crawler_dir() is assumed to return the path to that directory.
def get_manager():
    manager = CrawlerManager()
    manager.load_path(get_crawler_dir())
    settings._manager = manager
    return manager
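
Taken together, the examples follow one pattern: instantiate CrawlerManager, load a directory of YAML crawler configs, then look crawlers up by name or iterate over them. Below is a minimal usage sketch, assuming the same API shown above and a hypothetical "config" directory of crawler YAML files:

from memorious.logic.manager import CrawlerManager

manager = CrawlerManager()
manager.load_path("config")            # hypothetical directory of crawler YAML files

crawler = manager.get("book_scraper")  # dict-like lookup by crawler name, as in Example 1
for crawler in manager:                # plain iteration over all loaded crawlers
    print(crawler)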