Esempio n. 1
0
def get_exposed_crawlers():
    """Yield an instance of each crawler the web UI may run automatically.

    A registered class is exposed when it is a ``DocumentCrawler`` subclass
    and its ``COLLECTION_ID`` is not ``None``. Instances are created lazily,
    one per iteration step.
    """
    for _, crawler_cls in get_crawlers().items():
        # Check the subclass relation first so COLLECTION_ID is only read
        # on classes that are DocumentCrawler subclasses.
        if (issubclass(crawler_cls, DocumentCrawler)
                and crawler_cls.COLLECTION_ID is not None):
            yield crawler_cls()
Esempio n. 2
0
def get_exposed_crawlers():
    """Generate the crawlers which can be run automatically via the web UI.

    Walks every entry returned by ``get_crawlers()`` and yields a fresh
    instance of each class that subclasses ``DocumentCrawler`` and has a
    ``COLLECTION_ID`` other than ``None``.
    """
    for _, candidate in get_crawlers().items():
        # Short-circuit keeps the attribute access behind the type check.
        exposed = (issubclass(candidate, DocumentCrawler)
                   and candidate.COLLECTION_ID is not None)
        if exposed:
            yield candidate()
Esempio n. 3
0
def crawl(name):
    """Run the crawler registered under ``name``.

    If ``name`` is not a key of ``get_crawlers()`` the miss is logged at
    info level and nothing is executed. ``db.session.commit()`` is called
    in both cases.
    """
    log.info('Crawling %r...', name)
    registry = get_crawlers()
    if name in registry:
        crawler_cls = registry.get(name)
        crawler_cls().execute()
    else:
        log.info('No such crawler: %r', name)
    # Commit runs whether or not a crawler was found.
    db.session.commit()
Esempio n. 4
0
def crawl(name):
    """Look up a crawler by its registered name and execute it.

    An unknown ``name`` only produces an info-level log line. The database
    session is committed at the end regardless of the outcome of the lookup.
    """
    log.info('Crawling %r...', name)
    available = get_crawlers()
    is_registered = name in available
    if is_registered:
        instance = available.get(name)()
        instance.execute()
    else:
        log.info('No such crawler: %r', name)
    db.session.commit()
Esempio n. 5
0
def crawl(name, incremental=False, param=None):
    """Run the crawler registered under ``name``.

    ``incremental`` is always forwarded to the crawler's ``execute()``.
    ``param`` is forwarded as the keyword argument ``param`` only when it
    is truthy; otherwise no extra keyword is passed. An unknown ``name`` is
    logged at info level and nothing is executed.
    """
    log.info('Crawling %r...', name)
    registry = get_crawlers()
    if name in registry:
        instance = registry.get(name)()
        # Only pass `param` through when the caller supplied a truthy value.
        extra = {}
        if param:
            extra['param'] = param
        # NOTE: the crawler is executed directly; a dispatch through
        # execute_crawler.delay(...) was left disabled at this point.
        instance.execute(incremental=incremental, **extra)
    else:
        log.info('No such crawler: %r', name)
    # XXX: flagged by the original author as meaningless.
    db.session.commit()
Esempio n. 6
0
File: crawler.py Progetto: 01-/aleph
 def name(self):
     """Return the key under which this object's class is registered.

     Scans ``get_crawlers()`` in its iteration order and returns the key of
     the first class that ``self`` is an instance of (so a registered base
     class also matches). Returns ``None`` when nothing matches.
     """
     for label, crawler_cls in get_crawlers().items():
         if not isinstance(self, crawler_cls):
             continue
         return label
     return None
Esempio n. 7
0
 def name(self):
     """Look up the registered name matching this instance.

     The first ``get_crawlers()`` entry whose class passes
     ``isinstance(self, cls)`` wins; ``None`` is returned if there is no
     such entry.
     """
     matches = (
         label
         for label, crawler_cls in get_crawlers().items()
         if isinstance(self, crawler_cls)
     )
     # The generator is lazy, so scanning stops at the first match.
     return next(matches, None)