Beispiel #1
0
 def info_imagecrawler_desc(imagecrawler: str) -> int:
     """Print the description of the image crawler registered as ``imagecrawler``.

     The output consists of the description, the long description (if any)
     and a bullet list of config keys (if any); icon URL and class name are
     additionally logged at debug level.

     :param imagecrawler: name used to look up the image crawler class.
     :return: ``0`` on success, ``1`` when no class is found for the name.
     """
     from nichtparasoup._internals import _log
     from nichtparasoup.core.server import type_module_name_str
     from nichtparasoup.imagecrawler import get_imagecrawlers
     crawler_cls = get_imagecrawlers().get_class(imagecrawler)
     if not crawler_cls:
         _message_exception(ValueError('unknown ImageCrawler {!r}'.format(imagecrawler)))
         return 1
     newline = '\r\n'
     details = crawler_cls.info()
     sections = [details.description]
     if details.long_description:
         sections.append(details.long_description)
     if details.config:
         bullet = newline + ' * '
         # pad every key to the longest one so the descriptions line up
         key_width = max(map(len, details.config.keys()))
         config_lines = [
             '{key:{mlen}}: {desc}'.format(mlen=key_width, key=key, desc=desc)
             for key, desc in details.config.items()
         ]
         sections.append('Config: ' + bullet + bullet.join(config_lines))
     _message((newline * 2).join(sections))
     debug_lines = [
         newline,
         'DEBUG INFO',
         'Icon : {!r}'.format(details.icon_url),
         'Class: {!r}'.format(type_module_name_str(crawler_cls)),
     ]
     _log('debug', newline.join(debug_lines))
     return 0
Beispiel #2
0
 def _log_refill_crawler(crawler: Crawler, refilled: int) -> None:
     """Log at info level how many items ``crawler`` was refilled by.

     Nothing is logged when ``refilled`` is not positive.
     """
     # must be compatible to nichtparasoup.core._OnFill
     if refilled <= 0:
         return
     message = "refilled by {} via {!r}".format(refilled, crawler.imagecrawler)
     _log('info', message)
Beispiel #3
0
 def __init__(self, entries: Iterable[EntryPoint]) -> None:  # pragma: no cover
     """Start with the built-ins, then try to add every entry point.

     An entry point that raises while being added is logged at debug level
     (including the traceback) and skipped.
     """
     self._list = list(self._builtins().items())  # type: List[_Imagecrawler]
     for entry_point in entries:
         try:
             self._add(entry_point)
             added_msg = 'Entry point added: {} from {!r}'.format(entry_point, entry_point.dist)
             _log('debug', added_msg)
         except Exception as error:
             skipped_msg = 'Entry point skipped: {} from {!r}\r\n\t{}'.format(
                 entry_point, entry_point.dist, error)
             _log('debug', skipped_msg, exc_info=True)
Beispiel #4
0
 def stop(self) -> None:
     """Request a stop by setting the stop event.

     The check and the signalling happen while ``self._run_lock`` is held.

     :raises RuntimeError: if ``is_alive()`` reports that nothing is running.
     """
     # the context manager releases the lock on every exit path,
     # including the RuntimeError below
     with self._run_lock:
         if not self.is_alive():
             raise RuntimeError('not running')
         _log('info', " * stopping {}".format(type(self).__name__))
         self._stop_event.set()
Beispiel #5
0
 def stop(self) -> None:
     """Mark this instance as not running and stop its refiller, if any.

     Runs while ``self._locks.run`` is held.

     :raises RuntimeError: if the instance is not running.
     """
     with self._locks.run:
         if not self.__running:
             raise RuntimeError('not running')
         class_name = type(self).__name__
         _log('info', "\r\n * stopping {}".format(class_name))
         refiller = self._refiller
         if refiller:
             refiller.stop()
             self._refiller = None
         self.__running = False
Beispiel #6
0
 def start(self) -> None:
     """Clear the stop event and delegate to the parent class' ``start()``.

     The check and the start happen while ``self._run_lock`` is held.

     :raises RuntimeError: if ``is_alive()`` reports it is already running.
     """
     # the context manager releases the lock on every exit path,
     # including the RuntimeError below
     with self._run_lock:
         if self.is_alive():
             raise RuntimeError('already running')
         _log('info', " * starting {}".format(type(self).__name__))
         self._stop_event.clear()
         super().start()
Beispiel #7
0
 def start(self) -> None:
     """Do an initial refill, start the periodic refiller and mark as running.

     Runs while ``self._locks.run`` is held. A ``ServerRefiller`` is only
     created and started when none is set yet. The start time is stored in
     ``self._stats.time_started`` as whole seconds.

     :raises RuntimeError: if the instance is already running.
     """
     with self._locks.run:
         if self.__running:
             raise RuntimeError('already running')
         class_name = type(self).__name__
         _log('info', " * starting {}".format(class_name))
         _log('info', ' * fill all crawlers up to {}'.format(self.keep))
         # initial fill
         self.refill()
         if not self._refiller:
             refiller = ServerRefiller(self, 1)
             self._refiller = refiller
             # start threaded periodical refill
             refiller.start()
         started_at = int(time())
         self._stats.time_started = started_at
         self.__running = True
Beispiel #8
0
 def run(self) -> None:
     """Refill the server periodically until the stop event is set.

     Each round resolves the server via ``self._server_wr()``. If that yields
     a server it is refilled; otherwise the stop event is set and the loop
     ends. Between rounds the loop sleeps for ``self._sleep`` seconds.
     """
     from time import sleep
     while not self._stop_event.is_set():
         server = self._server_wr()  # type: Optional[Server]
         if server:
             server.refill()
         else:
             _log('info',
                  " * server gone. stopping {}".format(type(self).__name__))
             self._stop_event.set()
         # Drop the strong reference before sleeping. NOTE(review): `_server_wr`
         # is presumably a weak reference (name and "server gone" branch suggest
         # so); keeping `server` bound across the sleep and into the next lookup
         # would keep the server alive forever, so that branch could never run.
         del server
         if not self._stop_event.is_set():
             sleep(self._sleep)
Beispiel #9
0
 def get_stream(self,
                uri: str) -> Tuple[Union[HTTPResponse, addinfourl], str]:
     """Open ``uri`` and return the response together with its final URI.

     The request is sent with ``self._headers`` and ``self._timeout``.

     :param uri: address to fetch; must pass ``self._valid_uri``.
     :return: tuple of the opened response and ``response.geturl()``.
     :raises ValueError: if ``uri`` is rejected by ``self._valid_uri``.
     :raises Exception: whatever ``urlopen`` raises; logged at debug level
         and then re-raised unchanged.
     """
     if not self._valid_uri(uri):
         raise ValueError('not remote: ' + uri)
     _log(
         'debug', 'fetch remote {!r} in {}s with {!r}'.format(
             uri, self._timeout, self._headers))
     request = Request(uri, headers=self._headers)
     try:
         response = urlopen(
             request,
             timeout=self._timeout)  # type: Union[HTTPResponse, addinfourl]
     except Exception:
         _log('debug',
              'caught error on fetch remote {!r}'.format(uri),
              exc_info=True)
         # bare raise re-raises the active exception without rebinding it
         raise
     actual_uri = response.geturl()  # after following redirects ...
     return response, actual_uri
Beispiel #10
0
 def crawl(self) -> ImageCollection:  # pragma: no cover
     """Run one crawl while holding ``self._crawl_lock``.

     A pending reset is carried out first. Any exception raised during the
     reset or the crawl is logged and an empty ``ImageCollection`` is
     returned instead.
     """
     with self._crawl_lock:
         try:
             if self._reset_before_next_crawl:
                 _log('debug', 'crawler resetting {!r}'.format(self))
                 self._reset()
                 self._reset_before_next_crawl = False
             _log('debug', 'crawling started {!r}'.format(self))
             images = self._crawl()
             _log('debug', 'crawling finished {!r}'.format(self))
         except Exception:
             _log('exception',
                  'caught an error during crawling {!r}'.format(self))
             images = ImageCollection()
         return images
Beispiel #11
0
 def run(self) -> None:
     """Start the image server, serve via werkzeug, then stop the image server.

     ``run_simple`` is called with a single threaded process, without
     reloader or debugger. The image server is stopped in a ``finally``
     clause, so this happens whether serving ends normally or with an error.

     :raises Exception: whatever is raised while serving; logged and then
         re-raised unchanged.
     """
     from werkzeug.serving import run_simple
     from nichtparasoup._internals import _log
     self.imageserver.start()
     try:
         _log('info', ' * starting {0} bound to {1.hostname} on port {1.port}'.format(type(self).__name__, self))
         run_simple(
             self.hostname, self.port,
             application=self,
             static_files={'/': self._STATIC_FILES},
             processes=1, threaded=True,
             use_reloader=False,
             use_debugger=False)
         _log('info', ' * stopped {0} bound to {1.hostname} on port {1.port}'.format(type(self).__name__, self))
     except Exception:
         _log('exception', ' * Error occurred. stopping everything')
         # bare raise re-raises the active exception without rebinding it
         raise
     finally:
         self.imageserver.stop()
Beispiel #12
0
 def reset(self) -> None:
     """Flag that a reset is wanted before the next crawl and log that plan."""
     self._reset_before_next_crawl = True
     message = 'crawler reset planned for {!r}'.format(self)
     _log('debug', message)
Beispiel #13
0
 def __init__(self, **config: Any) -> None:  # pragma: no cover
     """Store the checked config and set up the crawl state.

     :param config: keyword arguments passed through ``self.check_config``.
     """
     checked_config = self.check_config(config)
     # intended to be immutable from now on
     self._config = checked_config
     self._reset_before_next_crawl = True
     self._crawl_lock = Lock()
     message = 'crawler initialized: {!r}'.format(self)
     _log('debug', message)