def _load_requests_redis() -> typing.List[Link]:
    """Fetch pending links from the :mod:`requests` queue in Redis.

    The function reads the ``queue_requests`` database while holding the
    ``lock_queue_requests`` lock. Only entries whose score is not greater
    than *now* minus :data:`TIME_CACHE` (or simply *now* when
    :data:`TIME_CACHE` is :data:`None`) are selected.

    Returns:
        List of loaded links from the :mod:`requests` database. An empty
        list is returned if the Redis lock cannot be acquired.

    Note:
        At runtime, at most :data:`~darc.db.MAX_POOL` links are loaded
        per call to limit the memory usage.

    """
    now = time.time()
    # with no cache period configured the delta is zero, i.e. every record
    # scored up to the current time is eligible
    sec_delta = 0 if TIME_CACHE is None else TIME_CACHE.total_seconds()
    max_score = now - sec_delta

    try:
        with _redis_get_lock('lock_queue_requests', blocking_timeout=LOCK_TIMEOUT):  # type: ignore
            names = _redis_command('zrangebyscore', 'queue_requests',
                                   min=0, max=max_score, start=0, num=MAX_POOL)

            # resolve every queued name to its stored payload first ...
            payloads: typing.List[bytes] = []
            for name in names:
                payloads.append(_redis_command('get', name))

            # ... then unpickle the ones that actually exist
            link_pool = [pickle.loads(blob) for blob in payloads if blob]  # nosec

            if TIME_CACHE is not None:
                # force update records
                _save_requests_redis(link_pool, score=now + sec_delta)
    except redis_lock.LockError:
        message = f'[REQUESTS] Failed to acquire Redis lock after {LOCK_TIMEOUT} second(s)'
        warning = warnings.formatwarning(message, LockWarning, __file__, 949,
                                         "_redis_get_lock('lock_queue_requests')")
        print(render_error(warning, stem.util.term.Color.YELLOW), end='', file=sys.stderr)  # pylint: disable=no-member
        link_pool = []
    return link_pool
def _load_selenium_redis() -> typing.List[Link]:
    """Fetch pending links from the :mod:`selenium` queue in Redis.

    The function reads the ``queue_selenium`` database while holding the
    ``lock_queue_selenium`` lock. Only entries whose score is not greater
    than *now* minus :data:`TIME_CACHE` (or simply *now* when
    :data:`TIME_CACHE` is :data:`None`) are selected, and each returned
    member is unpickled directly.

    Returns:
        List of loaded links from the :mod:`selenium` database. An empty
        list is returned if the Redis lock cannot be acquired.

    Note:
        At runtime, at most :data:`~darc.db.MAX_POOL` links are loaded
        per call to limit the memory usage.

    """
    now = time.time()
    # with no cache period configured the delta is zero, i.e. every record
    # scored up to the current time is eligible
    sec_delta = 0 if TIME_CACHE is None else TIME_CACHE.total_seconds()
    max_score = now - sec_delta

    try:
        with _redis_get_lock('lock_queue_selenium', blocking_timeout=LOCK_TIMEOUT):
            members = _redis_command('zrangebyscore', 'queue_selenium',
                                     min=0, max=max_score, start=0, num=MAX_POOL)
            link_pool = list(map(pickle.loads, members))

            if TIME_CACHE is not None:
                # force update records
                _save_selenium_redis(link_pool, score=now + sec_delta)
    except redis_lock.LockError:
        message = f'[SELENIUM] Failed to acquire Redis lock after {LOCK_TIMEOUT} second(s)'
        warning = warnings.formatwarning(message, LockWarning, __file__, 299,
                                         "_redis_get_lock('lock_queue_selenium')")
        print(render_error(warning, stem.util.term.Color.YELLOW), end='', file=sys.stderr)  # pylint: disable=no-member
        link_pool = []
    return link_pool
def _load_selenium_redis() -> 'List[Link]':
    """Fetch pending links from the :mod:`selenium` queue in Redis.

    The function reads the ``queue_selenium`` database while holding the
    ``queue_selenium`` lock. Names whose score is not greater than *now*
    minus :data:`TIME_CACHE` (or simply *now* when :data:`TIME_CACHE` is
    :data:`None`) are selected, their payloads are fetched, and every
    non-empty payload is unpickled.

    Returns:
        List of loaded links from the :mod:`selenium` database. An empty
        list is returned if acquiring the Redis lock fails.

    Note:
        At runtime, at most :data:`~darc.db.MAX_POOL` links are loaded
        per call to limit the memory usage.

    """
    now = time.time()
    # with no cache period configured the delta is zero, i.e. every record
    # scored up to the current time is eligible
    sec_delta = 0 if TIME_CACHE is None else TIME_CACHE.total_seconds()  # type: float
    max_score = now - sec_delta

    try:
        with _redis_get_lock('queue_selenium'):
            names = _redis_command('zrangebyscore', 'queue_selenium',
                                   min=0, max=max_score, start=0, num=MAX_POOL)

            # resolve every queued name to its stored payload first ...
            payloads = []  # type: List[bytes]
            for name in names:
                payloads.append(_redis_command('get', name))

            # ... then unpickle the ones that actually exist
            link_pool = [pickle.loads(blob) for blob in payloads if blob]  # nosec: B301

            if TIME_CACHE is not None:
                # force update records
                _save_selenium_redis(link_pool, score=now + sec_delta)
    except pottery_exceptions.PotteryError:
        logger.pexc(LOG_WARNING, f'[SELENIUM] Failed to acquire Redis lock after {LOCK_TIMEOUT} second(s)',
                    LockWarning, "_redis_get_lock('queue_selenium')")
        link_pool = []
    return link_pool
def _have_hostname_redis(link: 'Link') -> 'Tuple[bool, bool]':
    """Tell whether the host of a link has been seen before.

    The function looks up ``link.host`` in the ``queue_hostname`` database
    while holding the ``queue_hostname`` lock. The record is (re)written
    with the current timestamp when the host is new, or when it is known
    but its stored score is below the refetch threshold.

    Args:
        link: Link to check against.

    Returns:
        A tuple of two :obj:`bool` values representing if such link is a
        known host and needs force refetch respectively.

    """
    timestamp = time.time()

    # without a cache period every known host compares below the threshold
    if TIME_CACHE is None:
        threshold = math.inf
    else:
        threshold = timestamp - TIME_CACHE.total_seconds()

    with _redis_get_lock('queue_hostname'):
        score = _redis_command('zscore', 'queue_hostname', link.host)  # type: Optional[int]

        have_flag = score is not None
        force_fetch = score is not None and score < threshold

        # new hosts are always recorded; known hosts only upon re-fetch
        if force_fetch or not have_flag:
            _redis_command('zadd', 'queue_hostname', {link.host: timestamp})
    return have_flag, force_fetch