def _load_selenium_db() -> typing.List[Link]:
    """Load links from the :mod:`selenium` database.

    The function reads the
    :class:`~darc.model.tasks.selenium.SeleniumQueueModel` table, picking the
    records whose ``timestamp`` is not later than the current time minus
    ``TIME_CACHE`` (or simply the current time when ``TIME_CACHE`` is
    :data:`None`), ordered by ``timestamp``.

    Returns:
        List of loaded links from the :mod:`selenium` database.

    Note:
        At most :data:`~darc.db.MAX_POOL` links are loaded per call to limit
        the memory usage. When ``TIME_CACHE`` is set, the loaded links are
        saved back with a timestamp of ``now + TIME_CACHE``.

    """
    current = datetime.datetime.now()
    caching = TIME_CACHE is not None
    cutoff = current - TIME_CACHE if caching else current

    selection = (SeleniumQueueModel
                 .select(SeleniumQueueModel.link)
                 .where(SeleniumQueueModel.timestamp <= cutoff)
                 .order_by(SeleniumQueueModel.timestamp)
                 .limit(MAX_POOL))

    with database.atomic():
        # iterating the select query is what actually runs it
        loaded = [record.link for record in selection]

        if caching:
            # force update records
            _save_selenium_db(loaded, score=current + TIME_CACHE)
    return loaded
def _load_requests_db() -> 'List[Link]':
    """Load links from the :mod:`requests` database.

    The function reads the
    :class:`~darc.model.tasks.requests.RequestsQueueModel` table, picking the
    records whose ``timestamp`` is not later than the current time minus
    ``TIME_CACHE`` (a zero offset is used when ``TIME_CACHE`` is
    :data:`None`), ordered by ``timestamp``.

    Returns:
        List of loaded links from the :mod:`requests` database.

    Note:
        At most :data:`~darc.db.MAX_POOL` links are loaded per call to limit
        the memory usage. When ``TIME_CACHE`` is set, the loaded links are
        saved back with a score of ``(now + TIME_CACHE).timestamp()``.

    """
    current = datetime.now()
    window = timedelta(seconds=0) if TIME_CACHE is None else TIME_CACHE
    cutoff = current - window

    selection = (RequestsQueueModel
                 .select(RequestsQueueModel.link)
                 .where(RequestsQueueModel.timestamp <= cutoff)
                 .order_by(RequestsQueueModel.timestamp)
                 .limit(MAX_POOL))

    with database.atomic():
        records = _db_operation(selection.execute)  # type: List[RequestsQueueModel]
        loaded = [record.link for record in records]

        # force update records
        if TIME_CACHE is not None:
            refreshed = (current + window).timestamp()
            _save_requests_db(loaded, score=refreshed)
    return loaded
def _save_selenium_db(entries: typing.List[Link],
                      single: bool = False,
                      score=None,
                      nx=False,
                      xx=False):
    """Save link to the :mod:`selenium` database.

    The function updates the
    :class:`~darc.model.tasks.selenium.SeleniumQueueModel` table.

    Args:
        entries: Links to be added to the :mod:`selenium` database.
            It can be either a :obj:`list` of links, or a single
            link (if ``single`` set as :data:`True`).
        single: Indicate if ``entries`` is a :obj:`list` of links
            or a single link.
        score: Value written to the ``timestamp`` column of the records;
            defaults to the current time when :data:`None`.
        nx: Only create new records and do not touch records
            that already exist.
        xx: Only update the ``timestamp`` of records that already exist.
            New records will not be added.

    Note:
        ``nx`` is checked before ``xx``; if both are set, ``nx`` wins.
        When neither is set, records are written with ``REPLACE``.

    """
    if not entries:
        return
    if score is None:
        score = datetime.datetime.now()

    if not single:
        if nx:
            with database.atomic():
                insert_many = [
                    dict(
                        text=link.url,
                        hash=link.name,
                        link=link,
                        timestamp=score,
                    ) for link in entries
                ]
                for batch in peewee.chunked(insert_many, BULK_SIZE):
                    # insert only the current chunk -- passing the whole
                    # list here would defeat the batching and re-run the
                    # full insert once per chunk
                    (SeleniumQueueModel
                     .insert_many(batch)
                     .on_conflict_ignore()
                     .execute())
            return

        if xx:
            entries_text = [link.url for link in entries]
            (SeleniumQueueModel
             .update(timestamp=score)
             .where(SeleniumQueueModel.text.in_(entries_text))
             .execute())
            return

        with database.atomic():
            replace_many = [
                dict(text=link.url, hash=link.name, link=link, timestamp=score)
                for link in entries
            ]
            for batch in peewee.chunked(replace_many, BULK_SIZE):
                SeleniumQueueModel.replace_many(batch).execute()
        return

    # from here on ``entries`` is a single link
    if nx:
        SeleniumQueueModel.get_or_create(text=entries.url,
                                         defaults=dict(
                                             hash=entries.name,
                                             link=entries,
                                             timestamp=score,
                                         ))
        return

    if xx:
        # a missing record is silently skipped: ``xx`` never creates rows
        with contextlib.suppress(peewee.DoesNotExist):
            model = SeleniumQueueModel.get(
                SeleniumQueueModel.text == entries.url)
            model.timestamp = score
            model.save()
        return

    SeleniumQueueModel.replace(text=entries.url,
                               hash=entries.name,
                               link=entries,
                               timestamp=score).execute()
def _save_requests_db(entries: typing.Union[Link, typing.List[Link]],
                      single: bool = False,
                      score: typing.Optional[float] = None,
                      nx: bool = False,
                      xx: bool = False) -> None:
    """Save link to the :mod:`requests` database.

    The function updates the
    :class:`~darc.model.tasks.requests.RequestsQueueModel` table.

    Args:
        entries: Links to be added to the :mod:`requests` database.
            It can be either a :obj:`list` of links, or a single
            link (if ``single`` set as :data:`True`).
        single: Indicate if ``entries`` is a :obj:`list` of links
            or a single link.
        score: Value written to the ``timestamp`` column of the records;
            defaults to the current time when :data:`None`.
        nx: Only create new records and do not touch records
            that already exist.
        xx: Only update the ``timestamp`` of records that already exist.
            New records will not be added.

    Note:
        ``nx`` is checked before ``xx``; if both are set, ``nx`` wins.
        When neither is set, records are written with ``REPLACE``.

    """
    if not entries:
        return
    if score is None:
        score = datetime.datetime.now()  # type: ignore

    if not single:
        if typing.TYPE_CHECKING:
            entries = typing.cast(typing.List[Link], entries)

        if nx:
            with database.atomic():
                insert_many = [dict(
                    text=link.url,
                    hash=link.name,
                    link=link,
                    timestamp=score,
                ) for link in entries]
                for batch in peewee.chunked(insert_many, BULK_SIZE):
                    # insert only the current chunk -- passing the whole
                    # list here would defeat the batching and re-run the
                    # full insert once per chunk
                    _db_operation(RequestsQueueModel
                                  .insert_many(batch)
                                  .on_conflict_ignore()
                                  .execute)
            return

        if xx:
            entries_text = [link.url for link in entries]
            _db_operation(RequestsQueueModel
                          .update(timestamp=score)
                          .where(typing.cast(peewee.TextField, RequestsQueueModel.text).in_(entries_text))
                          .execute)
            return

        with database.atomic():
            replace_many = [dict(
                text=link.url,
                hash=link.name,
                link=link,
                timestamp=score
            ) for link in entries]
            for batch in peewee.chunked(replace_many, BULK_SIZE):
                _db_operation(RequestsQueueModel.replace_many(batch).execute)
        return

    # from here on ``entries`` is a single link
    if typing.TYPE_CHECKING:
        entries = typing.cast(Link, entries)

    if nx:
        _db_operation(RequestsQueueModel.get_or_create,
                      text=entries.url,
                      defaults=dict(
                          hash=entries.name,
                          link=entries,
                          timestamp=score,
                      ))
        return

    if xx:
        # a missing record is silently skipped: ``xx`` never creates rows
        with contextlib.suppress(peewee.DoesNotExist):
            model = _db_operation(RequestsQueueModel.get, RequestsQueueModel.text == entries.url)
            model.timestamp = score
            _db_operation(model.save)
        return

    _db_operation(RequestsQueueModel.replace(
        text=entries.url,
        hash=entries.name,
        link=entries,
        timestamp=score
    ).execute)
def _save_selenium_db(entries: 'Union[Link, List[Link]]',
                      single: bool = False,
                      score: 'Optional[float]' = None,
                      nx: bool = False,
                      xx: bool = False) -> None:
    """Save link to the :mod:`selenium` database.

    The function updates the
    :class:`~darc.model.tasks.selenium.SeleniumQueueModel` table.

    Args:
        entries: Links to be added to the :mod:`selenium` database.
            It can be either a :obj:`list` of links, or a single
            link (if ``single`` set as :data:`True`).
        single: Indicate if ``entries`` is a :obj:`list` of links
            or a single link.
        score: Timestamp value converted with ``datetime.fromtimestamp``
            and written to the ``timestamp`` column of the records;
            the current time is used when :data:`None`.
        nx: Only create new records and do not touch records
            that already exist.
        xx: Only update the ``timestamp`` of records that already exist.
            New records will not be added.

    Note:
        ``nx`` is checked before ``xx``; if both are set, ``nx`` wins.
        When neither is set, records are written with ``REPLACE``.

    """
    if not entries:
        return None

    if score is None:
        timestamp = datetime.now()
    else:
        timestamp = datetime.fromtimestamp(score)

    if not single:
        if TYPE_CHECKING:
            entries = cast('List[Link]', entries)

        if nx:
            with database.atomic():
                insert_many = [{
                    'text': link.url,
                    'hash': link.name,
                    'link': link,
                    'timestamp': timestamp,
                } for link in entries]
                for batch in peewee.chunked(insert_many, BULK_SIZE):
                    # insert only the current chunk -- passing the whole
                    # list here would defeat the batching and re-run the
                    # full insert once per chunk
                    _db_operation(
                        SeleniumQueueModel
                        .insert_many(batch)
                        .on_conflict_ignore()
                        .execute)
            return None

        if xx:
            entries_text = [link.url for link in entries]
            _db_operation(
                SeleniumQueueModel
                .update(timestamp=timestamp)
                .where(cast('TextField', SeleniumQueueModel.text).in_(entries_text))
                .execute)
            return None

        with database.atomic():
            replace_many = [{
                'text': link.url,
                'hash': link.name,
                'link': link,
                'timestamp': timestamp,
            } for link in entries]
            for batch in peewee.chunked(replace_many, BULK_SIZE):
                _db_operation(SeleniumQueueModel.replace_many(batch).execute)
        return None

    # from here on ``entries`` is a single link
    if TYPE_CHECKING:
        entries = cast('Link', entries)

    if nx:
        _db_operation(SeleniumQueueModel.get_or_create,
                      text=entries.url,
                      defaults={
                          'hash': entries.name,
                          'link': entries,
                          'timestamp': timestamp,
                      })
        return None

    if xx:
        # a missing record is silently skipped: ``xx`` never creates rows
        with contextlib.suppress(peewee.DoesNotExist):
            model = _db_operation(
                SeleniumQueueModel.get,
                SeleniumQueueModel.text == entries.url,
            )  # type: SeleniumQueueModel
            model.timestamp = timestamp
            _db_operation(model.save)
        return None

    _db_operation(
        SeleniumQueueModel.replace(
            text=entries.url,
            hash=entries.name,
            link=entries,
            timestamp=timestamp,
        ).execute)
    return None