Example #1
0
def have_hostname(link: Link) -> typing.Tuple[bool, bool]:
    """Check if current link is a new host.

    Args:
        link: Link to check against.

    Returns:
        A tuple of two :obj:`bool` values representing
        if such link is a known host and needs force
        refetch respectively.

    See Also:
        * :func:`darc.db._have_hostname_db`
        * :func:`darc.db._have_hostname_redis`

    """
    if FLAG_DB:
        with database.connection_context():
            try:
                return _have_hostname_db(link)
            except Exception as error:
                warning = warnings.formatwarning(str(error), DatabaseOperaionFailed, __file__, 236,
                                                 f'_have_hostname_db({link})')
                print(render_error(warning, stem.util.term.Color.YELLOW), end='', file=sys.stderr)  # pylint: disable=no-member
                return False, False
    return _have_hostname_redis(link)
Example #2
0
def have_hostname(link: 'Link') -> 'Tuple[bool, bool]':
    """Check if current link is a new host.

    Args:
        link: Link to check against.

    Returns:
        A tuple of two :obj:`bool` values representing
        if such link is a known host and needs force
        refetch respectively.

    See Also:
        * :func:`darc.db._have_hostname_db`
        * :func:`darc.db._have_hostname_redis`

    """
    if FLAG_DB:
        with database.connection_context():
            try:
                return _have_hostname_db(link)
            except Exception:
                logger.pexc(LOG_WARNING,
                            category=DatabaseOperaionFailed,
                            line=f'_have_hostname_db({link.url})')
                return False, False
    return _have_hostname_redis(link)
Example #3
0
def drop_selenium(link: Link):
    """Remove link from the :mod:`selenium` database.

    Args:
        link: Link to be removed.

    See Also:
        * :func:`darc.db._drop_selenium_db`
        * :func:`darc.db._drop_selenium_redis`

    """
    if FLAG_DB:
        with database.connection_context():
            return _drop_selenium_db(link)
    return _drop_selenium_redis(link)
Example #4
0
def drop_hostname(link: Link):
    """Remove link from the hostname database.

    Args:
        link: Link to be removed.

    See Also:
        * :func:`darc.db._drop_hostname_db`
        * :func:`darc.db._drop_hostname_redis`

    """
    if FLAG_DB:
        with database.connection_context():
            return _drop_hostname_db(link)
    return _drop_hostname_redis(link)
Example #5
0
def save_requests(entries: 'Union[Link, List[Link]]',
                  single: bool = False,
                  score: 'Optional[float]' = None,
                  nx: bool = False,
                  xx: bool = False) -> None:
    """Save link to the :mod:`requests` database.

    The function updates the ``queue_requests`` database.

    Args:
        entries: Links to be added to the :mod:`requests` database.
            It can be either a :obj:`list` of links, or a single
            link string (if ``single`` set as :data:`True`).
        single: Indicate if ``entries`` is a :obj:`list` of links
            or a single link string.
        score: Score to for the Redis sorted set.
        nx: Only create new elements and not to
            update scores for elements that already exist.
        xx: Only update scores of elements that
            already exist. New elements will not be added.

    Notes:
        The ``entries`` will be dumped through :mod:`pickle` so that
        :mod:`darc` do not need to parse them again.

    When ``entries`` is a list of :class:`~darc.link.Link` instances,
    we tries to perform *bulk* update to easy the memory consumption.
    The *bulk* size is defined by :data:`~darc.db.BULK_SIZE`.

    See Also:
        * :func:`darc.db._save_requests_db`
        * :func:`darc.db._save_requests_redis`

    """
    if FLAG_DB:
        with database.connection_context():
            try:
                return _save_requests_db(entries, single, score, nx,
                                         xx)  # type: ignore[call-overload]
            except Exception:
                _arg_msg = _gen_arg_msg(entries, single, score, nx, xx)
                logger.pexc(LOG_WARNING,
                            category=DatabaseOperaionFailed,
                            line=f'_save_requests_db({_arg_msg})')
                return None
    return _save_requests_redis(entries, single, score, nx, xx)
Example #6
0
def have_hostname(link: Link) -> bool:
    """Check if current link is a new host.

    Args:
        link: Link to check against.

    Returns:
        If such link is a new host.

    See Also:
        * :func:`darc.db._have_hostname_db`
        * :func:`darc.db._have_hostname_redis`

    """
    if FLAG_DB:
        with database.connection_context():
            return _have_hostname_db(link)
    return _have_hostname_redis(link)
Example #7
0
def load_selenium(check: bool = CHECK) -> typing.List[Link]:
    """Load link from the :mod:`selenium` database.

    Args:
        check: If perform checks on loaded links,
            default to :data:`~darc.const.CHECK`.

    Returns:
        List of loaded links from the :mod:`selenium` database.

    Note:
        At runtime, the function will load links with maximum number
        at :data:`~darc.db.MAX_POOL` to limit the memory usage.

    See Also:
        * :func:`darc.db._load_selenium_db`
        * :func:`darc.db._load_selenium_redis`

    """
    if FLAG_DB:
        with database.connection_context():
            try:
                link_pool = _load_selenium_db()
            except Exception as error:
                warning = warnings.formatwarning(str(error), DatabaseOperaionFailed, __file__, 983,
                                                 '_load_selenium_db()')
                print(render_error(warning, stem.util.term.Color.YELLOW), end='', file=sys.stderr)  # pylint: disable=no-member
                link_pool = list()
    else:
        link_pool = _load_selenium_redis()

    if check:
        link_pool = _check(link_pool)

    if VERBOSE:
        print(stem.util.term.format('-*- [SELENIUM] LINK POOL -*-',
                                    stem.util.term.Color.MAGENTA))  # pylint: disable=no-member
        print(render_error(pprint.pformat(sorted(link.url for link in link_pool)),
                           stem.util.term.Color.MAGENTA))  # pylint: disable=no-member
        print(stem.util.term.format('-' * shutil.get_terminal_size().columns,
                                    stem.util.term.Color.MAGENTA))  # pylint: disable=no-member
    return link_pool
Example #8
0
def save_requests(entries: typing.Union[Link, typing.List[Link]], single: bool = False,  # pylint: disable=inconsistent-return-statements
                  score: typing.Optional[float] = None, nx: bool = False, xx: bool = False) -> None:
    """Save link to the :mod:`requests` database.

    The function updates the ``queue_requests`` database.

    Args:
        entries: Links to be added to the :mod:`requests` database.
            It can be either a :obj:`list` of links, or a single
            link string (if ``single`` set as :data:`True`).
        single: Indicate if ``entries`` is a :obj:`list` of links
            or a single link string.
        score: Score to for the Redis sorted set.
        nx: Only create new elements and not to
            update scores for elements that already exist.
        xx: Only update scores of elements that
            already exist. New elements will not be added.

    Notes:
        The ``entries`` will be dumped through :mod:`pickle` so that
        :mod:`darc` do not need to parse them again.

    When ``entries`` is a list of :class:`~darc.link.Link` instances,
    we tries to perform *bulk* update to easy the memory consumption.
    The *bulk* size is defined by :data:`~darc.db.BULK_SIZE`.

    See Also:
        * :func:`darc.db._save_requests_db`
        * :func:`darc.db._save_requests_redis`

    """
    if FLAG_DB:
        with database.connection_context():
            try:
                return _save_requests_db(entries, single, score, nx, xx)
            except Exception as error:
                warning = warnings.formatwarning(str(error), DatabaseOperaionFailed, __file__, 505,
                                                 '_save_requests_db(...)')
                print(render_error(warning, stem.util.term.Color.YELLOW), end='', file=sys.stderr)  # pylint: disable=no-member
                return
    return _save_requests_redis(entries, single, score, nx, xx)
Example #9
0
def drop_selenium(link: Link) -> None:  # pylint: disable=inconsistent-return-statements
    """Remove link from the :mod:`selenium` database.

    Args:
        link: Link to be removed.

    See Also:
        * :func:`darc.db._drop_selenium_db`
        * :func:`darc.db._drop_selenium_redis`

    """
    if FLAG_DB:
        with database.connection_context():
            try:
                return _drop_selenium_db(link)
            except Exception as error:
                warning = warnings.formatwarning(str(error), DatabaseOperaionFailed, __file__, 433,
                                                 f'_drop_selenium_db({link})')
                print(render_error(warning, stem.util.term.Color.YELLOW), end='', file=sys.stderr)  # pylint: disable=no-member
                return
    return _drop_selenium_redis(link)
Example #10
0
def drop_selenium(link: 'Link') -> None:  # pylint: disable=inconsistent-return-statements
    """Remove link from the :mod:`selenium` database.

    Args:
        link: Link to be removed.

    See Also:
        * :func:`darc.db._drop_selenium_db`
        * :func:`darc.db._drop_selenium_redis`

    """
    if FLAG_DB:
        with database.connection_context():
            try:
                return _drop_selenium_db(link)
            except Exception:
                logger.pexc(LOG_WARNING,
                            category=DatabaseOperaionFailed,
                            line=f'_drop_selenium_db({link.url})')
                return None
    return _drop_selenium_redis(link)
Example #11
0
def drop_hostname(link: 'Link') -> None:
    """Remove link from the hostname database.

    Args:
        link: Link to be removed.

    See Also:
        * :func:`darc.db._drop_hostname_db`
        * :func:`darc.db._drop_hostname_redis`

    """
    if FLAG_DB:
        with database.connection_context():
            try:
                return _drop_hostname_db(link)
            except Exception:
                logger.pexc(LOG_WARNING,
                            category=DatabaseOperaionFailed,
                            line=f'_drop_hostname_db({link.url})')
                return None
    return _drop_hostname_redis(link)
Example #12
0
def load_requests(check: bool = CHECK) -> typing.List[Link]:
    """Load link from the :mod:`requests` database.

    Args:
        check: If perform checks on loaded links,
            default to :data:`~darc.const.CHECK`.

    Returns:
        List of loaded links from the :mod:`requests` database.

    Note:
        At runtime, the function will load links with maximum number
        at :data:`~darc.db.MAX_POOL` to limit the memory usage.

    See Also:
        * :func:`darc.db._load_requests_db`
        * :func:`darc.db._load_requests_redis`

    """
    if FLAG_DB:
        with database.connection_context():
            link_pool = _load_requests_db()
    else:
        link_pool = _load_requests_redis()

    if check:
        link_pool = _check(link_pool)

    if VERBOSE:
        print(
            stem.util.term.format('-*- [REQUESTS] LINK POOL -*-',
                                  stem.util.term.Color.MAGENTA))  # pylint: disable=no-member
        print(
            render_error(
                pprint.pformat(sorted(link.url for link in link_pool)),
                stem.util.term.Color.MAGENTA))  # pylint: disable=no-member
        print(
            stem.util.term.format('-' * shutil.get_terminal_size().columns,
                                  stem.util.term.Color.MAGENTA))  # pylint: disable=no-member
    return link_pool
Example #13
0
def save_requests(entries: typing.List[Link],
                  single: bool = False,
                  score=None,
                  nx=False,
                  xx=False):
    """Save link to the :mod:`requests` database.

    The function updates the ``queue_requests`` database.

    Args:
        entries: Links to be added to the :mod:`requests` database.
            It can be either a :obj:`list` of links, or a single
            link string (if ``single`` set as :data:`True`).
        single: Indicate if ``entries`` is a :obj:`list` of links
            or a single link string.
        score: Score to for the Redis sorted set.
        nx: Only create new elements and not to
            update scores for elements that already exist.
        xx: Only update scores of elements that
            already exist. New elements will not be added.

    Notes:
        The ``entries`` will be dumped through :mod:`pickle` so that
        :mod:`darc` do not need to parse them again.

    When ``entries`` is a list of :class:`~darc.link.Link` instances,
    we tries to perform *bulk* update to easy the memory consumption.
    The *bulk* size is defined by :data:`~darc.db.BULK_SIZE`.

    See Also:
        * :func:`darc.db._save_requests_db`
        * :func:`darc.db._save_requests_redis`

    """
    if FLAG_DB:
        with database.connection_context():
            return _save_requests_db(entries, single, score, nx, xx)
    return _save_requests_redis(entries, single, score, nx, xx)
Example #14
0
def load_requests(check: bool = CHECK) -> 'List[Link]':
    """Load link from the :mod:`requests` database.

    Args:
        check: If perform checks on loaded links,
            default to :data:`~darc.const.CHECK`.

    Returns:
        List of loaded links from the :mod:`requests` database.

    Note:
        At runtime, the function will load links with maximum number
        at :data:`~darc.db.MAX_POOL` to limit the memory usage.

    See Also:
        * :func:`darc.db._load_requests_db`
        * :func:`darc.db._load_requests_redis`

    """
    if FLAG_DB:
        with database.connection_context():
            try:
                link_pool = _load_requests_db()
            except Exception:
                logger.pexc(LOG_WARNING,
                            category=DatabaseOperaionFailed,
                            line='_load_requests_db()')
                link_pool = []
    else:
        link_pool = _load_requests_redis()

    if check:
        link_pool = _check(link_pool)

    logger.plog(LOG_VERBOSE,
                '-*- [REQUESTS] LINK POOL -*-',
                object=sorted(link.url for link in link_pool))
    return link_pool