Esempio n. 1
0
from darc._compat import RobotFileParser
from darc.const import CHECK, PATH_MISC, get_lock
from darc.db import save_requests
from darc.link import parse_link
from darc.logging import logger
from darc.parse import _check, get_content_type, urljoin
from darc.requests import request_session
from darc.save import save_link

if TYPE_CHECKING:
    from typing import List, Optional

    import darc.link as darc_link  # Link

# file (under PATH_MISC) to which save_invalid() appends links with an invalid scheme
PATH = os.path.join(PATH_MISC, 'invalid.txt')
# lock held by save_invalid() while it appends to PATH
LOCK = get_lock()


def save_invalid(link: 'darc_link.Link') -> None:
    """Save link with invalid scheme.

    The function will save link with invalid scheme to the file
    as defined in :data:`~darc.proxy.null.PATH`.

    Args:
        link: Link object representing the link with invalid scheme.

    """
    with LOCK:  # type: ignore[union-attr]
        with open(PATH, 'a') as file:
            print(json.dumps({
Esempio n. 2
0
import os
from typing import TYPE_CHECKING

from darc._compat import datetime
from darc.const import PATH_DB, PATH_LN, get_lock
from darc.link import quote

if TYPE_CHECKING:
    from typing import Optional

    from requests import Response, Session

    import darc.link as darc_link  # Link

# lock for file I/O
# NOTE(review): obtained from darc.const.get_lock(); the code that acquires it
# is outside this excerpt -- confirm which writes it is meant to serialise.
_SAVE_LOCK = get_lock()


def sanitise(link: 'darc_link.Link',
             time: 'Optional[datetime]' = None,
             raw: bool = False,
             data: bool = False,
             headers: bool = False,
             screenshot: bool = False) -> str:
    """Sanitise link to path.

    Args:
        link: Link object to sanitise the path
        time (datetime): Timestamp for the path.
        raw: If this is a raw HTML document from :mod:`requests`.
        data: If this is a generic content type document.