from darc._compat import RobotFileParser from darc.const import CHECK, PATH_MISC, get_lock from darc.db import save_requests from darc.link import parse_link from darc.logging import logger from darc.parse import _check, get_content_type, urljoin from darc.requests import request_session from darc.save import save_link if TYPE_CHECKING: from typing import List, Optional import darc.link as darc_link # Link PATH = os.path.join(PATH_MISC, 'invalid.txt') LOCK = get_lock() def save_invalid(link: 'darc_link.Link') -> None: """Save link with invalid scheme. The function will save link with invalid scheme to the file as defined in :data:`~darc.proxy.null.PATH`. Args: link: Link object representing the link with invalid scheme. """ with LOCK: # type: ignore[union-attr] with open(PATH, 'a') as file: print(json.dumps({
import os from typing import TYPE_CHECKING from darc._compat import datetime from darc.const import PATH_DB, PATH_LN, get_lock from darc.link import quote if TYPE_CHECKING: from typing import Optional from requests import Response, Session import darc.link as darc_link # Link # lock for file I/O _SAVE_LOCK = get_lock() def sanitise(link: 'darc_link.Link', time: 'Optional[datetime]' = None, raw: bool = False, data: bool = False, headers: bool = False, screenshot: bool = False) -> str: """Sanitise link to path. Args: link: Link object to sanitise the path time (datetime): Timestamp for the path. raw: If this is a raw HTML document from :mod:`requests`. data: If this is a generic content type document.