def test_url_scheme():
    """Check that a bad scheme is reported as ValueError.

    The URL below is absolute, so the exception raised for its ``ftp``
    scheme must not be RelativeURLError.
    """
    unsupported_url = "ftp://localhost:8000/second_page.html"
    with pytest.raises(ValueError):
        try:
            parse_absolute_url(unsupported_url)
        except RelativeURLError:
            # Turn the wrong exception type into a test failure.
            raise AssertionError('should not raise RelativeURLError')
def _add_extra_pages(self, prefix, extras):
    """Add URLs of extra pages from config.

    Handles both literal URLs and generators. Each item of *extras* is
    one of:

    * a ``str``: passed through ``decode_input_path``, joined onto
      *prefix* with ``urljoin``, parsed, and handed to ``self.add_task``;
    * a ``dict``: must have a ``"generator"`` key whose value is either a
      callable or a string resolved by ``import_variable_from_module``;
    * anything else: used directly as a generator callable.

    A generator is called with ``self.app`` and its result is processed
    recursively as another *extras* iterable.

    Raises:
        ValueError: a dict item has no ``"generator"`` key.
        ExternalURLError: re-raised, with the offending URL in the message,
            when ``self.add_task`` raises it for a literal URL.
    """
    for extra in extras:
        if isinstance(extra, str):
            url = parse_absolute_url(
                urljoin(prefix, decode_input_path(extra)))
            try:
                self.add_task(url, reason='extra page')
            except ExternalURLError as err:
                # Keep the original error as the explicit cause.
                raise ExternalURLError(
                    f'External URL specified in extra_pages: {url}'
                ) from err
            continue

        if isinstance(extra, dict):
            try:
                generator = extra['generator']
            except KeyError as err:
                raise ValueError(
                    'extra_pages must be strings or dicts with '
                    + f'a "generator" key, not `{extra}`'
                ) from err
            if isinstance(generator, str):
                generator = import_variable_from_module(generator)
        else:
            generator = extra

        # Both the dict form and the bare-callable form end up here.
        self._add_extra_pages(prefix, generator(self.app))
def test_scheme():
    """The parsed URL exposes its scheme as ``scheme``."""
    url = parse_absolute_url("https://freezeyt.test:1234/foo/")
    expected_scheme = 'https'
    assert url.scheme == expected_scheme
def test_netloc():
    """``netloc`` equals the host followed by ``:`` and the port."""
    url = parse_absolute_url("http://freezeyt.test:1234/foo/")
    actual_netloc = url.netloc
    expected_netloc = url.host + ':1234'
    assert actual_netloc == expected_netloc
def test_port():
    """The explicit port of a URL is exposed as the integer ``port``."""
    url = parse_absolute_url("http://freezeyt.test:1234/foo/")
    expected_port = 1234
    assert url.port == expected_port
def test_no_port_https():
    """An https URL given without a port has ``:443`` in its netloc."""
    url = parse_absolute_url("https://naucse.python.cz/")
    expected_netloc = 'naucse.python.cz:443'
    assert url.netloc == expected_netloc
def test_no_scheme():
    """A URL string written without a scheme is rejected with ValueError."""
    schemeless_url = "pyladies.cz/"
    with pytest.raises(ValueError):
        parse_absolute_url(schemeless_url)
def test_unicode_host():
    """A non-ASCII host name is available in encoded form as ``ascii_host``."""
    url = parse_absolute_url("https://čau☺フ.даль.рф:1234/foo/")
    expected_host = 'xn--au-dma4819a4cl.xn--80ahw2e.xn--p1ai'
    assert url.ascii_host == expected_host
def test_query():
    """The query string (without ``?``) is exposed as ``query``."""
    url = parse_absolute_url("https://freezeyt.test:1234/foo/?a=123")
    expected_query = 'a=123'
    assert url.query == expected_query
def test_no_scheme():
    """A URL string written without a scheme raises RelativeURLError."""
    schemeless_url = "pyladies.cz/"
    with pytest.raises(RelativeURLError):
        parse_absolute_url(schemeless_url)
import pytest

from freezeyt.util import parse_absolute_url, add_port

# Base URL with an explicit port; every test derives its input from it.
url_with_port = parse_absolute_url('http://localhost:80/')

# To construct URLs without ports, we use "url_with_port.join".
# The parse_absolute_url function always adds a port.


def test_add_http_port():
    """add_port turns a port-less http URL into one with port 80."""
    tested_url = url_with_port.join('http://localhost/')
    # Sanity check: the joined URL really has no port yet.
    assert tested_url.to_url() == 'http://localhost/'
    assert add_port(tested_url).to_url() == 'http://localhost:80/'


def test_add_https_port():
    """add_port turns a port-less https URL into one with port 443."""
    tested_url = url_with_port.join('https://localhost/')
    # Sanity check: the joined URL really has no port yet.
    assert tested_url.to_url() == 'https://localhost/'
    assert add_port(tested_url).to_url() == 'https://localhost:443/'


def test_add_port_unknown_scheme():
    """add_port raises ValueError for a scheme it does not know."""
    tested_url = url_with_port.join('unknownscheme://localhost/')
    assert tested_url.to_url() == 'unknownscheme://localhost/'
    with pytest.raises(ValueError):
        add_port(tested_url)


def test_no_add_http_port():
    # NOTE(review): no assertion is visible for this test in this chunk;
    # the body may be truncated -- confirm against the full file.
    tested_url = url_with_port.join('http://localhost:1234/')
def __init__(self, app, config):
    """Set up the freezer for *app* according to the *config* mapping.

    Config keys read here: ``extra_pages``, ``extra_files``,
    ``url_finders``, ``status_handlers``, ``prefix``, ``output``,
    ``url_to_path`` and ``hooks``. All are looked up with ``config.get``
    and a default, except ``output``, which is read with
    ``config['output']`` and so must be present.

    After the attributes are set, the prefix URL is registered as the
    first task and the configured extra pages are added.

    Raises:
        ValueError: the decoded prefix path does not end with ``/``, the
            output type is neither ``dict`` nor ``dir``, or a ``dir``
            output has no ``dir`` entry.
    """
    self.app = app
    self.config = config

    self.freeze_info = hooks.FreezeInfo(self)

    self.extra_pages = config.get('extra_pages', ())
    self.extra_files = config.get('extra_files', None)

    self.url_finders = parse_handlers(
        config.get('url_finders', DEFAULT_URL_FINDERS),
        default_module='freezeyt.url_finders')

    # User-supplied status handlers override the defaults key by key.
    _status_handlers = dict(
        DEFAULT_STATUS_HANDLERS, **config.get('status_handlers', {}))
    self.status_handlers = parse_handlers(
        _status_handlers, default_module='freezeyt.status_handlers')

    prefix = config.get('prefix', 'http://localhost:8000/')

    # Decode path in the prefix URL.
    # Save the parsed version of prefix as self.prefix
    prefix_parsed = parse_absolute_url(prefix)
    decoded_path = decode_input_path(prefix_parsed.path)
    if not decoded_path.endswith('/'):
        raise ValueError('prefix must end with /')
    self.prefix = prefix_parsed.replace(path=decoded_path)

    output = config['output']
    if isinstance(output, str):
        # A bare string is shorthand for a directory output.
        output = {'type': 'dir', 'dir': output}

    if output['type'] == 'dict':
        self.saver = DictSaver(self.prefix)
    elif output['type'] == 'dir':
        try:
            output_dir = output['dir']
        except KeyError as err:
            raise ValueError("output directory not specified") from err
        self.saver = FileSaver(Path(output_dir), self.prefix)
    else:
        raise ValueError(f"unknown output type {output['type']}")

    self.url_to_path = config.get('url_to_path', default_url_to_path)
    if isinstance(self.url_to_path, str):
        self.url_to_path = import_variable_from_module(self.url_to_path)

    # The tasks for individual pages are tracked in the following sets
    # (actually dictionaries: {task.path: task})
    # Each task must be in exactly one of these.
    self.done_tasks = {}
    self.redirecting_tasks = {}
    self.pending_tasks = {}
    self.inprogress_tasks = {}
    self.task_queues = {
        TaskStatus.PENDING: self.pending_tasks,
        TaskStatus.DONE: self.done_tasks,
        TaskStatus.REDIRECTING: self.redirecting_tasks,
        TaskStatus.IN_PROGRESS: self.inprogress_tasks,
    }

    self.add_task(prefix_parsed, reason='site root (homepage)')
    self._add_extra_pages(prefix, self.extra_pages)

    # Hooks given as strings are resolved to the objects they name.
    self.hooks = {}
    for name, func in config.get('hooks', {}).items():
        if isinstance(func, str):
            func = import_variable_from_module(func)
        self.hooks[name] = func
def test_absolute():
    """A bare path such as ``/a/b/c`` is rejected with ValueError."""
    path_only = "/a/b/c"
    with pytest.raises(ValueError):
        parse_absolute_url(path_only)
def test_url_scheme():
    """A URL with the ``ftp`` scheme is rejected with ValueError."""
    ftp_url = "ftp://localhost:8000/second_page.html"
    with pytest.raises(ValueError):
        parse_absolute_url(ftp_url)
def test_no_netloc():
    """``pyladies.cz/foo`` (no ``scheme://`` part) raises ValueError."""
    bare_url = "pyladies.cz/foo"
    with pytest.raises(ValueError):
        parse_absolute_url(bare_url)
def test_path():
    """The path component of the URL is exposed as ``path``."""
    url = parse_absolute_url("https://freezeyt.test:1234/foo/")
    expected_path = '/foo/'
    assert url.path == expected_path
def test_absolute():
    """A bare path such as ``/a/b/c`` raises RelativeURLError."""
    path_only = "/a/b/c"
    with pytest.raises(RelativeURLError):
        parse_absolute_url(path_only)
def test_no_netloc():
    """``pyladies.cz/foo`` (no ``scheme://`` part) raises RelativeURLError."""
    bare_url = "pyladies.cz/foo"
    with pytest.raises(RelativeURLError):
        parse_absolute_url(bare_url)
def test_fragment():
    """The fragment (without ``#``) is exposed as ``fragment``."""
    url = parse_absolute_url("https://freezeyt.test:1234/foo/#heading")
    expected_fragment = 'heading'
    assert url.fragment == expected_fragment
def test_no_port_http():
    """An http URL given without a port has ``:80`` in its netloc."""
    url = parse_absolute_url("http://pyladies.cz/")
    expected_netloc = 'pyladies.cz:80'
    assert url.netloc == expected_netloc
def add_url(self, url, reason=None):
    """Parse *url* and pass it to the freezer's ``add_task``.

    *url* goes through ``parse_absolute_url``; the result and *reason*
    are forwarded to ``self._freezer.add_task``.
    """
    add_task = self._freezer.add_task
    parsed_url = parse_absolute_url(url)
    add_task(parsed_url, reason=reason)
def __init__(self, app, config):
    """Set up the freezer for *app* according to the *config* mapping.

    Config keys read here: ``version``, ``extra_pages``, ``extra_files``,
    ``default_mimetype``, ``get_mimetype``, ``mime_db_file``,
    ``url_to_path``, ``use_default_url_finders``, ``url_finders``,
    ``status_handlers``, ``prefix``, ``output``, ``hooks`` and
    ``plugins``. All are looked up with ``config.get``, except ``output``,
    which is read with ``config['output']`` and so must be present.

    The last step registers the prefix URL, extra files and extra pages
    as tasks, then installs hooks and plugins. If anything in that step
    raises, ``self.cancel_tasks()`` is called before the exception
    propagates.

    Raises:
        ValueError: a status handler key does not match ``STATUS_KEY_RE``,
            the decoded prefix path does not end with ``/``, the output
            type is neither ``dict`` nor ``dir``, or a ``dir`` output has
            no ``dir`` entry.
    """
    self.app = app
    self.config = config
    self.check_version(self.config.get('version'))

    self.freeze_info = hooks.FreezeInfo(self)

    # (attribute name, default) pairs copied from config onto self.
    CONFIG_DATA = (
        ('extra_pages', ()),
        ('extra_files', None),
        ('default_mimetype', 'application/octet-stream'),
        ('get_mimetype', default_mimetype),
        ('mime_db_file', None),
        ('url_to_path', default_url_to_path),
    )
    for attr_name, default in CONFIG_DATA:
        setattr(self, attr_name, config.get(attr_name, default))

    # A MIME database file, when given, replaces get_mimetype.
    if self.mime_db_file:
        with open(self.mime_db_file) as file:
            mime_db = json.load(file)

        mime_db = convert_mime_db(mime_db)
        self.get_mimetype = functools.partial(mime_db_mimetype, mime_db)

    if isinstance(self.get_mimetype, str):
        self.get_mimetype = import_variable_from_module(self.get_mimetype)

    if isinstance(self.url_to_path, str):
        self.url_to_path = import_variable_from_module(self.url_to_path)

    if config.get('use_default_url_finders', True):
        # User-supplied finders override the defaults key by key.
        _url_finders = dict(
            DEFAULT_URL_FINDERS, **config.get('url_finders', {}))
    else:
        _url_finders = config.get('url_finders', {})

    self.url_finders = parse_handlers(
        _url_finders, default_module='freezeyt.url_finders')

    _status_handlers = dict(
        DEFAULT_STATUS_HANDLERS, **config.get('status_handlers', {}))
    self.status_handlers = parse_handlers(
        _status_handlers, default_module='freezeyt.status_handlers')

    for key in self.status_handlers:
        if not STATUS_KEY_RE.fullmatch(key):
            raise ValueError(
                'Status descriptions must be strings with 3 digits or one '
                + f'digit and "xx", got {key!r}')

    prefix = config.get('prefix', 'http://localhost:8000/')

    # Decode path in the prefix URL.
    # Save the parsed version of prefix as self.prefix
    prefix_parsed = parse_absolute_url(prefix)
    decoded_path = decode_input_path(prefix_parsed.path)
    if not decoded_path.endswith('/'):
        raise ValueError('prefix must end with /')
    self.prefix = prefix_parsed.replace(path=decoded_path)

    output = config['output']
    if isinstance(output, str):
        # A bare string is shorthand for a directory output.
        output = {'type': 'dir', 'dir': output}

    if output['type'] == 'dict':
        self.saver = DictSaver(self.prefix)
    elif output['type'] == 'dir':
        try:
            output_dir = output['dir']
        except KeyError as err:
            raise ValueError("output directory not specified") from err
        self.saver = FileSaver(Path(output_dir), self.prefix)
    else:
        raise ValueError(f"unknown output type {output['type']}")

    # The tasks for individual pages are tracked in the following sets
    # (actually dictionaries: {task.path: task})
    # Each task must be in exactly one of these.
    self.done_tasks = {}
    self.redirecting_tasks = {}
    self.inprogress_tasks = {}
    self.failed_tasks = {}
    self.task_queues = {
        TaskStatus.DONE: self.done_tasks,
        TaskStatus.REDIRECTING: self.redirecting_tasks,
        TaskStatus.IN_PROGRESS: self.inprogress_tasks,
        TaskStatus.FAILED: self.failed_tasks,
    }

    try:
        self.add_task(prefix_parsed, reason='site root (homepage)')
        self._add_extra_files()
        self._add_extra_pages(prefix, self.extra_pages)

        # Each hook name maps to an iterable of callables or import strings.
        self.hooks = {}
        for name, funcs in config.get('hooks', {}).items():
            for func in funcs:
                if isinstance(func, str):
                    func = import_variable_from_module(func)
                self.add_hook(name, func)

        for plugin in config.get('plugins', {}):
            if isinstance(plugin, str):
                plugin = import_variable_from_module(plugin)
            plugin(self.freeze_info)

        self.semaphore = asyncio.Semaphore(MAX_RUNNING_TASKS)
    except BaseException:
        # Deliberately catches everything (same reach as a bare except),
        # so the cleanup also runs on KeyboardInterrupt and the like.
        self.cancel_tasks()
        raise