Ejemplo n.º 1
0
def test_is_external_no_port():
    """Expect ValueError when one of the two URLs has no explicit port.

    Both orderings are checked: the port missing from the URL under test,
    and the port missing from the prefix.
    """
    without_port = 'http://localhost/a/'
    with_port = 'http://localhost:80/a/'
    cases = (
        (without_port, with_port),
        (with_port, without_port),
    )
    for url, prefix in cases:
        with pytest.raises(ValueError):
            is_external(url_parse(url), url_parse(prefix))
Ejemplo n.º 2
0
    def add_task(
        self,
        url: URL,
        *,
        external_ok: bool = False,
        reason: str = None
    ) -> Optional[Task]:
        """Register `url` for freezing and return the task that covers it.

        External URLs (as judged by `is_external` against `self.prefix`)
        yield None when `external_ok` is true; otherwise ExternalURLError
        is raised.

        If a task for the same path already sits in one of
        `self.task_queues`, the URL is added to that task. Otherwise a new
        Task is created and stored in `self.pending_tasks`. A truthy
        `reason` is added to the task's reasons.
        """
        if is_external(url, self.prefix):
            if not external_ok:
                raise ExternalURLError(f'Unexpected external URL: {url}')
            return None

        path = get_path_from_url(self.prefix, url, self.url_to_path)

        # Find the first queue that already holds a task for this path.
        holding_queue = next(
            (q for q in self.task_queues.values() if path in q),
            None,
        )
        if holding_queue is None:
            # No queue knows this path yet: start a fresh pending task.
            task = Task(path, {url}, self)
            self.pending_tasks[path] = task
        else:
            task = holding_queue[path]
            task.urls.add(url)

        if reason:
            task.reasons.add(reason)
        return task
Ejemplo n.º 3
0
    def url_to_filename(self, parsed_url):
        """Return the path under `self.base_path` where the page is frozen.

        Parameters:
        parsed_url
            Parsed URL to convert to a filename, e.g.
            url_parse('http://example.com:8000/foo/second.html')

        Raises ValueError if `is_external` reports the URL as external to
        `self.prefix`. A path ending in '/' gets 'index.html' appended.
        """
        if is_external(parsed_url, self.prefix):
            raise ValueError(f'external url {parsed_url}')

        prefix_path = self.prefix.path
        page_path = parsed_url.path

        # Make the path relative to the prefix, keeping a leading slash.
        if page_path.startswith(prefix_path):
            page_path = '/' + page_path[len(prefix_path):]

        if page_path.endswith('/'):
            page_path += 'index.html'

        encoded = encode_file_path(page_path)
        return self.base_path / encoded.lstrip('/')
Ejemplo n.º 4
0
def get_path_from_url(prefix, url, url_to_path):
    """Convert `url` to a relative PurePosixPath using `url_to_path`.

    The prefix's path is cut from the start of the URL's path (when it is
    there), the remainder is passed to `url_to_path`, and the result is
    wrapped in a PurePosixPath.

    Raises ValueError if `is_external` reports the URL as external to
    `prefix`, if the resulting path is absolute, or if it contains a
    '..' segment.
    """
    if is_external(url, prefix):
        raise ValueError(f'external url {url}')

    prefix_path = prefix.path
    url_path = url.path
    if url_path.startswith(prefix_path):
        url_path = url_path[len(prefix_path):]

    result = PurePosixPath(url_to_path(url_path))

    if result.is_absolute():
        raise ValueError(
            f"Path may not be absolute: {result}(from {url.to_url()})")

    segments = result.parts
    # Sanity check only: a '.' segment is not expected at this point.
    assert '.' not in segments
    if '..' in segments:
        raise ValueError(
            f"Path may not contain /../ segment: {result}(from {url.to_url()})"
        )

    return result
Ejemplo n.º 5
0
def test_is_external_positive():
    """A URL whose path lies below the prefix path is not external."""
    url = url_parse('http://localhost:80/a/b/c')
    prefix = url_parse('http://localhost:80/a/')
    external = is_external(url, prefix)
    assert not external
Ejemplo n.º 6
0
def test_is_external_root_index():
    """A URL written without any path is not external to a '/' prefix."""
    url = url_parse('http://localhost:80')
    prefix = url_parse('http://localhost:80/')
    external = is_external(url, prefix)
    assert not external
Ejemplo n.º 7
0
def test_is_external_index():
    """'/a' without the trailing slash is external to the prefix '/a/'."""
    url = url_parse('http://localhost:80/a')
    prefix = url_parse('http://localhost:80/a/')
    external = is_external(url, prefix)
    assert external
Ejemplo n.º 8
0
def test_is_external_same():
    """A URL identical to the prefix is not external."""
    url = url_parse('http://localhost:80/a/')
    prefix = url_parse('http://localhost:80/a/')
    external = is_external(url, prefix)
    assert not external
Ejemplo n.º 9
0
def test_is_external_negative_same_host():
    """A URL above the prefix path is external, even on the same host."""
    url = url_parse('http://localhost:80/a/')
    prefix = url_parse('http://localhost:80/a/b/c/')
    external = is_external(url, prefix)
    assert external
Ejemplo n.º 10
0
def test_is_external_negative():
    """A URL on a different host than the prefix is external."""
    url = url_parse('http://example.com:80/a/')
    prefix = url_parse('http://localhost:80/a/b/c/')
    external = is_external(url, prefix)
    assert external