Exemple #1
0
def test_scripts_page():
    """Check which ``<script src=...>`` values end up in ``Page.scripts``.

    Each scenario serves a one-line HTML body from a mocked URL and lists the
    script URLs the page is expected to report (an empty list means the
    source value must be discarded).
    """
    # (mocked url, served body, expected content of page.scripts)
    scenarios = (
        # internal url: relative source resolved against the page url
        (
            "http://perdu.com/",
            "<script src='javascript.js'></script>",
            ["http://perdu.com/javascript.js"],
        ),
        # wrongly formatted url
        (
            "http://perdu2.com/",
            "<script src='https:///?foo=bar'></script>",
            [],
        ),
        # with scheme & netloc
        (
            "http://perdu3.com/",
            "<script src='https://*****:*****@NetLoc:80/awesome-script.js'></script>",
            ["https://*****:*****@NetLoc:80/awesome-script.js"],
        ),
        # without scheme but with netloc
        (
            "http://perdu4.com/",
            "<script src='user:pass@NetLoc:80/awesome-script.js'></script>",
            [],
        ),
        # plain absolute http url
        (
            "http://perdu5.com/",
            "<script src='http://netloc/awesome-script.js'></script>",
            ["http://netloc/awesome-script.js"],
        ),
    )

    # Register every route up front, then query them one by one.
    for url, body, _ in scenarios:
        respx.get(url).mock(return_value=httpx.Response(200, text=body))

    for url, _, expected_scripts in scenarios:
        page = Page(httpx.get(url, follow_redirects=False))
        assert len(page.scripts) == len(expected_scripts)
        for position, script_url in enumerate(expected_scripts):
            assert page.scripts[position] == script_url
Exemple #2
0
def test_content_page():
    """``Page.content`` must equal the response body, including an empty one."""
    bodies_by_url = {
        "http://perdu.com/": "foobar",
        "http://perdu2.com/": "",
        # This route is registered but never requested by the test.
        "http://perdu3.com/": "foobar",
    }
    for url, body in bodies_by_url.items():
        respx.get(url).mock(return_value=httpx.Response(200, text=body))

    for url in ("http://perdu.com/", "http://perdu2.com/"):
        page = Page(httpx.get(url, follow_redirects=False))
        assert page.content == bodies_by_url[url]
Exemple #3
0
def test_iter_frame_page():
    """The first item of ``Page.iter_frames()`` is the iframe source URL.

    The ``src`` attribute in the document has no trailing slash; the expected
    value does.
    """
    url = "http://perdu.com/"
    html = """
    <html>
        <head>
            <title>Foobar</title>
        </head>
        <body>
            <iframe id="inlineFrameExample"
                title="Inline Frame Example"
                width="300"
                height="200"
                src="http://example.com">
            </iframe>
        </body>
    </html>
    """

    mocked_response = httpx.Response(200, text=html)
    respx.get(url).mock(return_value=mocked_response)

    # basic html
    page = Page(httpx.get(url, follow_redirects=False))
    frames = page.iter_frames()

    assert next(frames) == "http://example.com/"
Exemple #4
0
def test_make_absolute():
    """Resolve a series of relative URLs through ``Page.make_absolute``.

    Every case is ``(base url, relative url, expected absolute url)``; the
    page's ``_base`` attribute is overwritten with the base url before each
    resolution.
    """
    cases = (
        ("http://base.url", "relative", "http://base.url/relative"),
        ("http://base.url", ".", "http://base.url/"),
        ("http://base.url/with_folder", ".", "http://base.url/"),
        ("http://base.url/with_folder", "./with_dot", "http://base.url/with_dot"),
        ("http://base.url/with_folder", "..", "http://base.url/"),
        ("http://base.url/with_folder", "../folder", "http://base.url/folder"),
        ("http://base.url", "http://whole.url", "http://whole.url/"),
        ("http://base.url", "https://whole.url", "https://whole.url/"),
        ("http://base.url", "http://whole.url:987", "http://whole.url:987/"),
        ("http://base.url", "https://whole.url:987", "https://whole.url:987/"),
        ("http://base.url", "/", "http://base.url/"),
        # A bare "//" is expected to resolve to an empty string.
        ("http://base.url", "//", ""),
        ("http://base.url", "//only_this", "http://only_this/"),
        ("http://base.url", "./..//", "http://base.url/"),
        ("http://base.url", "./wrong_folder/../good_folder/", "http://base.url/good_folder/"),
    )

    page = Page(Response(status_code=200, request=Request("GET", "http://base.url")))

    for base_url, relative_url, expected in cases:
        page._base = base_url
        resolved = page.make_absolute(relative_url)
        assert resolved == expected, \
            f"Absolute url from base_url='{base_url}' and relative_url='{relative_url}' is not '{expected}'"
Exemple #5
0
def test_redirection_url_page():
    """Check ``Page.redirection_url`` and ``Page.is_directory_redirection``.

    Three mocked responses are used: a plain 200, a 302 with a relative
    ``Location`` header and a 302 whose ``Location`` is the requested URL.
    """
    html = """
    <html>
        <head>
            <title>Foobar</title>
        </head>
        <body>

        </body>
    </html>
    """

    # (url, status code, Location header or None,
    #  expected redirection_url, expected is_directory_redirection)
    scenarios = (
        # No redirect
        ("http://perdu.com/", 200, None, "", False),
        # Redirection
        ("http://perdu2.com/", 302, "index.html", "http://perdu2.com/index.html", False),
        # Same url
        ("http://perdu3.com/", 302, "http://perdu3.com/", "http://perdu3.com/", True),
    )

    for url, status, location, _, _ in scenarios:
        # Only the redirecting responses carry a headers argument.
        extra = {} if location is None else {"headers": [("Location", location)]}
        respx.get(url).mock(return_value=httpx.Response(status, text=html, **extra))

    for url, _, _, expected_url, expected_directory_flag in scenarios:
        page = Page(httpx.get(url, follow_redirects=False))

        assert page.redirection_url == expected_url
        assert page.is_directory_redirection is expected_directory_flag
Exemple #6
0
def test_size_page():
    """Check ``Page.size`` against odd or missing ``content-length`` headers.

    With ``'229,23'`` and ``'240;23'`` the expected size is the leading
    integer; when no header is supplied the expected size is 122, the length
    of the mocked body.
    """
    html = """
    <html>
        <head>
            <title>Test</title>
        </head>
        <body>
        </body>
    </html>
    """

    # (url, response headers, expected page.size)
    scenarios = (
        ("http://perdu.com/", [('content-length', '229,23')], 229),
        ("http://perdu2.com/", [('content-length', '240;23')], 240),
        ("http://perdu3.com/", (), 122),
    )

    for url, response_headers, expected_size in scenarios:
        mocked_response = httpx.Response(200, text=html, headers=response_headers)
        respx.get(url).mock(return_value=mocked_response)

        page = Page(httpx.get(url, follow_redirects=False))

        assert page.size == expected_size
Exemple #7
0
def test_soup_page():
    """Check ``Page.soup`` on a plain document and on one with a ``<base>`` tag.

    For the second document ``Page.base_url`` is expected to be the ``href``
    of the ``<base>`` tag with a trailing slash.
    """
    plain_url = "http://perdu.com/"
    plain_html = """
    <html>
        <head>
            <title>Foobar</title>
        </head>
        <body>
            <h1>Perdu sur l'Internet ?</h1>
        </body>
    </html>
    """

    based_url = "http://perdu2.com/"
    based_html = """
    <html>
        <head>
            <title>Foobar</title>
            <base href="https://example.com" />
        </head>
        <body>
            <h1>Perdu sur l'Internet ?</h1>
            <a href="/somewhere">Anker</a>
        </body>
    </html>
    """

    for url, html in ((plain_url, plain_html), (based_url, based_html)):
        respx.get(url).mock(return_value=httpx.Response(200, text=html))

    # basic html
    plain_page = Page(httpx.get(plain_url, follow_redirects=False))

    assert plain_page.soup is not None
    assert plain_page.soup.find("title").get_text() == "Foobar"

    # base tag
    based_page = Page(httpx.get(based_url, follow_redirects=False))

    assert based_page.soup is not None
    assert based_page.base_url == "https://example.com/"
    assert based_page.soup.find("title").get_text() == "Foobar"
Exemple #8
0
def test_title_page():
    """``Page.title`` must return the text of the document's ``<title>`` tag."""
    url = "http://perdu.com/"
    html = """
    <html>
        <head>
            <title>Foobar</title>
        </head>
        <body>
            <h1>Perdu sur l'Internet ?</h1>
        </body>
    </html>
    """

    mocked_response = httpx.Response(200, text=html)
    respx.get(url).mock(return_value=mocked_response)

    # basic html
    page = Page(httpx.get(url, follow_redirects=False))

    assert page.soup is not None
    assert page.title == "Foobar"
Exemple #9
0
    async def async_post(self,
                         form: web.Request,
                         follow_redirects: bool = False,
                         headers: dict = None) -> Page:
        """Send the given form as a POST request and wrap the response in a Page.

        @param form: Request describing the path, query/body/file parameters, enctype and referer.
        @type form: web.Request
        @param follow_redirects: Forwarded as-is to the HTTP client when sending.
        @type follow_redirects: bool
        @param headers: Extra headers; they override the computed Content-Type but not the referer.
        @type headers: dict
        @rtype: Page
        @raise httpx.ReadTimeout: when a transport error mentions "Read timed out".
        @raise httpx.TransportError: any other transport error, re-raised unchanged.
        """
        # For multipart forms no Content-Type is set here.
        form_headers = {} if form.is_multipart else {"Content-Type": form.enctype}

        if isinstance(headers, dict) and headers:
            form_headers.update(headers)

        if form.referer:
            form_headers["referer"] = form.referer

        # File parameters are only forwarded for multipart and urlencoded forms.
        sends_files = form.is_multipart or "urlencoded" in form.enctype
        file_params = form.file_params if sends_files else None
        post_params = form.post_params

        # A string body goes out through `content`, anything else non-empty is
        # converted to a dict for `data` (httpx expects a dict there).
        content = None
        if not post_params:
            post_params = None
        elif isinstance(post_params, str):
            content, post_params = post_params, None
        else:
            post_params = dict(post_params)

        request = self.client.build_request(
            "POST",
            form.path,
            params=form.get_params,
            data=post_params,
            content=content,
            files=file_params or None,
            headers=form_headers,
            timeout=self._timeout)

        try:
            response = await self.client.send(
                request, stream=self.stream, follow_redirects=follow_redirects)
        except httpx.TransportError as error:
            if "Read timed out" not in str(error):
                raise

            raise httpx.ReadTimeout("Request time out", request=None)

        return Page(response)
Exemple #10
0
    async def async_get(self,
                        resource: web.Request,
                        follow_redirects: bool = False,
                        headers: dict = None) -> Page:
        """Fetch the given resource with a GET request and wrap the response in a Page.

        The timeout is attached to the request when it is built and the redirect
        policy is passed as `follow_redirects` when it is sent, the same way
        `async_post` does it.

        @param resource: URL to get.
        @type resource: web.Request
        @param follow_redirects: If set to True, responses with a 3XX code and a Location header will be followed.
        @type follow_redirects: bool
        @param headers: Dictionary of additional headers to send with the request.
        @type headers: dict
        @rtype: Page
        @raise httpx.ReadTimeout: when a transport error mentions "Read timed out".
        @raise httpx.TransportError: any other transport error, re-raised unchanged.
        """
        request = self.client.build_request("GET",
                                            resource.url,
                                            headers=headers,
                                            timeout=self._timeout)
        try:
            response = await self.client.send(request,
                                              stream=self.stream,
                                              follow_redirects=follow_redirects)
        except httpx.TransportError as exception:
            if "Read timed out" in str(exception):
                # Keep the original error attached as the cause for debugging.
                raise httpx.ReadTimeout("Request time out", request=None) from exception

            raise

        return Page(response)
Exemple #11
0
    def get(self,
            resource: web.Request,
            follow_redirects: bool = False,
            headers: dict = None) -> Page:
        """Fetch the given resource with a GET request and wrap the response in a Page.

        @param resource: URL to get.
        @type resource: web.Request
        @param follow_redirects: If set to True, responses with a 3XX code and a Location header will be followed.
        @type follow_redirects: bool
        @param headers: Dictionary of additional headers to send with the request.
        @type headers: dict
        @rtype: Page
        @raise ReadTimeout: when a ConnectionError mentions "Read timed out".
        @raise ConnectionError: any other connection error, re-raised unchanged.
        """
        options = {
            "timeout": self._timeout,
            "allow_redirects": follow_redirects,
            "headers": headers,
            "verify": self.secure,
        }

        try:
            response = self._session.get(resource.url, **options)
        except ConnectionError as error:
            # https://github.com/kennethreitz/requests/issues/2392
            # Chunked transfer + timeout surfaces as ConnectionError, so it is
            # converted to the timeout exception here.
            if "Read timed out" not in str(error):
                raise

            raise ReadTimeout("Request time out")

        return Page(response)
Exemple #12
0
def test_html_redirection():
    """A ``<meta http-equiv="refresh">`` tag must yield one HTML redirection.

    The three documents differ only in how the target URL is quoted inside the
    ``content`` attribute (unquoted, single-quoted, double-quoted); all must
    give ``http://test.com/``.
    """
    unquoted_html = """
    <!DOCTYPE html>
    <html>
        <head>
            <meta http-equiv="refresh" content="0;url=http://test.com/" />
            <title>Foobar</title>
        </head>
        <body>
            <h1>Perdu sur l'Internet ?</h1>
        </body>
    </html>
    """

    single_quoted_html = """
    <!DOCTYPE html>
    <html>
        <head>
            <meta http-equiv="refresh" content="0;url='http://test.com/'" />
            <title>Foobar</title>
        </head>
        <body>
            <h1>Perdu sur l'Internet ?</h1>
        </body>
    </html>
    """

    double_quoted_html = """
    <!DOCTYPE html>
    <html>
        <head>
            <meta http-equiv="refresh" content='0;url="http://test.com/"' />
            <title>Foobar</title>
        </head>
        <body>
            <h1>Perdu sur l'Internet ?</h1>
        </body>
    </html>
    """

    documents = {
        "http://perdu.com/": unquoted_html,
        "http://perdu2.com/": single_quoted_html,
        "http://perdu3.com/": double_quoted_html,
    }

    for url, html in documents.items():
        respx.get(url).mock(return_value=httpx.Response(200, text=html))

        page = Page(httpx.get(url, follow_redirects=False))

        assert len(page.html_redirections) == 1
        assert page.html_redirections[0] == "http://test.com/"
Exemple #13
0
    def _extract_disconnect_urls(self, page: Page) -> List[str]:
        """
        Return the absolute form of every link of the page that matches DISCONNECT_REGEX.

        Links for which `is_in_scope` returns False are ignored.
        """
        return [
            page.make_absolute(link)
            for link in page.links
            if self.is_in_scope(link) is not False
            and re.search(DISCONNECT_REGEX, link) is not None
        ]
Exemple #14
0
    def post(self,
             form: web.Request,
             follow_redirects: bool = False,
             headers: dict = None) -> Page:
        """Send the given form as a POST request and wrap the response in a Page.

        @param form: Request describing the path, query/body/file parameters, enctype and referer.
        @type form: web.Request
        @param follow_redirects: Forwarded to the session as `allow_redirects`.
        @type follow_redirects: bool
        @param headers: Extra headers; they override the computed Content-Type but not the referer.
        @type headers: dict
        @rtype: Page
        @raise ReadTimeout: when a ConnectionError mentions "Read timed out".
        @raise ConnectionError: any other connection error, re-raised unchanged.
        """
        # requests won't generate a valid upload request if it is handed a
        # multipart/form-data content-type: the boundary info is only produced
        # when file parameters are supplied, so no Content-Type is set here for
        # multipart forms.
        form_headers = {} if form.is_multipart else {"Content-Type": form.enctype}

        if isinstance(headers, dict) and headers:
            form_headers.update(headers)

        if form.referer:
            form_headers["referer"] = form.referer

        if form.is_multipart:
            # Everything travels as "files" so that it is multipart-encoded.
            file_params = form.post_params + form.file_params
            post_params = []
        else:
            post_params = form.post_params
            file_params = form.file_params if "urlencoded" in form.enctype else None

        try:
            # form.path is used together with params; form.url without params
            # would be the alternative.
            response = self._session.post(
                form.path,
                params=form.get_params,
                data=post_params,
                files=file_params,
                headers=form_headers,
                timeout=self._timeout,
                allow_redirects=follow_redirects,
                verify=self.secure)
        except ConnectionError as error:
            # https://github.com/kennethreitz/requests/issues/2392
            # Chunked transfer + timeout surfaces as ConnectionError, so it is
            # converted to the timeout exception here.
            if "Read timed out" not in str(error):
                raise

            raise ReadTimeout("Request time out")

        return Page(response)
Exemple #15
0
    async def async_request(self,
                            method: str,
                            form: web.Request,
                            follow_redirects: bool = False,
                            headers: dict = None) -> Page:
        """Send the given form with an arbitrary HTTP method and wrap the response in a Page.

        The timeout is attached to the request when it is built and the redirect
        policy is passed as `follow_redirects` when it is sent, the same way
        `async_post` does it.

        @param method: HTTP method to use.
        @type method: str
        @param form: Request describing the url, body/file parameters and referer.
        @type form: web.Request
        @param follow_redirects: Forwarded as-is to the HTTP client when sending.
        @type follow_redirects: bool
        @param headers: Dictionary of additional headers; the form referer, if any, is added on top.
        @type headers: dict
        @rtype: Page
        @raise httpx.ReadTimeout: when a transport error mentions "Read timed out".
        @raise httpx.TransportError: any other transport error, re-raised unchanged.
        """
        form_headers = {}
        if isinstance(headers, dict) and headers:
            form_headers.update(headers)

        if form.referer:
            form_headers["referer"] = form.referer

        # A string body goes out through `content`, anything else non-empty is
        # converted to a dict for `data`.
        post_params = form.post_params
        content = None
        if not post_params:
            post_params = None
        elif isinstance(post_params, str):
            content, post_params = post_params, None
        else:
            post_params = dict(post_params)

        request = self.client.build_request(
            method,
            form.url,
            data=post_params,
            content=content,
            files=form.file_params or None,
            headers=form_headers,
            timeout=self._timeout,
        )
        try:
            response = await self.client.send(request,
                                              stream=self.stream,
                                              follow_redirects=follow_redirects)
        except httpx.TransportError as exception:
            if "Read timed out" in str(exception):
                # Keep the original error attached as the cause for debugging.
                raise httpx.ReadTimeout("Request time out", request=None) from exception

            raise

        return Page(response)
Exemple #16
0
def test_json_page():
    """Check ``Page.json`` on empty bodies and on a Python-literal body.

    ``"{'a': 1}"`` is not valid JSON (single quotes), yet ``Page.json`` is
    expected to return ``{'a': 1}`` for it. Empty bodies must give ``None``.

    NOTE(review): earlier versions called ``mock.patch(...)`` here without
    starting the patch or using it as a context manager, so nothing was ever
    patched. Those no-op calls were removed; the assertions are unchanged.
    """
    target_url_1 = "http://perdu.com/"
    target_url_2 = "http://perdu2.com/"
    target_url_3 = "http://perdu3.com/"

    bodies = {
        target_url_1: None,
        target_url_2: "{'a': 1}",
        target_url_3: None,
    }
    for url, body in bodies.items():
        respx.get(url).mock(return_value=httpx.Response(200, text=body))

    # Empty body: nothing to decode.
    page = Page(httpx.get(target_url_1, follow_redirects=False))
    assert page.json is None

    # Python-literal body: decoded despite not being strict JSON.
    page = Page(httpx.get(target_url_2, follow_redirects=False))
    assert page.json == {'a': 1}

    # Second empty body, same expectation as the first one.
    page = Page(httpx.get(target_url_3, follow_redirects=False))
    assert page.json is None
Exemple #17
0
def test_bytes_page():
    """``Page.bytes`` must be an empty bytes object for an empty response body."""
    url = "http://perdu.com/"

    empty_response = httpx.Response(200, text="")
    respx.get(url).mock(return_value=empty_response)

    page = Page(httpx.get(url, follow_redirects=False))

    assert page.bytes == b""
Exemple #18
0
def test_extract_disconnect_urls_no_url():
    """No disconnect URL must be reported for a page whose two links are ``/foobar/`` paths."""
    url = "http://perdu.com/"
    html = "<html><head><title>Vous Etes Perdu ?</title></head><body><h1>Perdu sur l'Internet ?</h1> \
            <h2>Pas de panique, on va vous aider</h2> \
            <strong><pre>    * <----- vous &ecirc;tes ici</pre></strong><a href='http://perdu.com/foobar/'></a> \
            <a href='http://perdu.com/foobar/foobar'></a></body></html>"

    respx.get(url).mock(return_value=httpx.Response(200, text=html))

    page = Page(httpx.get(url, follow_redirects=False))
    crawler = AsyncCrawler(Request(url), timeout=1)

    found = crawler._extract_disconnect_urls(page)

    assert len(found) == 0
Exemple #19
0
    def request(self,
                method: str,
                form: web.Request,
                follow_redirects: bool = False,
                headers: dict = None) -> Page:
        """Send the given form with an arbitrary HTTP method and wrap the response in a Page.

        @param method: HTTP method to use.
        @type method: str
        @param form: Request describing the url, body/file parameters and referer.
        @type form: web.Request
        @param follow_redirects: Forwarded to the session as `allow_redirects`.
        @type follow_redirects: bool
        @param headers: Dictionary of additional headers; the form referer, if any, is added on top.
        @type headers: dict
        @rtype: Page
        @raise ReadTimeout: when a ConnectionError mentions "Read timed out".
        @raise ConnectionError: any other connection error, re-raised unchanged.
        """
        # Work on a copy so the caller's dictionary is never modified.
        form_headers = dict(headers) if isinstance(headers, dict) else {}

        if form.referer:
            form_headers["referer"] = form.referer

        try:
            response = self._session.request(
                method,
                form.url,
                data=form.post_params,
                files=form.file_params,
                headers=form_headers,
                allow_redirects=follow_redirects,
                timeout=self._timeout,
                verify=self.secure)
        except ConnectionError as error:
            # https://github.com/kennethreitz/requests/issues/2392
            # Chunked transfer + timeout surfaces as ConnectionError, so it is
            # converted to the timeout exception here.
            if "Read timed out" not in str(error):
                raise

            raise ReadTimeout("Request time out")

        return Page(response)
Exemple #20
0
async def test_applicationdata():
    """Load ApplicationData from mocked files, then run a Wappalyzer detection.

    ``builtins.open`` is replaced so that the three definition files are served
    from in-memory JSON strings. The mocked page carries an ``x-powered-by``
    header starting with ``PHP/``, and the detection result is expected to
    contain PHP only (not A-Frame).
    """
    categories_file_path = "categories.txt"
    groups_file_path = "groups.txt"
    technologies_file_path = "technologies.txt"

    groups_text = """{
        "9": {
            "name": "Web development"
        }
    }"""

    categories_text = """{
        "27": {
            "groups": [
                9
            ],
            "name": "Programming languages",
            "priority": 5
        },
        "25": {
            "groups": [
                9
            ],
            "name": "JavaScript graphics",
            "priority": 6
        }
    }"""

    technologies_text = """{
        "PHP": {
            "cats": [
                27
            ],
            "cookies": {
                "PHPSESSID": ""
            },
            "cpe": "cpe:/a:php:php",
            "description": "PHP is a general-purpose scripting language used for web development.",
            \"headers\": {\"Server\": \"php\/?([\\\\d.]+)?\\\\;version:\\\\1\",\"X-Powered-By\": \"^php\/?([\\\\d.]+)?\\\\;version:\\\\1\"},
            "icon": "PHP.svg",
            "url": \"\\\\.php(?:$|\\\\?)\",
            "website": "http://php.net"
        },
         "A-Frame": {
            "cats": [
            25
            ],
            "html": "<a-scene[^<>]*>",
            "icon": "A-Frame.svg",
            "implies": "three.js",
            "js": {
                \"AFRAME.version\": \"^(.+)$\\\\;version:\\\\1\"
            },
            \"scripts\": \"\/?([\\\\d.]+)?\/aframe(?:\\\\.min)?\\\\.js\\\\;version:\\\\1\",
            "website": "https://aframe.io"
        }
    }"""

    def get_mock_open(files: Dict[str, str]):
        """Build a replacement for ``open`` serving the given name -> content mapping."""
        def open_mock(filename, *args, **kwargs):
            if filename in files:
                return mock_open(read_data=files[filename]).return_value
            # The f-prefix matters: without it the file name is never shown.
            raise FileNotFoundError(f'(mock) Unable to open {filename}')

        return MagicMock(side_effect=open_mock)

    files = {
        categories_file_path: categories_text,
        groups_file_path: groups_text,
        technologies_file_path: technologies_text,
    }

    application_data = None

    with mock.patch("builtins.open", get_mock_open(files)):
        application_data = ApplicationData(categories_file_path,
                                           groups_file_path,
                                           technologies_file_path)

    assert application_data is not None
    assert len(application_data.get_applications()) == 2
    assert len(application_data.get_categories()) == 2
    assert len(application_data.get_groups()) == 1

    target_url = "http://perdu.com/"

    respx.get(target_url).mock(return_value=httpx.Response(
        200,
        text=
        "<html><head><title>Vous Etes Perdu ?</title></head><body><h1>Perdu sur l'Internet ?</h1> \
            <h2>Pas de panique, on va vous aider</h2> \
            <strong><pre>    * <----- vous &ecirc;tes ici</pre></strong></body></html>",
        headers=[
            ('server', 'nginx/1.19.0'),
            ('content-type', 'text/html; charset=UTF-8'),
            ('x-powered-by', 'PHP/5.6.40-38+ubuntu20.04.1+deb.sury.org+1'),
        ]))

    resp = httpx.get(target_url, follow_redirects=False)
    page = Page(resp)

    wappalyzer = Wappalyzer(application_data, page)

    result = wappalyzer.detect_with_versions_and_categories_and_groups()

    assert len(result) == 1
    assert result.get("PHP") is not None
    assert len(result.get("PHP").get("categories")) == 1
    assert result.get("PHP").get("categories")[0] == "Programming languages"
    assert len(result.get("PHP").get("groups")) == 1
    assert result.get("PHP").get("groups")[0] == "Web development"
    assert result.get("A-Frame") is None
Exemple #21
0
def test_page():
    """Exercise most ``Page`` accessors against a single mocked ``text/plain`` response.

    The document contains an iframe, three forms, an area, an object, an
    anchor, an image, a button with ``formaction`` and a script, so that links,
    extra urls, forms, metadata and the login form can all be checked at once.
    """
    site_url = "http://perdu.com/"
    response_headers = [('server', 'nginx/1.19.0'), ('content-length', '229'),
                        ('content-type', 'text/plain; charset=utf-8')]

    # Each of these must appear exactly once in page.links.
    expected_links = [
        'http://perdu.com/action_page2.php',
        'https://foo.bar/',
        'https://abc.abc/',
        'http://perdu.com/action_page.php',
        'http://perdu.com/userinfo.php',
    ]
    # Every url yielded by page.extra_urls must belong to this list.
    allowed_extra_urls = [
        'https://bar.foo/',
        'http://perdu.com/test.jpg',
        'http://perdu.com/javascript.js',
        'http://perdu.com/test.swf',
        'http://perdu.com/test.swf',
        'http://perdu.com/test.png',
    ]
    # Every form yielded by page.iter_forms() must target one of these urls.
    allowed_form_urls = [
        'http://perdu.com/action_page.php',
        'http://perdu.com/action_page2.php',
        'http://perdu.com/userinfo.php',
    ]

    html = """
    <html>
        <head>
            <title>Vous Etes Perdu ?</title>
            <meta name="color-scheme" content="dark light">
            <meta name="description" content="test">
            <meta name="keywords" content="lost">
            <meta name="generator" content="gen">
        </head>
        <body>
            <h1>Perdu sur l'Internet ?</h1>
            <h2>Pas de panique, on va vous aider</h2>
            <iframe id="foobarframe"
                title="Foobar Frame"
                width="300"
                height="200"
                src="https://foo.bar/">
            </iframe>
            <form name="loginform" method="post" action="userinfo.php">
                <table cellpadding="4" cellspacing="1">
                    <tr><td>Username : </td><td><input name="uname" type="text" size="20" style="width:120px;"></td></tr>
                    <tr><td>Password : </td><td><input name="pass" type="password" size="20" style="width:120px;"></td></tr>
                    <tr><td colspan="2" align="right"><input type="submit" value="login" style="width:75px;"></td></tr>
                </table>
            </form>
            <area shape="rect" coords="184,6,253,27"
                href="https://bar.foo"
                target="_blank" />
            <p>hello</p>
            <object type="application/x-shockwave-flash" data="/test.swf" width="800" height="360">
                <param name="movie" value="/test.swf">
                <param name="wmode" value="transparent">
                <p>You need to enable Flash to view this content.</p>
            </object>
            <a href="https://abc.abc/"></a>
            <form action="/action_page.php" method="get" class="form-example">
            </form>
            <img class="picture"
                src="/test.jpg"
                srcset="/test.png 2x">
                >
            <button class="foo bar"
                    type="button"
                    formaction="/action_page2.php"
                    >
                OK
            </button>
            <form action="/action_page2.php" method="get" class="form-example">
            </form>
            <script src="javascript.js"></script>
            <strong>
                <pre>    * <----- vous &ecirc;tes ici</pre>
            </strong>
        </body>
    </html>
    """

    mocked_response = httpx.Response(200, text=html, headers=response_headers)
    respx.get(site_url).mock(return_value=mocked_response)

    page = Page(httpx.get(site_url, follow_redirects=False))
    encoded_html = html.encode()

    # Response-level information
    assert page.url == site_url
    assert page.history == []
    assert len(page.headers) == 3
    assert page.headers == response_headers
    assert len(page.cookies) == 0
    assert page.server == "nginx/1.19.0"
    assert page.is_plain is True
    assert page.size == 229
    assert page.raw_size == 229
    assert page.content == html
    assert page.bytes == encoded_html
    assert page.md5 == md5(encoded_html).hexdigest()
    assert page.status == 200
    assert page.type == "text/plain; charset=utf-8"

    # Scripts, frames and redirections
    assert len(page.scripts) == 1
    assert page.scripts[0] == "http://perdu.com/javascript.js"
    assert next(page.iter_frames()) == "https://foo.bar/"
    assert page.redirection_url == ""
    assert page.is_directory_redirection is False

    # Links
    assert len(page.links) == 5
    for link in expected_links:
        assert page.links.count(link) == 1

    # Domain membership: (url, expected to be external to the page's domain)
    domain_checks = (
        ('http://perdu.com/blablabla/blablalba/blalba.html', False),
        ('http://p3rdu.com/blablabla/blablalba/blalba.html', True),
    )
    for checked_url, is_external in domain_checks:
        assert page.is_external_to_domain(checked_url) is is_external
    for checked_url, is_external in domain_checks:
        assert page.is_internal_to_domain(checked_url) is (not is_external)

    # Document metadata
    assert page.title == "Vous Etes Perdu ?"
    assert isinstance(page.soup, BeautifulSoup)
    assert page.base_url is None
    assert len(page.metas) == 4
    assert page.metas.get("color-scheme") == "dark light"
    assert page.description == "test"
    assert page.keywords == ["lost"]
    assert page.generator == "gen"
    assert page.text_only is not None  # @fixme later
    assert page.text_only_md5 is not None  # @fixme

    # Images and other referenced resources
    assert page.favicon_url == site_url + "favicon.ico"
    assert len(page.images_urls) == 1
    assert page.images_urls[0] == site_url + "test.jpg"
    for extra_url in page.extra_urls:
        assert extra_url in allowed_extra_urls

    # No redirection of any kind is present in the document
    assert len(page.js_redirections) == 0
    assert len(page.html_redirections) == 0
    assert len(page.all_redirections) == 0

    # Forms
    for form_request in page.iter_forms():
        assert form_request.url in allowed_form_urls

    login_form, username_field, password_field = page.find_login_form()
    assert username_field == 0
    assert password_field == 1
    assert login_form.url == "http://perdu.com/userinfo.php"
    assert login_form.encoded_data == "uname=&pass="