Ejemplo n.º 1
0
def test_iter_frame_page():
    """Check that ``Page.iter_frames`` yields the URL of an ``<iframe>``.

    A minimal HTML document holding a single iframe whose ``src`` is
    ``http://example.com`` is served by a mocked HTTP route; the first value
    produced by ``iter_frames()`` is expected to be ``http://example.com/``.
    """
    target_url_1 = "http://perdu.com/"

    page_content_1 = """
    <html>
        <head>
            <title>Foobar</title>
        </head>
        <body>
            <iframe id="inlineFrameExample"
                title="Inline Frame Example"
                width="300"
                height="200"
                src="http://example.com">
            </iframe>
        </body>
    </html>
    """

    # Routes added to the global respx router only intercept traffic while
    # that router is active, so both the registration and the request happen
    # inside the context manager. This keeps the test off the real network
    # and rolls the route back when the block exits.
    with respx.mock:
        respx.get(target_url_1).mock(return_value=httpx.Response(
            200,
            text=page_content_1,
        ))

        # basic html
        resp = httpx.get(target_url_1, follow_redirects=False)

    page = Page(resp)

    assert next(page.iter_frames()) == "http://example.com/"
Ejemplo n.º 2
0
def test_page():
    """Exercise the public attributes of ``Page`` against one mocked response.

    A hand-written HTML document (iframe, forms, image, object, script, area,
    anchor and meta tags) is served for ``http://perdu.com/`` through a mocked
    HTTP route with three explicit response headers. The resulting ``Page`` is
    then checked for response metadata, raw content, extracted links, domain
    helpers, parsed HTML metadata, assets, redirections and form detection.
    """
    target_url = "http://perdu.com/"
    page_headers = [('server', 'nginx/1.19.0'), ('content-length', '229'),
                    ('content-type', 'text/plain; charset=utf-8')]
    page_links = [
        'http://perdu.com/action_page2.php',
        'https://foo.bar/',
        'https://abc.abc/',
        'http://perdu.com/action_page.php',
        'http://perdu.com/userinfo.php',
    ]
    page_extra_links = [
        'https://bar.foo/',
        'http://perdu.com/test.jpg',
        'http://perdu.com/javascript.js',
        'http://perdu.com/test.swf',
        'http://perdu.com/test.swf',
        'http://perdu.com/test.png',
    ]

    page_form_requests = [
        'http://perdu.com/action_page.php',
        'http://perdu.com/action_page2.php',
        'http://perdu.com/userinfo.php',
    ]

    page_content = """
    <html>
        <head>
            <title>Vous Etes Perdu ?</title>
            <meta name="color-scheme" content="dark light">
            <meta name="description" content="test">
            <meta name="keywords" content="lost">
            <meta name="generator" content="gen">
        </head>
        <body>
            <h1>Perdu sur l'Internet ?</h1>
            <h2>Pas de panique, on va vous aider</h2>
            <iframe id="foobarframe"
                title="Foobar Frame"
                width="300"
                height="200"
                src="https://foo.bar/">
            </iframe>
            <form name="loginform" method="post" action="userinfo.php">
                <table cellpadding="4" cellspacing="1">
                    <tr><td>Username : </td><td><input name="uname" type="text" size="20" style="width:120px;"></td></tr>
                    <tr><td>Password : </td><td><input name="pass" type="password" size="20" style="width:120px;"></td></tr>
                    <tr><td colspan="2" align="right"><input type="submit" value="login" style="width:75px;"></td></tr>
                </table>
            </form>
            <area shape="rect" coords="184,6,253,27"
                href="https://bar.foo"
                target="_blank" />
            <p>hello</p>
            <object type="application/x-shockwave-flash" data="/test.swf" width="800" height="360">
                <param name="movie" value="/test.swf">
                <param name="wmode" value="transparent">
                <p>You need to enable Flash to view this content.</p>
            </object>
            <a href="https://abc.abc/"></a>
            <form action="/action_page.php" method="get" class="form-example">
            </form>
            <img class="picture"
                src="/test.jpg"
                srcset="/test.png 2x">
                >
            <button class="foo bar"
                    type="button"
                    formaction="/action_page2.php"
                    >
                OK
            </button>
            <form action="/action_page2.php" method="get" class="form-example">
            </form>
            <script src="javascript.js"></script>
            <strong>
                <pre>    * <----- vous &ecirc;tes ici</pre>
            </strong>
        </body>
    </html>
    """

    # Routes added to the global respx router only intercept traffic while
    # that router is active, so both the registration and the request happen
    # inside the context manager. This keeps the test off the real network
    # and rolls the route back when the block exits.
    with respx.mock:
        respx.get(target_url).mock(return_value=httpx.Response(
            200,
            text=page_content,
            headers=page_headers,
        ))

        resp = httpx.get(target_url, follow_redirects=False)

    page = Page(resp)

    # Response metadata
    assert page.url == target_url
    assert page.history == []
    assert len(page.headers) == 3
    assert page.headers == page_headers
    assert len(page.cookies) == 0
    assert page.server == "nginx/1.19.0"
    assert page.is_plain is True
    assert page.size == 229
    assert page.raw_size == 229
    assert page.status == 200
    assert page.type == "text/plain; charset=utf-8"

    # Raw body and its digest
    assert page.content == page_content
    assert page.bytes == str.encode(page_content)
    assert page.md5 == md5(str.encode(page_content)).hexdigest()

    # Scripts and frames
    assert len(page.scripts) == 1
    assert page.scripts[0] == "http://perdu.com/javascript.js"
    assert next(page.iter_frames()) == "https://foo.bar/"

    # Redirection state
    assert page.redirection_url == ""
    assert page.is_directory_redirection is False

    # Every expected link must be reported exactly once
    assert len(page.links) == 5
    for expected_link in page_links:
        assert page.links.count(expected_link) == 1

    # Domain membership helpers
    assert page.is_external_to_domain(
        'http://perdu.com/blablabla/blablalba/blalba.html') is False
    assert page.is_external_to_domain(
        'http://p3rdu.com/blablabla/blablalba/blalba.html') is True
    assert page.is_internal_to_domain(
        'http://perdu.com/blablabla/blablalba/blalba.html') is True
    assert page.is_internal_to_domain(
        'http://p3rdu.com/blablabla/blablalba/blalba.html') is False

    # Parsed HTML metadata
    assert page.title == "Vous Etes Perdu ?"
    assert isinstance(page.soup, BeautifulSoup)
    assert page.base_url is None
    assert len(page.metas) == 4
    assert page.metas.get("color-scheme") == "dark light"
    assert page.description == "test"
    assert page.keywords == ["lost"]
    assert page.generator == "gen"
    assert page.text_only is not None  # @fixme later
    assert page.text_only_md5 is not None  # @fixme

    # Assets
    assert page.favicon_url == target_url + "favicon.ico"
    assert len(page.images_urls) == 1
    assert page.images_urls[0] == target_url + "test.jpg"
    # Only checks that nothing unexpected is reported, not that every
    # expected extra URL is present.
    for url in page.extra_urls:
        assert url in page_extra_links

    # No redirection of any kind is expected for this document
    assert len(page.js_redirections) == 0
    assert len(page.html_redirections) == 0
    assert len(page.all_redirections) == 0

    # Forms
    for request in page.iter_forms():
        assert request.url in page_form_requests
    login_form, username_field, password_field = page.find_login_form()
    assert username_field == 0
    assert password_field == 1
    assert login_form.url == "http://perdu.com/userinfo.php"
    assert login_form.encoded_data == "uname=&pass="