def test_iter_frame_page():
    """Check that ``Page.iter_frames`` yields the source URL of an ``<iframe>``.

    A response is registered with respx for ``http://perdu.com/`` whose body
    contains a single inline frame pointing at ``http://example.com``. The
    first value produced by ``iter_frames()`` is expected to be
    ``http://example.com/`` (note the trailing slash in the expectation).

    NOTE(review): this relies on respx interception being active when the
    request is sent (decorator or fixture not visible in this chunk) - confirm.
    """
    url = "http://perdu.com/"
    # Adjacent literals are concatenated by the parser; the resulting text is
    # exactly the one-line HTML document used as the mocked body.
    html = (
        """ <html> <head> <title>Foobar</title> </head> <body> """
        """<iframe id="inlineFrameExample" title="Inline Frame Example" """
        """width="300" height="200" src="http://example.com"> </iframe> """
        """</body> </html> """
    )

    mocked_response = httpx.Response(200, text=html)
    respx.get(url).mock(return_value=mocked_response)

    # basic html
    page = Page(httpx.get(url, follow_redirects=False))
    frames = page.iter_frames()
    assert next(frames) == "http://example.com/"
def test_page():
    """Exercise the public attributes of ``Page`` on one mocked response.

    A response for ``http://perdu.com/`` is registered with respx. It is served
    with a ``text/plain`` content type, an nginx ``server`` header and a
    ``content-length`` of 229, while the body is an HTML document containing
    meta tags, an iframe, three forms, a button with ``formaction``, an image
    with ``srcset``, a Flash object, an area link, an anchor and a script.

    The assertions then cover, in order: response metadata (URL, history,
    headers, cookies, server, size, raw content, MD5, status, type), scripts
    and frames, redirections, extracted links, domain membership helpers,
    HTML metadata (title, metas, description, keywords, generator), text-only
    views, favicon and images, extra URLs, redirection lists, forms, and
    finally login-form detection.

    NOTE(review): this relies on respx interception being active when the
    request is sent (decorator or fixture not visible in this chunk) - confirm.
    """
    target_url = "http://perdu.com/"
    page_headers = [
        ('server', 'nginx/1.19.0'),
        ('content-length', '229'),
        ('content-type', 'text/plain; charset=utf-8'),
    ]
    # Every entry is expected to appear exactly once in ``page.links``.
    page_links = [
        'http://perdu.com/action_page2.php',
        'https://foo.bar/',
        'https://abc.abc/',
        'http://perdu.com/action_page.php',
        'http://perdu.com/userinfo.php',
    ]
    # Allow-list for ``page.extra_urls`` (membership only, order is irrelevant).
    page_extra_links = [
        'https://bar.foo/',
        'http://perdu.com/test.jpg',
        'http://perdu.com/javascript.js',
        'http://perdu.com/test.swf',
        'http://perdu.com/test.swf',
        'http://perdu.com/test.png',
    ]
    # Allow-list for the URLs of the requests produced by ``page.iter_forms()``.
    page_form_requests = [
        'http://perdu.com/action_page.php',
        'http://perdu.com/action_page2.php',
        'http://perdu.com/userinfo.php',
    ]
    # Adjacent literals are concatenated by the parser; the resulting text is
    # the same document as a single literal would give, including the one
    # embedded newline (written as ``\n``) after the <img> tag.
    page_content = (
        """ <html> <head> <title>Vous Etes Perdu ?</title> """
        """<meta name="color-scheme" content="dark light"> """
        """<meta name="description" content="test"> """
        """<meta name="keywords" content="lost"> """
        """<meta name="generator" content="gen"> """
        """</head> <body> """
        """<h1>Perdu sur l'Internet ?</h1> """
        """<h2>Pas de panique, on va vous aider</h2> """
        """<iframe id="foobarframe" title="Foobar Frame" width="300" height="200" src="https://foo.bar/"> </iframe> """
        """<form name="loginform" method="post" action="userinfo.php"> """
        """<table cellpadding="4" cellspacing="1"> """
        """<tr><td>Username : </td><td><input name="uname" type="text" size="20" style="width:120px;"></td></tr> """
        """<tr><td>Password : </td><td><input name="pass" type="password" size="20" style="width:120px;"></td></tr> """
        """<tr><td colspan="2" align="right"><input type="submit" value="login" style="width:75px;"></td></tr> """
        """</table> </form> """
        """<area shape="rect" coords="184,6,253,27" href="https://bar.foo" target="_blank" /> """
        """<p>hello</p> """
        """<object type="application/x-shockwave-flash" data="/test.swf" width="800" height="360"> """
        """<param name="movie" value="/test.swf"> """
        """<param name="wmode" value="transparent"> """
        """<p>You need to enable Flash to view this content.</p> """
        """</object> """
        """<a href="https://abc.abc/"></a> """
        """<form action="/action_page.php" method="get" class="form-example"> </form> """
        """<img class="picture" src="/test.jpg" srcset="/test.png 2x"> \n"""
        """> <button class="foo bar" type="button" formaction="/action_page2.php" > OK </button> """
        """<form action="/action_page2.php" method="get" class="form-example"> </form> """
        """<script src="javascript.js"></script> """
        """<strong> <pre> * <----- vous êtes ici</pre> </strong> """
        """</body> </html> """
    )

    mocked_response = httpx.Response(
        200,
        text=page_content,
        headers=page_headers,
    )
    respx.get(target_url).mock(return_value=mocked_response)

    page = Page(httpx.get(target_url, follow_redirects=False))
    encoded_content = page_content.encode()

    # --- Response metadata -------------------------------------------------
    assert page.url == target_url
    assert page.history == []
    assert len(page.headers) == 3
    assert page.headers == page_headers
    assert len(page.cookies) == 0
    assert page.server == "nginx/1.19.0"
    assert page.is_plain is True
    # Both sizes are expected to equal the 229 advertised in content-length.
    assert page.size == 229
    assert page.raw_size == 229
    assert page.content == page_content
    assert page.bytes == encoded_content
    assert page.md5 == md5(encoded_content).hexdigest()
    assert page.status == 200
    assert page.type == "text/plain; charset=utf-8"

    # --- Scripts, frames and redirections ----------------------------------
    assert len(page.scripts) == 1
    assert page.scripts[0] == "http://perdu.com/javascript.js"
    assert next(page.iter_frames()) == "https://foo.bar/"
    assert page.redirection_url == ""
    assert page.is_directory_redirection is False

    # --- Links ---------------------------------------------------------------
    assert len(page.links) == 5
    for expected_link in page_links:
        assert page.links.count(expected_link) == 1

    # --- Domain membership ---------------------------------------------------
    same_domain_url = 'http://perdu.com/blablabla/blablalba/blalba.html'
    other_domain_url = 'http://p3rdu.com/blablabla/blablalba/blalba.html'
    assert page.is_external_to_domain(same_domain_url) is False
    assert page.is_external_to_domain(other_domain_url) is True
    assert page.is_internal_to_domain(same_domain_url) is True
    assert page.is_internal_to_domain(other_domain_url) is False

    # --- HTML metadata -------------------------------------------------------
    assert page.title == "Vous Etes Perdu ?"
    assert isinstance(page.soup, BeautifulSoup)
    assert page.base_url is None
    assert len(page.metas) == 4
    assert page.metas.get("color-scheme") == "dark light"
    assert page.description == "test"
    assert page.keywords == ["lost"]
    assert page.generator == "gen"
    assert page.text_only is not None  # @fixme later
    assert page.text_only_md5 is not None  # @fixme

    # --- Favicon, images and extra URLs --------------------------------------
    assert page.favicon_url == f"{target_url}favicon.ico"
    assert len(page.images_urls) == 1
    assert page.images_urls[0] == f"{target_url}test.jpg"
    for extra_url in page.extra_urls:
        assert extra_url in page_extra_links

    assert len(page.js_redirections) == 0
    assert len(page.html_redirections) == 0
    assert len(page.all_redirections) == 0

    # --- Forms -----------------------------------------------------------------
    for form_request in page.iter_forms():
        assert form_request.url in page_form_requests

    # find_login_form() returns the form request plus the positions of the
    # username and password fields (expected here: 0 and 1).
    form, username_index, password_index = page.find_login_form()
    assert username_index == 0
    assert password_index == 1
    assert form.url == "http://perdu.com/userinfo.php"
    assert form.encoded_data == "uname=&pass="