def test_double_click_should_double_click_element_when_element_exists(httpserver: HTTPServer) -> None:
    request_path = '/page'
    request_url = httpserver.url_for(request_path)
    response_data = '''
    <button id="button" ondblclick="onDoubleClick()">Test button</button>
    <script>
        function onDoubleClick() {
            document.getElementById("button").textContent = "Double clicked!";
        }
    </script>
    '''

    httpserver.expect_ordered_request(request_path, method='HEAD').respond_with_data(content_type='text/html')
    httpserver.expect_ordered_request(request_path, method='GET').respond_with_data(
        content_type='text/html', response_data=response_data)

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(request_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            self.double_click('#button')

            assert self.find_element('#button').get_text() == 'Double clicked!'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_get_cookies_should_return_cookies_for_the_current_page(httpserver: HTTPServer) -> None:
    request_path = '/page'
    request_url = httpserver.url_for(request_path)
    headers = {'Set-Cookie': 'cookie_name=cookie_value'}

    httpserver.expect_ordered_request(request_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(request_path, method='GET').respond_with_data(headers=headers)

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(request_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            cookies = self.get_cookies()

            assert len(cookies) == 1
            assert cookies[0].name == 'cookie_name'
            assert cookies[0].value == 'cookie_value'
            assert cookies[0].domain == 'localhost'
            assert cookies[0].path == '/'
            assert cookies[0].expires == -1
            assert cookies[0].http_only is False
            assert cookies[0].secure is False
            assert cookies[0].session is True
            assert cookies[0].same_site is None

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_wait_for_selector_should_wait_for_element_matching_selector_when_element_exists(
        httpserver: HTTPServer) -> None:
    request_path = '/page'
    request_url = httpserver.url_for(request_path)
    response_data = '''
    <script>
        setTimeout(function() {
            var element = document.createElement("div");
            element.id = "test";
            document.body.appendChild(element);
        }, 500);
    </script>
    '''

    httpserver.expect_ordered_request(request_path, method='HEAD').respond_with_data(content_type='text/html')
    httpserver.expect_ordered_request(request_path, method='GET').respond_with_data(
        content_type='text/html', response_data=response_data)

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(request_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            assert self.find_element('#test') is None

            self.wait_for_selector('#test', visible=True, timeout=1000)

            assert self.find_element('#test') is not None

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_stop_should_stop_crawler_before_processing_next_request(httpserver: HTTPServer) -> None:
    first_page_path = '/first-page'
    second_page_path = '/second-page'
    first_page_url = httpserver.url_for(first_page_path)
    second_page_url = httpserver.url_for(second_page_path)

    httpserver.expect_ordered_request(first_page_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(first_page_path, method='GET').respond_with_data()

    class TestCrawler(Crawler):
        response_count = 0

        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(first_page_url), CrawlRequest(second_page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            self.response_count += 1
            self.stop()

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    crawler = TestCrawler()
    crawler.start()

    assert crawler.response_count == 1

    httpserver.check_assertions()

def test_click_and_wait_should_raise_no_such_element_error_when_element_does_not_exist(httpserver: HTTPServer) -> None:
    first_page_path = '/first-page'
    second_page_path = '/second-page'
    first_page_url = httpserver.url_for(first_page_path)
    second_page_url = httpserver.url_for(second_page_path)
    first_page_response_data = f'''
    <title>First page</title>
    <a id="link" href="{second_page_url}">Go to second page</a>
    '''

    httpserver.expect_ordered_request(first_page_path, method='HEAD').respond_with_data(content_type='text/html')
    httpserver.expect_ordered_request(first_page_path, method='GET').respond_with_data(
        content_type='text/html', response_data=first_page_response_data)

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(first_page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            with pytest.raises(NoSuchElementError) as exc_info:
                self.click_and_wait('#nonexistent', timeout=1000)

            assert str(exc_info.value) == 'Unable to locate element using selector #nonexistent'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_select_should_select_options_in_dropdown_list_when_element_is_found(httpserver: HTTPServer) -> None:
    request_path = '/page'
    request_url = httpserver.url_for(request_path)
    response_data = '''
    <select id="test" multiple>
        <option value="foo">foo</option>
        <option value="bar">bar</option>
        <option value="baz">baz</option>
    </select>
    '''

    httpserver.expect_ordered_request(request_path, method='HEAD').respond_with_data(content_type='text/html')
    httpserver.expect_ordered_request(request_path, method='GET').respond_with_data(
        content_type='text/html', response_data=response_data)

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(request_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            values = ['foo', 'bar']

            assert self.select('#test', values) == values

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_get_current_page_should_return_current_open_page(httpserver: HTTPServer) -> None:
    request_path = '/page'
    request_url = httpserver.url_for(request_path)
    response_data = '<title>Test</title>'

    httpserver.expect_ordered_request(request_path, method='HEAD').respond_with_data(content_type='text/html')
    httpserver.expect_ordered_request(request_path, method='GET').respond_with_data(
        content_type='text/html', response_data=response_data)

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(request_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            page = self.get_current_page()

            assert page.index == 0
            assert page.url == request_url
            assert page.title == 'Test'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_set_cookie_should_set_cookie(httpserver: HTTPServer) -> None:
    first_page_path = '/first-page'
    second_page_path = '/second-page'
    first_page_url = httpserver.url_for(first_page_path)
    second_page_url = httpserver.url_for(second_page_path)
    headers = {'Cookie': 'cookie_name=cookie_value'}

    httpserver.expect_ordered_request(first_page_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(first_page_path, method='GET').respond_with_data()
    httpserver.expect_ordered_request(second_page_path, method='HEAD', headers=headers).respond_with_data()
    httpserver.expect_ordered_request(second_page_path, method='GET', headers=headers).respond_with_data()

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([
                CrawlRequest(first_page_url, success_func=self.on_first_page_response),
                CrawlRequest(second_page_url)
            ])

        def on_first_page_response(self, _: CrawlResponse) -> None:
            self.set_cookie(Cookie('cookie_name', 'cookie_value'))

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_wait_unexpected_request(httpserver: HTTPServer):
    def make_unexpected_request_and_wait() -> None:
        with raises(AssertionError) as error:
            waiting_timeout = 0.1
            with httpserver.wait(raise_assertions=True, stop_on_nohandler=True, timeout=waiting_timeout) as waiting:
                requests.get(httpserver.url_for("/foobaz"))
            assert not waiting.result

        no_handler_text = 'No handler found for request'
        assert no_handler_text in str(error.value)

    make_unexpected_request_and_wait()

    httpserver.expect_ordered_request("/foobar").respond_with_data("OK foobar")
    httpserver.expect_ordered_request("/foobaz").respond_with_data("OK foobaz")

    make_unexpected_request_and_wait()

def test_ordered_invalid_order(httpserver: HTTPServer):
    httpserver.expect_ordered_request("/foobar").respond_with_data("OK foobar")
    httpserver.expect_ordered_request("/foobaz").respond_with_data("OK foobaz")

    assert len(httpserver.ordered_handlers) == 2

    # these requests do not pass as the order is different;
    # this marks the whole thing 'permanently failed', so no further requests may pass
    response = requests.get(httpserver.url_for("/foobaz"))
    assert response.status_code == 500

    response = requests.get(httpserver.url_for("/foobar"))
    assert response.status_code == 500

    # as no ordered handlers have been triggered yet, these must remain intact
    assert len(httpserver.ordered_handlers) == 2

def test_custom_request_header_handling(httpserver: HTTPServer) -> None:
    request_path = '/page'
    request_url = httpserver.url_for(request_path)

    httpserver.expect_ordered_request(request_path, method='HEAD', headers={'foo': 'bar'}).respond_with_data()
    httpserver.expect_ordered_request(request_path, method='GET', headers={'foo': 'bar'}).respond_with_data()

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(request_url, headers={'foo': 'bar'})])

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_get_url_should_return_current_page_url(httpserver: HTTPServer) -> None:
    request_path = '/page'
    request_url = httpserver.url_for(request_path)

    httpserver.expect_ordered_request(request_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(request_path, method='GET').respond_with_data()

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(request_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            assert self.get_url() == request_url

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_find_element_should_return_none_when_element_is_not_found(httpserver: HTTPServer) -> None:
    request_path = '/page'
    request_url = httpserver.url_for(request_path)

    httpserver.expect_ordered_request(request_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(request_path, method='GET').respond_with_data()

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(request_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            assert self.find_element('#nonexistent') is None

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_evaluate_should_evaluate_function_when_element_is_found(httpserver: HTTPServer) -> None:
    request_path = '/page'
    request_url = httpserver.url_for(request_path)
    response_data = '<div id="test">Test</div>'

    httpserver.expect_ordered_request(request_path, method='HEAD').respond_with_data(content_type='text/html')
    httpserver.expect_ordered_request(request_path, method='GET').respond_with_data(
        content_type='text/html', response_data=response_data)

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(request_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            assert self.evaluate('#test', 'element => element.textContent') == 'Test'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_request_redirect_handling(httpserver: HTTPServer) -> None:
    redirect_origin_path = '/redirect-origin'
    redirect_target_path = '/redirect-target'
    redirect_origin_url = httpserver.url_for(redirect_origin_path)
    redirect_target_url = httpserver.url_for(redirect_target_path)
    headers = {'Location': redirect_target_url}

    httpserver.expect_ordered_request(redirect_origin_path, method='HEAD').respond_with_data(
        status=301, headers=headers)
    httpserver.expect_ordered_request(redirect_target_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(redirect_target_path, method='GET').respond_with_data()

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(redirect_origin_url)])

        def on_request_redirect(self, response: CrawlResponse, redirected_request: CrawlRequest) -> None:
            assert response.request.url == redirect_origin_url
            assert redirected_request.url == redirect_target_url
            assert response.status == 301
            assert len(response.headers) > 0
            assert response.text is None

        def on_response_success(self, response: CrawlResponse) -> None:
            assert response.request.url == redirect_target_url
            assert response.status == 200
            assert len(response.headers) > 0
            assert response.text == ''

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_request_error_handling(httpserver: HTTPServer) -> None:
    request_path = '/response-error'
    request_url = httpserver.url_for(request_path)

    httpserver.expect_ordered_request(request_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(request_path, method='GET').respond_with_data(status=500)

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(request_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            assert False, f'Response success: {response}'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert response.request.url == request_url
            assert response.status == 500
            assert len(response.headers) > 0
            assert response.text == ''

    TestCrawler().start()

    httpserver.check_assertions()

def test_ordered_ok(httpserver: HTTPServer):
    httpserver.expect_ordered_request("/foobar").respond_with_data("OK foobar")
    httpserver.expect_ordered_request("/foobaz").respond_with_data("OK foobaz")

    assert len(httpserver.ordered_handlers) == 2

    # first requests should pass
    response = requests.get(httpserver.url_for("/foobar"))
    httpserver.check_assertions()
    assert response.status_code == 200
    assert response.text == "OK foobar"

    response = requests.get(httpserver.url_for("/foobaz"))
    httpserver.check_assertions()
    assert response.status_code == 200
    assert response.text == "OK foobaz"

    assert len(httpserver.ordered_handlers) == 0

    # second requests should fail due to 'oneshot' type
    assert requests.get(httpserver.url_for("/foobar")).status_code == 500
    assert requests.get(httpserver.url_for("/foobaz")).status_code == 500

def test_type_should_raise_no_such_element_error_when_element_is_not_found(httpserver: HTTPServer) -> None:
    request_path = '/page'
    request_url = httpserver.url_for(request_path)

    httpserver.expect_ordered_request(request_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(request_path, method='GET').respond_with_data()

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(request_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            with pytest.raises(NoSuchElementError) as exc_info:
                self.type('#nonexistent', 'Test')

            assert str(exc_info.value) == 'Unable to locate element using selector #nonexistent'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_wait_for_selector_should_raise_wait_timeout_error_when_element_does_not_exist(httpserver: HTTPServer) -> None:
    request_path = '/page'
    request_url = httpserver.url_for(request_path)

    httpserver.expect_ordered_request(request_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(request_path, method='GET').respond_with_data()

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(request_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            with pytest.raises(WaitTimeoutError) as exc_info:
                self.wait_for_selector('#test', visible=True, timeout=1)

            assert str(exc_info.value) == 'Timeout 1ms exceeded waiting for selector #test'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_switch_to_page_should_raise_no_such_page_error_when_page_does_not_exist(httpserver: HTTPServer) -> None:
    request_path = '/page'
    request_url = httpserver.url_for(request_path)

    httpserver.expect_ordered_request(request_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(request_path, method='GET').respond_with_data()

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(request_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            with pytest.raises(NoSuchPageError) as exc_info:
                self.switch_to_page(BrowserPage(1, request_url, 'Nonexistent'))

            assert str(exc_info.value) == 'No page exists with index 1'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_close_page_should_raise_value_error_when_there_is_only_one_page(httpserver: HTTPServer) -> None:
    request_path = '/page'
    request_url = httpserver.url_for(request_path)

    httpserver.expect_ordered_request(request_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(request_path, method='GET').respond_with_data()

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(request_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            page = self.get_current_page()

            with pytest.raises(ValueError) as exc_info:
                self.close_page(page)

            assert str(exc_info.value) == 'Cannot close the last page'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_crawl_should_add_request_to_queue(httpserver: HTTPServer) -> None:
    first_page_path = '/first-page'
    second_page_path = '/second-page'
    first_page_url = httpserver.url_for(first_page_path)
    second_page_url = httpserver.url_for(second_page_path)

    httpserver.expect_ordered_request(first_page_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(first_page_path, method='GET').respond_with_data()
    httpserver.expect_ordered_request(second_page_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(second_page_path, method='GET').respond_with_data()

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(first_page_url, success_func=self.on_first_response)])

        def on_first_response(self, _: CrawlResponse) -> None:
            assert self.crawl(CrawlRequest(second_page_url)) is True

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_get_pages_should_return_all_pages(httpserver: HTTPServer) -> None:
    first_page_path = '/first-page'
    second_page_path = '/second-page'
    first_page_url = httpserver.url_for(first_page_path)
    second_page_url = httpserver.url_for(second_page_path)
    first_page_response_data = f'''
    <title>First page</title>
    <a id="link" href="{second_page_url}" target="_blank">Go to second page</a>
    '''
    second_page_response_data = '<title>Second page</title>'

    httpserver.expect_ordered_request(first_page_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(first_page_path, method='GET').respond_with_data(
        content_type='text/html', response_data=first_page_response_data)
    httpserver.expect_ordered_request(second_page_path, method='GET').respond_with_data(
        content_type='text/html', response_data=second_page_response_data)

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(first_page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            self.click('#link')
            self.wait_for_timeout(500)

            pages = self.get_pages()

            assert len(pages) == 2
            assert pages[0].index == 0
            assert pages[0].url == first_page_url
            assert pages[0].title == 'First page'
            assert pages[1].index == 1
            assert pages[1].url == second_page_url
            assert pages[1].title == 'Second page'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_click_and_wait_should_raise_navigation_timeout_error_when_timeout_is_exceeded(httpserver: HTTPServer) -> None:
    first_page_path = '/first-page'
    second_page_path = '/second-page'
    first_page_url = httpserver.url_for(first_page_path)
    second_page_url = httpserver.url_for(second_page_path)
    first_page_response_data = f'''
    <title>First page</title>
    <a id="link" href="{second_page_url}">Go to second page</a>
    '''
    second_page_response_data = '<title>Second page</title>'

    def handle_request(_: Request) -> Response:
        # Delay the response long enough for the 1 ms navigation timeout to expire
        time.sleep(0.5)
        return Response(second_page_response_data, 200, None, None, 'text/html')

    httpserver.expect_ordered_request(first_page_path, method='HEAD').respond_with_data(content_type='text/html')
    httpserver.expect_ordered_request(first_page_path, method='GET').respond_with_data(
        content_type='text/html', response_data=first_page_response_data)
    httpserver.expect_ordered_request(second_page_path, method='GET').respond_with_handler(handle_request)

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(first_page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            with pytest.raises(NavigationTimeoutError) as exc_info:
                self.click_and_wait('#link', timeout=1)

            assert str(exc_info.value) == 'Timeout 1ms exceeded waiting for navigation'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def test_click_should_click_element_when_element_is_found(httpserver: HTTPServer) -> None:
    first_page_path = '/first-page'
    second_page_path = '/second-page'
    first_page_url = httpserver.url_for(first_page_path)
    second_page_url = httpserver.url_for(second_page_path)
    response_data = f'<a id="link" href="{second_page_url}">Click me</a>'

    httpserver.expect_ordered_request(first_page_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(first_page_path, method='GET').respond_with_data(
        content_type='text/html', response_data=response_data)
    httpserver.expect_ordered_request(second_page_path, method='GET').respond_with_data()

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(first_page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            self.click('#link')

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()

    httpserver.check_assertions()

def _setup_ordered(server: HTTPServer):
    server.expect_ordered_request("/ordered1").respond_with_data("OK ordered1")
    server.expect_ordered_request("/ordered2").respond_with_data("OK ordered2")