def test_read_rss_proxy(reader_class: Type[FeedReader], httpserver: HTTPServer, status: int):
    """Reading through the RSS proxy should surface the upstream HTTP status."""
    options, url = _setup_rss_proxy(httpserver)
    target = url + f'?status={status}'
    with reader_class(**options) as reader:
        result = reader.read(target, use_proxy=True)
    httpserver.check_assertions()
    assert result.status == status
def test_wait_for_selector_should_wait_for_element_matching_selector_when_element_exists(
        httpserver: HTTPServer) -> None:
    """wait_for_selector should block until a dynamically inserted element shows up."""
    page_path = '/page'
    page_url = httpserver.url_for(page_path)
    # The element is only appended 500 ms after the page has loaded.
    page_html = '''
    <script>
        setTimeout(function() {
            var element = document.createElement("div");
            element.id = "test";
            document.body.appendChild(element);
        }, 500);
    </script>
    '''
    httpserver.expect_ordered_request(page_path, method='HEAD').respond_with_data(content_type='text/html')
    httpserver.expect_ordered_request(page_path, method='GET').respond_with_data(content_type='text/html',
                                                                                 response_data=page_html)

    class WaitingCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            # Not present right away...
            assert self.find_element('#test') is None
            self.wait_for_selector('#test', visible=True, timeout=1000)
            # ...but present after the timer has fired.
            assert self.find_element('#test') is not None

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    WaitingCrawler().start()
    httpserver.check_assertions()
def test_select_should_select_options_in_dropdown_list_when_element_is_found(httpserver: HTTPServer) -> None:
    """select() should pick the requested options of a multi-select and echo them back."""
    page_path = '/page'
    page_url = httpserver.url_for(page_path)
    page_html = '''
    <select id="test" multiple>
        <option value="foo">foo</option>
        <option value="bar">bar</option>
        <option value="baz">baz</option>
    </select>
    '''
    httpserver.expect_ordered_request(page_path, method='HEAD').respond_with_data(content_type='text/html')
    httpserver.expect_ordered_request(page_path, method='GET').respond_with_data(content_type='text/html',
                                                                                 response_data=page_html)

    class SelectingCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            chosen = ['foo', 'bar']
            assert self.select('#test', chosen) == chosen

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    SelectingCrawler().start()
    httpserver.check_assertions()
def test_get_current_page_should_return_current_open_page(httpserver: HTTPServer) -> None:
    """get_current_page() should report the index, URL and title of the open page."""
    page_path = '/page'
    page_url = httpserver.url_for(page_path)
    page_html = '<title>Test</title>'
    httpserver.expect_ordered_request(page_path, method='HEAD').respond_with_data(content_type='text/html')
    httpserver.expect_ordered_request(page_path, method='GET').respond_with_data(content_type='text/html',
                                                                                 response_data=page_html)

    class PageInspectingCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            current = self.get_current_page()
            assert current.index == 0
            assert current.url == page_url
            assert current.title == 'Test'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    PageInspectingCrawler().start()
    httpserver.check_assertions()
def test_read_rss_proxy(reader_class: Type[FeedReader], rss_proxy_server, httpserver: HTTPServer, status: int):
    """A proxied read should report the status code requested via the query string."""
    options = rss_proxy_server
    target = httpserver.url_for('/not-proxy') + f'?status={status}'
    with reader_class(**options) as reader:
        result = reader.read(target, use_proxy=True)
    httpserver.check_assertions()
    assert result.status == status
def test_read_rss_proxy_error(reader_class: Type[FeedReader], rss_proxy_server, httpserver: HTTPServer, error):
    """A proxy-side failure should be mapped to FeedResponseStatus.RSS_PROXY_ERROR."""
    options = rss_proxy_server
    target = httpserver.url_for('/not-proxy') + f'?error={error}'
    with reader_class(**options) as reader:
        result = reader.read(target, use_proxy=True)
    httpserver.check_assertions()
    assert result.status == FeedResponseStatus.RSS_PROXY_ERROR
def test_get_cookies_should_return_cookies_for_the_current_page(httpserver: HTTPServer) -> None:
    """get_cookies() should expose a cookie set by the server with all its attributes."""
    page_path = '/page'
    page_url = httpserver.url_for(page_path)
    set_cookie_headers = {'Set-Cookie': 'cookie_name=cookie_value'}
    httpserver.expect_ordered_request(page_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(page_path, method='GET').respond_with_data(headers=set_cookie_headers)

    class CookieReadingCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            cookies = self.get_cookies()
            assert len(cookies) == 1
            cookie = cookies[0]
            assert cookie.name == 'cookie_name'
            assert cookie.value == 'cookie_value'
            assert cookie.domain == 'localhost'
            assert cookie.path == '/'
            # -1 expiry marks a session cookie.
            assert cookie.expires == -1
            assert cookie.http_only is False
            assert cookie.secure is False
            assert cookie.session is True
            assert cookie.same_site is None

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    CookieReadingCrawler().start()
    httpserver.check_assertions()
def test_double_click_should_double_click_element_when_element_exists(httpserver: HTTPServer) -> None:
    """double_click() should fire the element's dblclick handler.

    Bug fix: the fixture HTML closed ``<button id="button">`` with ``</div>``
    instead of ``</button>``; the mismatched tag is corrected so the markup is
    well-formed and does not rely on browser error recovery.
    """
    request_path = '/page'
    request_url = httpserver.url_for(request_path)
    response_data = '''
    <button id="button" ondblclick="onDoubleClick()">Test button</button>
    <script>
        function onDoubleClick() {
            document.getElementById("button").textContent = "Double clicked!";
        }
    </script>
    '''
    httpserver.expect_ordered_request(request_path, method='HEAD').respond_with_data(content_type='text/html')
    httpserver.expect_ordered_request(request_path, method='GET').respond_with_data(content_type='text/html',
                                                                                    response_data=response_data)

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(request_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            self.double_click('#button')
            # The dblclick handler rewrites the button label.
            assert self.find_element('#button').get_text() == 'Double clicked!'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()
    httpserver.check_assertions()
async def test_request_failure(
    self,
    event_loop: asyncio.AbstractEventLoop,
    expected_message: str,
    httpserver: HTTPServer,
    influxdb_writer: InfluxDBWriter,
    log_records: LogRecordsType,
    mocker: MockerFixture,
    resp_json: Dict[str, str],
) -> None:
    """A rejected InfluxDB write must be logged as an error containing the expected text."""
    httpserver.expect_oneshot_request(
        "/influx/api/v2/write",
        method="POST",
        data="measurement,tag=tagval field=1.0 ",
        headers={"Authorization": "Token test_token"},
        query_string={"org": "test_org", "bucket": "test_bucket", "precision": "s"},
    ).respond_with_json(resp_json, status=404)

    writer_task = event_loop.create_task(influxdb_writer.task())
    influxdb_writer.put(mocker.Mock())
    # Give the background task a moment to issue the request.
    await asyncio.sleep(0.1)

    assert len(httpserver.log) > 0
    httpserver.check_assertions()
    newest_record = log_records()[-1]
    assert newest_record.levelno == logging.ERROR
    assert expected_message in newest_record.message

    writer_task.cancel()
    with contextlib.suppress(asyncio.CancelledError):
        await writer_task
def test_stop_should_stop_crawler_before_processing_next_request(httpserver: HTTPServer) -> None:
    """stop() called from a success handler should prevent the second request entirely."""
    first_page_path = '/first-page'
    second_page_path = '/second-page'
    first_page_url = httpserver.url_for(first_page_path)
    second_page_url = httpserver.url_for(second_page_path)
    # Only the first page is expected to be fetched; the crawler stops before the second.
    httpserver.expect_ordered_request(first_page_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(first_page_path, method='GET').respond_with_data()

    class StoppingCrawler(Crawler):
        response_count = 0

        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(first_page_url), CrawlRequest(second_page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            self.response_count += 1
            self.stop()

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    crawler = StoppingCrawler()
    crawler.start()

    assert crawler.response_count == 1
    httpserver.check_assertions()
def test_click_and_wait_should_raise_no_such_element_error_when_element_does_not_exist(httpserver: HTTPServer) -> None:
    """click_and_wait() on a missing selector should raise NoSuchElementError."""
    first_page_path = '/first-page'
    second_page_path = '/second-page'
    first_page_url = httpserver.url_for(first_page_path)
    second_page_url = httpserver.url_for(second_page_path)
    first_page_html = f'''
    <title>First page</title>
    <a id="link" href="{second_page_url}">Go to second page</a>
    '''
    httpserver.expect_ordered_request(first_page_path, method='HEAD').respond_with_data(content_type='text/html')
    httpserver.expect_ordered_request(first_page_path, method='GET').respond_with_data(content_type='text/html',
                                                                                       response_data=first_page_html)

    class MissingElementCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(first_page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            with pytest.raises(NoSuchElementError) as exc_info:
                self.click_and_wait('#nonexistent', timeout=1000)
            assert str(exc_info.value) == 'Unable to locate element using selector #nonexistent'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    MissingElementCrawler().start()
    httpserver.check_assertions()
async def test_request_failure(
    self,
    event_loop: asyncio.AbstractEventLoop,
    fake_message: Any,
    httpserver: HTTPServer,
    log_records: LogRecordsType,
    messaging_writer: FrontendMessagingWriter,
    testcase: RequestFailureTestCase,
) -> None:
    """A failed publish to the messaging API must produce an error-level log record."""
    httpserver.expect_oneshot_request(
        "/api",
        headers={"Authorization": "apikey api_key"},
    ).respond_with_json(testcase.response_json, status=testcase.response_status)

    writer_task = event_loop.create_task(messaging_writer.task())
    messaging_writer.put(fake_message)
    # Let the background task make the request.
    await asyncio.sleep(0.1)

    httpserver.check_assertions()
    newest_record = log_records()[-1]
    assert newest_record.levelno == logging.ERROR
    assert testcase.logged_error_contains in newest_record.message

    writer_task.cancel()
    with contextlib.suppress(asyncio.CancelledError):
        await writer_task
async def test_request_success(
    self,
    event_loop: asyncio.AbstractEventLoop,
    fake_message: Any,
    httpserver: HTTPServer,
    log_records: LogRecordsType,
    messaging_writer: FrontendMessagingWriter,
    testcase: RequestSuccesTestCase,
) -> None:
    """A successful publish (or a heartbeat after the timeout) must log no errors."""
    httpserver.expect_oneshot_request(
        "/api",
        method="POST",
        headers={"Authorization": "apikey api_key"},
        json={
            "method": "publish",
            "params": {
                "channel": testcase.expected_msg_type,
                "data": {
                    "payload": testcase.expected_payload,
                },
            },
        },
    ).respond_with_json({})

    writer_task = event_loop.create_task(messaging_writer.task())
    if not testcase.timeout:
        messaging_writer.put(fake_message)
    # Wait long enough for the timeout-driven path when the testcase requires it.
    await asyncio.sleep(0.6 if testcase.timeout else 0.1)

    httpserver.check_assertions()
    assert not any(record.levelno == logging.ERROR for record in log_records())

    writer_task.cancel()
    with contextlib.suppress(asyncio.CancelledError):
        await writer_task
def test_set_cookie_should_set_cookie(httpserver: HTTPServer) -> None:
    """A cookie set after the first response should be sent with the second request."""
    first_page_path = '/first-page'
    second_page_path = '/second-page'
    first_page_url = httpserver.url_for(first_page_path)
    second_page_url = httpserver.url_for(second_page_path)
    expected_cookie_headers = {'Cookie': 'cookie_name=cookie_value'}
    httpserver.expect_ordered_request(first_page_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(first_page_path, method='GET').respond_with_data()
    # The second request must already carry the cookie set by the crawler.
    httpserver.expect_ordered_request(second_page_path, method='HEAD',
                                      headers=expected_cookie_headers).respond_with_data()
    httpserver.expect_ordered_request(second_page_path, method='GET',
                                      headers=expected_cookie_headers).respond_with_data()

    class CookieSettingCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([
                CrawlRequest(first_page_url, success_func=self.on_first_page_response),
                CrawlRequest(second_page_url)
            ])

        def on_first_page_response(self, _: CrawlResponse) -> None:
            self.set_cookie(Cookie('cookie_name', 'cookie_value'))

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    CookieSettingCrawler().start()
    httpserver.check_assertions()
def test_request_redirect_handling(httpserver: HTTPServer) -> None:
    """A 301 on HEAD should trigger on_request_redirect, then succeed on the target URL."""
    redirect_origin_path = '/redirect-origin'
    redirect_target_path = '/redirect-target'
    redirect_origin_url = httpserver.url_for(redirect_origin_path)
    redirect_target_url = httpserver.url_for(redirect_target_path)
    location_headers = {'Location': redirect_target_url}
    httpserver.expect_ordered_request(redirect_origin_path, method='HEAD').respond_with_data(status=301,
                                                                                             headers=location_headers)
    httpserver.expect_ordered_request(redirect_target_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(redirect_target_path, method='GET').respond_with_data()

    class RedirectAwareCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(redirect_origin_url)])

        def on_request_redirect(self, response: CrawlResponse, redirected_request: CrawlRequest) -> None:
            assert response.request.url == redirect_origin_url
            assert redirected_request.url == redirect_target_url
            assert response.status == 301
            assert len(response.headers) > 0
            # No body is fetched for the redirect response itself.
            assert response.text is None

        def on_response_success(self, response: CrawlResponse) -> None:
            assert response.request.url == redirect_target_url
            assert response.status == 200
            assert len(response.headers) > 0
            assert response.text == ''

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    RedirectAwareCrawler().start()
    httpserver.check_assertions()
def test_querystring_str(httpserver: HTTPServer):
    """A query string given as a plain string should match the incoming request."""
    handler = httpserver.expect_request("/foobar", query_string="foo=bar", method="GET")
    handler.respond_with_data("example_response")

    response = requests.get(httpserver.url_for("/foobar?foo=bar"))

    httpserver.check_assertions()
    assert response.text == "example_response"
    assert response.status_code == 200
def test_request_post_case_insensitive_method(httpserver: HTTPServer):
    """The expected method should be matched case-insensitively ('post' matches POST)."""
    handler = httpserver.expect_request("/foobar", data='{"request": "example"}', method="post")
    handler.respond_with_data("example_response")

    response = requests.post(httpserver.url_for("/foobar"), json={"request": "example"})

    httpserver.check_assertions()
    assert response.text == "example_response"
    assert response.status_code == 200
def test_check_handler_errors_raises_handler_error(httpserver: HTTPServer):
    """An exception raised inside a handler should be re-raised by check_handler_errors()."""
    def failing_handler(_) -> werkzeug.Response:
        raise ValueError("should be propagated")

    httpserver.expect_request("/foobar").respond_with_handler(failing_handler)
    requests.get(httpserver.url_for("/foobar"))

    # Assertions are clean; the handler error surfaces only via check_handler_errors().
    httpserver.check_assertions()
    with pytest.raises(ValueError):
        httpserver.check_handler_errors()
def test_check_assertions_raises_handler_assertions(httpserver: HTTPServer):
    """An AssertionError raised inside a handler should surface via check_assertions()."""
    def asserting_handler(_):
        assert 1 == 2

    httpserver.expect_request("/foobar").respond_with_handler(asserting_handler)
    requests.get(httpserver.url_for("/foobar"))

    with pytest.raises(AssertionError):
        httpserver.check_assertions()
    # The assertion is reported as an assertion, not as a handler error.
    httpserver.check_handler_errors()
def test_upload_study_no_verify_success(httpserver: HTTPServer):
    """Uploading with certificate verification disabled should succeed against plain HTTP."""
    eas_client = EasClient(
        LOCALHOST,
        httpserver.port,
        verify_certificate=False
    )
    httpserver.expect_oneshot_request("/api/graphql").respond_with_data("OK")

    study = Study("Test study", "description", ["tag"], [Result("Huge success")], [])
    res = eas_client.upload_study(study)

    httpserver.check_assertions()
    assert res.status_code == 200
    assert res.text == "OK"
def test_upload_study_valid_certificate_success(ca: trustme.CA, httpserver: HTTPServer):
    """Uploading with verification enabled should succeed when the CA cert is trusted."""
    with ca.cert_pem.tempfile() as ca_filename:
        eas_client = EasClient(
            LOCALHOST,
            httpserver.port,
            verify_certificate=True,
            ca_filename=ca_filename
        )
        httpserver.expect_oneshot_request("/api/graphql").respond_with_data("OK")

        study = Study("Test study", "description", ["tag"], [Result("Huge success")], [])
        res = eas_client.upload_study(study)

        httpserver.check_assertions()
        assert res.status_code == 200
        assert res.text == "OK"
def test_all_ordered_missing(httpserver: HTTPServer):
    """While ordered handlers are unsatisfied, every other request must keep failing."""
    _setup_all(httpserver)

    # The ordered requests are never made, so the server is permanently in a
    # failed state: each of these requests trips check_assertions().
    for path in ("/permanent", "/oneshot2", "/oneshot1", "/permanent"):
        requests.get(httpserver.url_for(path))
        with pytest.raises(AssertionError):
            httpserver.check_assertions()

    # No handler was consumed: the ordered queue is intact, and because it is
    # still unsatisfied, everything keeps failing.
    assert len(httpserver.ordered_handlers) == 2
    assert len(httpserver.oneshot_handlers) == 2
    assert len(httpserver.handlers) == 1
def test_querystring_dict(httpserver: HTTPServer):
    """A query string given as a dict should match regardless of parameter order."""
    handler = httpserver.expect_request(
        "/foobar",
        query_string={"k1": "v1", "k2": "v2"},
        method="GET")
    handler.respond_with_data("example_response")

    # Both orderings of the query parameters must match the same expectation.
    for query in ("/foobar?k1=v1&k2=v2", "/foobar?k2=v2&k1=v1"):
        response = requests.get(httpserver.url_for(query))
        httpserver.check_assertions()
        assert response.text == "example_response"
        assert response.status_code == 200
def test_custom_request_header_handling(httpserver: HTTPServer) -> None:
    """Headers attached to a CrawlRequest should be sent on both HEAD and GET."""
    page_path = '/page'
    page_url = httpserver.url_for(page_path)
    custom_headers = {'foo': 'bar'}
    httpserver.expect_ordered_request(page_path, method='HEAD', headers=custom_headers).respond_with_data()
    httpserver.expect_ordered_request(page_path, method='GET', headers=custom_headers).respond_with_data()

    class HeaderSendingCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(page_url, headers={'foo': 'bar'})])

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    HeaderSendingCrawler().start()
    httpserver.check_assertions()
def test_find_element_should_return_none_when_element_is_not_found(httpserver: HTTPServer) -> None:
    """find_element() should return None (not raise) for a selector with no match."""
    page_path = '/page'
    page_url = httpserver.url_for(page_path)
    httpserver.expect_ordered_request(page_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(page_path, method='GET').respond_with_data()

    class SearchingCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            assert self.find_element('#nonexistent') is None

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    SearchingCrawler().start()
    httpserver.check_assertions()
def test_get_url_should_return_current_page_url(httpserver: HTTPServer) -> None:
    """get_url() should report the URL of the page currently loaded."""
    page_path = '/page'
    page_url = httpserver.url_for(page_path)
    httpserver.expect_ordered_request(page_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(page_path, method='GET').respond_with_data()

    class UrlReportingCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            assert self.get_url() == page_url

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    UrlReportingCrawler().start()
    httpserver.check_assertions()
def test_get_pages_should_return_all_pages(httpserver: HTTPServer) -> None:
    """get_pages() should list every open page (tab) with index, URL and title.

    Cleanup: the second page's response literal had a spurious ``f`` prefix
    with no placeholders; it is now a plain string.
    """
    first_page_path = '/first-page'
    second_page_path = '/second-page'
    first_page_url = httpserver.url_for(first_page_path)
    second_page_url = httpserver.url_for(second_page_path)
    # target="_blank" opens the second page in a new tab, so two pages exist.
    first_page_response_data = f'''
    <title>First page</title>
    <a id="link" href="{second_page_url}" target="_blank">Go to second page</a>
    '''
    second_page_response_data = '<title>Second page</title>'
    httpserver.expect_ordered_request(first_page_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(first_page_path, method='GET').respond_with_data(
        content_type='text/html', response_data=first_page_response_data)
    # The click-triggered navigation issues only a GET, no HEAD.
    httpserver.expect_ordered_request(second_page_path, method='GET').respond_with_data(
        content_type='text/html', response_data=second_page_response_data)

    class TestCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(first_page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            self.click('#link')
            # Give the new tab time to load before inspecting the page list.
            self.wait_for_timeout(500)
            pages = self.get_pages()
            assert len(pages) == 2
            assert pages[0].index == 0
            assert pages[0].url == first_page_url
            assert pages[0].title == 'First page'
            assert pages[1].index == 1
            assert pages[1].url == second_page_url
            assert pages[1].title == 'Second page'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TestCrawler().start()
    httpserver.check_assertions()
def test_evaluate_should_evaluate_function_when_element_is_found(httpserver: HTTPServer) -> None:
    """evaluate() should run a JS function against the matched element and return its result."""
    page_path = '/page'
    page_url = httpserver.url_for(page_path)
    page_html = '<div id="test">Test</div>'
    httpserver.expect_ordered_request(page_path, method='HEAD').respond_with_data(content_type='text/html')
    httpserver.expect_ordered_request(page_path, method='GET').respond_with_data(content_type='text/html',
                                                                                 response_data=page_html)

    class EvaluatingCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            assert self.evaluate('#test', 'element => element.textContent') == 'Test'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    EvaluatingCrawler().start()
    httpserver.check_assertions()
def test_ordered_ok(httpserver: HTTPServer):
    """Ordered handlers should serve requests made in order, then be consumed."""
    httpserver.expect_ordered_request("/foobar").respond_with_data("OK foobar")
    httpserver.expect_ordered_request("/foobaz").respond_with_data("OK foobaz")
    assert len(httpserver.ordered_handlers) == 2

    # Requests arriving in the registered order succeed.
    first = requests.get(httpserver.url_for("/foobar"))
    httpserver.check_assertions()
    assert first.status_code == 200
    assert first.text == "OK foobar"

    second = requests.get(httpserver.url_for("/foobaz"))
    httpserver.check_assertions()
    assert second.status_code == 200
    assert second.text == "OK foobaz"

    # Both handlers are consumed; ordered handlers are one-shot, so repeats fail.
    assert len(httpserver.ordered_handlers) == 0
    assert requests.get(httpserver.url_for("/foobar")).status_code == 500
    assert requests.get(httpserver.url_for("/foobaz")).status_code == 500
def test_type_should_raise_no_such_element_error_when_element_is_not_found(httpserver: HTTPServer) -> None:
    """type() on a selector with no match should raise NoSuchElementError."""
    page_path = '/page'
    page_url = httpserver.url_for(page_path)
    httpserver.expect_ordered_request(page_path, method='HEAD').respond_with_data()
    httpserver.expect_ordered_request(page_path, method='GET').respond_with_data()

    class TypingCrawler(Crawler):
        def configure(self) -> CrawlerConfiguration:
            return CrawlerConfiguration([CrawlRequest(page_url)])

        def on_response_success(self, response: CrawlResponse) -> None:
            with pytest.raises(NoSuchElementError) as exc_info:
                self.type('#nonexistent', 'Test')
            assert str(exc_info.value) == 'Unable to locate element using selector #nonexistent'

        def on_response_error(self, response: CrawlResponse) -> None:
            assert False, f'Response error: {response}'

    TypingCrawler().start()
    httpserver.check_assertions()