async def test_page_coroutine_pdf(self):
    """Check that a ``pdf`` page coroutine writes a PDF file and stores it as its result.

    The test is skipped unless the configured browser type is Chromium. The
    file written to the temporary path must be detected as
    ``application/pdf`` and its bytes must equal the ``result`` attribute of
    the ``PageCoro`` stored in the request meta.
    """
    if self.browser_type != "chromium":
        pytest.skip("PDF generation is supported only in Chromium")

    # The context manager closes (and thereby removes) the temporary file even
    # when the download or one of the assertions fails.
    with NamedTemporaryFile(mode="w+b") as pdf_file:
        handler = ScrapyPlaywrightDownloadHandler(
            get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type})
        )
        await handler._launch_browser()
        try:
            with StaticMockServer() as server:
                req = Request(
                    url=server.urljoin("/index.html"),
                    meta={
                        "playwright": True,
                        "playwright_page_coroutines": {
                            "pdf": PageCoro("pdf", path=pdf_file.name),
                        },
                    },
                )
                await handler._download_request(req, Spider("foo"))

                assert get_mimetype(pdf_file) == "application/pdf"

                # Rewind before reading so the whole file is compared.
                pdf_file.file.seek(0)
                assert pdf_file.file.read() == req.meta["playwright_page_coroutines"]["pdf"].result
        finally:
            # Always shut the browser down so a failing test does not leak it.
            await handler.browser.close()
async def test_page_coroutine_navigation(self):
    """Check that a ``click`` page coroutine which navigates is reflected in the response.

    The request targets ``/index.html`` and clicks ``a.lorem_ipsum``; the
    resulting response is expected to carry the URL, title and first
    paragraph of ``/lorem_ipsum.html``.
    """
    handler = ScrapyPlaywrightDownloadHandler(
        get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type})
    )
    await handler._launch_browser()
    try:
        with StaticMockServer() as server:
            req = Request(
                url=server.urljoin("/index.html"),
                meta={
                    "playwright": True,
                    "playwright_page_coroutines": [PageCoro("click", "a.lorem_ipsum")],
                },
            )
            resp = await handler._download_request(req, Spider("foo"))

            assert isinstance(resp, HtmlResponse)
            assert resp.request is req
            assert resp.url == server.urljoin("/lorem_ipsum.html")
            assert resp.status == 200
            assert "playwright" in resp.flags
            assert resp.css("title::text").get() == "Lorem Ipsum"
            text = resp.css("p::text").get()
            assert text == "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
    finally:
        # Always shut the browser down so a failing test does not leak it.
        await handler.browser.close()
async def test_page_coroutine_screenshot(self):
    """Check that a ``screenshot`` page coroutine writes a PNG and stores it as its result.

    The file written to the temporary path must be detected as ``image/png``
    and its bytes must equal the ``result`` attribute of the ``PageCoro``
    stored in the request meta.
    """
    # The context manager closes (and thereby removes) the temporary file even
    # when the download or one of the assertions fails.
    with NamedTemporaryFile(mode="w+b") as png_file:
        handler = ScrapyPlaywrightDownloadHandler(
            get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type})
        )
        await handler._launch_browser()
        try:
            with StaticMockServer() as server:
                req = Request(
                    url=server.urljoin("/index.html"),
                    meta={
                        "playwright": True,
                        "playwright_page_coroutines": {
                            "png": PageCoro("screenshot", path=png_file.name, type="png"),
                        },
                    },
                )
                await handler._download_request(req, Spider("foo"))

                assert get_mimetype(png_file) == "image/png"

                # Rewind before reading so the whole file is compared.
                png_file.file.seek(0)
                assert png_file.file.read() == req.meta["playwright_page_coroutines"]["png"].result
        finally:
            # Always shut the browser down so a failing test does not leak it.
            await handler.browser.close()
async def test_context_args(self):
    """Check that ``PLAYWRIGHT_CONTEXT_ARGS`` is applied to the browser context.

    The context is configured with ``java_script_enabled=False``; waiting for
    ``div.quote`` on ``/scroll.html`` is then expected to raise
    ``TimeoutError`` (presumably because the quotes are rendered by
    JavaScript -- confirm against the mock server's page).
    """
    handler = ScrapyPlaywrightDownloadHandler(
        get_crawler(
            settings_dict={
                "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
                "PLAYWRIGHT_CONTEXT_ARGS": {"java_script_enabled": False},
            }
        )
    )
    await handler._launch_browser()
    try:
        with StaticMockServer() as server:
            req = Request(
                url=server.urljoin("/scroll.html"),
                meta={
                    "playwright": True,
                    "playwright_page_coroutines": [
                        PageCoro("wait_for_selector", selector="div.quote", timeout=1000),
                    ],
                },
            )
            with pytest.raises(TimeoutError):
                await handler._download_request(req, Spider("foo"))
    finally:
        # Always shut the browser down, also when the expected timeout does
        # not happen and pytest.raises fails the test.
        await handler.browser.close()
async def test_page_coroutine_infinite_scroll(self):
    """Check that a sequence of page coroutines can drive an infinite-scroll page.

    The coroutines wait for the first quote, scroll to the bottom twice and
    wait for the 11th and 21st quotes in between; the final response is
    expected to contain 30 ``div.quote`` elements.
    """
    handler = ScrapyPlaywrightDownloadHandler(
        get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type})
    )
    await handler._launch_browser()
    try:
        with StaticMockServer() as server:
            scroll_down = "window.scrollBy(0, document.body.scrollHeight)"
            req = Request(
                url=server.urljoin("/scroll.html"),
                headers={"User-Agent": "scrapy-playwright"},
                meta={
                    "playwright": True,
                    "playwright_page_coroutines": [
                        PageCoro("wait_for_selector", selector="div.quote"),
                        PageCoro("evaluate", scroll_down),
                        PageCoro("wait_for_selector", selector="div.quote:nth-child(11)"),
                        PageCoro("evaluate", scroll_down),
                        PageCoro("wait_for_selector", selector="div.quote:nth-child(21)"),
                    ],
                },
            )
            resp = await handler._download_request(req, Spider("foo"))

            assert isinstance(resp, HtmlResponse)
            assert resp.request is req
            assert resp.url == server.urljoin("/scroll.html")
            assert resp.status == 200
            assert "playwright" in resp.flags
            assert len(resp.css("div.quote")) == 30
    finally:
        # Always shut the browser down so a failing test does not leak it.
        await handler.browser.close()
async def make_handler(settings_dict: dict):
    """Yield a download handler whose browser has been launched, then close it.

    The handler is built from a crawler configured with ``settings_dict``.
    If launching the browser raises, the error is swallowed and nothing is
    yielded. ``handler._close()`` is awaited in every case.
    """
    from scrapy_playwright.handler import ScrapyPlaywrightDownloadHandler

    download_handler = ScrapyPlaywrightDownloadHandler(
        crawler=get_crawler(settings_dict=settings_dict)
    )
    try:
        await download_handler._launch_browser()
    except:  # noqa (E722)
        # Deliberate best effort: a failed launch only skips the yield below.
        pass
    else:
        yield download_handler
    finally:
        # Runs after a failed launch as well as after the consumer is done.
        await download_handler._close()
async def test_timeout(self):
    """Check that the default navigation timeout setting makes slow requests fail.

    The handler is configured with
    ``PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT = 1000`` and the download of
    ``/index.html`` from ``MockServer`` is expected to raise ``TimeoutError``
    (presumably that server responds too slowly -- confirm against the mock
    server implementation).
    """
    handler = ScrapyPlaywrightDownloadHandler(
        get_crawler(
            settings_dict={
                "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
                "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT": 1000,
            }
        )
    )
    await handler._launch_browser()
    try:
        with MockServer() as server:
            req = Request(server.urljoin("/index.html"), meta={"playwright": True})
            with pytest.raises(TimeoutError):
                await handler._download_request(req, Spider("foo"))
    finally:
        # Always shut the browser down, also when the expected timeout does
        # not happen and pytest.raises fails the test.
        await handler.browser.close()
async def test_post_request(self):
    """Check that a ``FormRequest`` is sent through Playwright with its form body.

    The response text is expected to contain ``Request body: foo=bar``
    (presumably echoed back by ``MockServer`` -- confirm against the mock
    server implementation).
    """
    handler = ScrapyPlaywrightDownloadHandler(
        get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type})
    )
    await handler._launch_browser()
    try:
        with MockServer() as server:
            req = FormRequest(
                server.urljoin("/"), meta={"playwright": True}, formdata={"foo": "bar"}
            )
            resp = await handler._download_request(req, Spider("foo"))

            assert resp.request is req
            assert resp.url == req.url
            assert resp.status == 200
            assert "playwright" in resp.flags
            assert "Request body: foo=bar" in resp.text
    finally:
        # Always shut the browser down so a failing test does not leak it.
        await handler.browser.close()
async def test_basic_response(self):
    """Check the basic response attributes and the ``playwright_include_page`` meta key.

    With ``playwright_include_page`` set, the response meta is expected to
    hold a ``PlaywrightPage`` whose URL matches the response URL. The page is
    closed explicitly once the assertions have passed.
    """
    handler = ScrapyPlaywrightDownloadHandler(
        get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type})
    )
    await handler._launch_browser()
    try:
        with StaticMockServer() as server:
            meta = {"playwright": True, "playwright_include_page": True}
            req = Request(server.urljoin("/index.html"), meta=meta)
            resp = await handler._download_request(req, Spider("foo"))

            assert isinstance(resp, HtmlResponse)
            assert resp.request is req
            assert resp.url == req.url
            assert resp.status == 200
            assert "playwright" in resp.flags
            assert resp.css("a::text").getall() == ["Lorem Ipsum", "Infinite Scroll"]
            assert isinstance(resp.meta["playwright_page"], PlaywrightPage)
            assert resp.meta["playwright_page"].url == resp.url

            await resp.meta["playwright_page"].close()
    finally:
        # Always shut the browser down so a failing test does not leak it.
        await handler.browser.close()
def setUp(self):
    """Start the static mock server and prepare a download handler for the test.

    The server's context is entered manually, so leaving it is left to code
    outside this method. The handler is created through ``from_crawler`` and
    the value returned by its ``_engine_started()`` is yielded.
    """
    self.server = StaticMockServer()
    self.server.__enter__()

    crawler = get_crawler()
    self.handler = ScrapyPlaywrightDownloadHandler.from_crawler(crawler)

    engine_started = self.handler._engine_started()
    yield engine_started