예제 #1
0
    async def test_timeout_value(self):
        """Check ``handler.default_navigation_timeout`` for several settings.

        Covers the ``PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT`` setting being
        absent, explicitly ``None``, zero, an integer and a float.
        """
        timeout_setting = "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT"

        # Setting not present at all: the attribute is expected to be None.
        without_timeout = {"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}
        async with make_handler(without_timeout) as handler:
            assert handler.default_navigation_timeout is None

        # Setting present but explicitly None.
        explicit_none = {
            "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
            timeout_setting: None,
        }
        async with make_handler(explicit_none) as handler:
            assert handler.default_navigation_timeout is None

        # Numeric values are expected to be exposed unchanged; zero is
        # included so that a falsy value is covered as well.
        for expected_timeout in (0, 123, 0.5):
            numeric_settings = {
                "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
                timeout_setting: expected_timeout,
            }
            async with make_handler(numeric_settings) as handler:
                assert handler.default_navigation_timeout == expected_timeout
예제 #2
0
    async def test_use_custom_headers(self):
        """Use a custom coroutine function to process the request headers.

        ``PLAYWRIGHT_PROCESS_REQUEST_HEADERS`` is set to a local coroutine
        function that always returns ``{"foo": "bar"}``. The assertions run
        against the JSON parsed from the ``/headers`` response body
        (presumably the headers the mock server received).
        """
        async def important_headers(*args, **kwargs) -> dict:
            return {"foo": "bar"}

        default_context = {"user_agent": self.browser_type}
        settings = {
            "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
            "PLAYWRIGHT_CONTEXTS": {"default": default_context},
            "PLAYWRIGHT_PROCESS_REQUEST_HEADERS": important_headers,
        }
        scrapy_headers = {"User-Agent": "foobar", "Asdf": "qwerty"}

        async with make_handler(settings) as handler:
            with MockServer() as server:
                request = Request(
                    url=server.urljoin("/headers"),
                    meta={"playwright": True},
                    headers=scrapy_headers,
                )
                response = await handler._download_request(
                    request, Spider("foo")
                )
                parsed = json.loads(response.css("pre::text").get())
                # Lower-case the names so lookups are case-insensitive.
                received = {name.lower(): val for name, val in parsed.items()}

                assert received["foo"] == "bar"
                # Neither the context user agent nor the Scrapy one is
                # expected to be sent.
                rejected_agents = (self.browser_type, "foobar")
                assert received.get("user-agent") not in rejected_agents
                assert "asdf" not in received
예제 #3
0
 async def test_use_playwright_headers(self):
     """Ignore the headers set on the Scrapy request.

     With ``PLAYWRIGHT_PROCESS_REQUEST_HEADERS`` pointing at
     ``scrapy_playwright.headers.use_playwright_headers``, the user agent
     in the ``/headers`` response is expected to be the one configured for
     the default context, and the extra ``Asdf`` header must be absent.
     """
     header_processor = "scrapy_playwright.headers.use_playwright_headers"
     default_context = {"user_agent": self.browser_type}
     settings = {
         "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
         "PLAYWRIGHT_CONTEXTS": {"default": default_context},
         "PLAYWRIGHT_PROCESS_REQUEST_HEADERS": header_processor,
     }
     scrapy_headers = {"User-Agent": "foobar", "Asdf": "qwerty"}

     async with make_handler(settings) as handler:
         with MockServer() as server:
             request = Request(
                 url=server.urljoin("/headers"),
                 meta={"playwright": True},
                 headers=scrapy_headers,
             )
             response = await handler._download_request(
                 request, Spider("foo")
             )
             parsed = json.loads(response.css("pre::text").get())
             # Lower-case the names so lookups are case-insensitive.
             received = {name.lower(): val for name, val in parsed.items()}

             assert received["user-agent"] == self.browser_type
             assert "asdf" not in received
    async def test_contexts_dynamic(self):
        """Request a context by name and supply its kwargs in request meta.

        The request names the context ``"new"`` and passes a storage state
        with one cookie through ``playwright_context_kwargs``; the cookie
        must then show up in the storage state of the returned page's
        context.
        """
        cookie_spec = {
            "url": "https://example.org",
            "name": "asdf",
            "value": "qwerty",
        }
        handler_settings = {"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}

        async with make_handler(handler_settings) as handler:
            with StaticMockServer() as server:
                request_meta = {
                    "playwright": True,
                    "playwright_include_page": True,
                    "playwright_context": "new",
                    "playwright_context_kwargs": {
                        "storage_state": {"cookies": [cookie_spec]},
                    },
                }
                request = Request(
                    server.urljoin("/index.html"), meta=request_meta
                )
                response = await handler._download_request(
                    request, Spider("foo")
                )

            # The page is still inspected after the mock server has stopped.
            page = response.meta["playwright_page"]
            state = await page.context.storage_state()
            await page.close()

            first_cookie = state["cookies"][0]
            assert first_cookie["name"] == "asdf"
            assert first_cookie["value"] == "qwerty"
            assert first_cookie["domain"] == "example.org"
예제 #5
0
 async def test_context_kwargs(self):
     """Pass keyword arguments to the default context through settings.

     The default context is configured with ``java_script_enabled`` set to
     ``False``; waiting for ``div.quote`` on ``/scroll.html`` is then
     expected to raise ``TimeoutError`` (presumably because the quotes are
     rendered by JavaScript -- confirm against the fixture page).
     """
     settings = {
         "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
         "PLAYWRIGHT_CONTEXTS": {
             "default": {"java_script_enabled": False},
         },
     }
     async with make_handler(settings) as handler:
         with StaticMockServer() as server:
             wait_for_quote = PageCoro(
                 "wait_for_selector",
                 selector="div.quote",
                 timeout=1000,
             )
             request = Request(
                 url=server.urljoin("/scroll.html"),
                 meta={
                     "playwright": True,
                     "playwright_page_coroutines": [wait_for_quote],
                 },
             )
             with pytest.raises(TimeoutError):
                 await handler._download_request(request, Spider("foo"))
    async def test_contexts_startup(self):
        """Use a context that is declared in ``PLAYWRIGHT_CONTEXTS``.

        The ``"first"`` context is configured with a storage state holding
        one cookie; a request that selects that context must return a page
        whose context storage state contains the cookie.
        """
        cookie_spec = {
            "url": "https://example.org",
            "name": "foo",
            "value": "bar",
        }
        handler_settings = {
            "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
            "PLAYWRIGHT_CONTEXTS": {
                "first": {"storage_state": {"cookies": [cookie_spec]}},
            },
        }

        async with make_handler(handler_settings) as handler:
            with StaticMockServer() as server:
                request_meta = {
                    "playwright": True,
                    "playwright_include_page": True,
                    "playwright_context": "first",
                }
                request = Request(
                    server.urljoin("/index.html"), meta=request_meta
                )
                response = await handler._download_request(
                    request, Spider("foo")
                )

            page = response.meta["playwright_page"]
            state = await page.context.storage_state()
            # NOTE(review): the context is closed before the page here;
            # the order is kept exactly as it was.
            await page.context.close()
            await page.close()

            first_cookie = state["cookies"][0]
            assert first_cookie["name"] == "foo"
            assert first_cookie["value"] == "bar"
            assert first_cookie["domain"] == "example.org"
예제 #7
0
 async def test_timeout(self):
     """Expect ``TimeoutError`` when the navigation timeout is exceeded.

     The handler is created with ``PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT``
     set to 1000 and the request targets the ``/delay/2`` endpoint.
     """
     settings = {
         "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
         "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT": 1000,
     }
     async with make_handler(settings) as handler:
         with MockServer() as server:
             slow_url = server.urljoin("/delay/2")
             request = Request(slow_url, meta={"playwright": True})
             with pytest.raises(TimeoutError):
                 await handler._download_request(request, Spider("foo"))
예제 #8
0
    async def test_post_request(self):
        """Send a ``FormRequest`` through the handler and check the response.

        The form data ``foo=bar`` must appear in the response text, and the
        response must be tied to the original request.
        """
        handler_settings = {"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}
        async with make_handler(handler_settings) as handler:
            with MockServer() as server:
                request = FormRequest(
                    server.urljoin("/delay/2"),
                    meta={"playwright": True},
                    formdata={"foo": "bar"},
                )
                response = await handler._download_request(
                    request, Spider("foo")
                )

            assert response.request is request
            assert response.url == request.url
            assert response.status == 200
            assert "playwright" in response.flags
            assert "Request body: foo=bar" in response.text
예제 #9
0
    async def test_basic_response(self):
        """Download a static page and check the response and included page.

        With ``playwright_include_page`` enabled, the response meta must
        carry a ``PlaywrightPage`` whose URL matches the response URL.
        """
        handler_settings = {"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}
        async with make_handler(handler_settings) as handler:
            with StaticMockServer() as server:
                request_meta = {
                    "playwright": True,
                    "playwright_include_page": True,
                }
                request = Request(
                    server.urljoin("/index.html"), meta=request_meta
                )
                response = await handler._download_request(
                    request, Spider("foo")
                )

            assert isinstance(response, HtmlResponse)
            assert response.request is request
            assert response.url == request.url
            assert response.status == 200
            assert "playwright" in response.flags

            link_texts = response.css("a::text").getall()
            assert link_texts == ["Lorem Ipsum", "Infinite Scroll"]

            assert isinstance(response.meta["playwright_page"], PlaywrightPage)
            assert response.meta["playwright_page"].url == response.url

            # The page was handed to the test, so the test closes it.
            await response.meta["playwright_page"].close()
예제 #10
0
    async def test_event_handler_dialog_str(self):
        """Register a ``dialog`` event handler given as a string.

        The handler is referenced by the name ``"handle_dialog"``
        (presumably a method of ``DialogSpider``). A page coroutine raises
        an alert with the text ``foobar``, and the spider's
        ``dialog_message`` attribute must end up equal to that text.
        """
        handler_settings = {"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}
        async with make_handler(handler_settings) as handler:
            with StaticMockServer() as server:
                spider = DialogSpider()
                raise_alert = PageCoro("evaluate", "alert('foobar');")
                request_meta = {
                    "playwright": True,
                    "playwright_page_coroutines": [raise_alert],
                    "playwright_page_event_handlers": {
                        "dialog": "handle_dialog",
                    },
                }
                request = Request(
                    url=server.urljoin("/index.html"),
                    meta=request_meta,
                )
                await handler._download_request(request, spider)

            assert spider.dialog_message == "foobar"
    async def test_deprecated_setting(self):
        """Check the deprecated ``PLAYWRIGHT_CONTEXT_ARGS`` setting.

        Creating the handler with ``PLAYWRIGHT_CONTEXT_ARGS`` must emit a
        ``DeprecationWarning`` with the expected message, and the storage
        state given in that setting must still reach the context used for a
        request that does not name a context.
        """
        settings = {
            "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
            "PLAYWRIGHT_CONTEXT_ARGS": {
                "storage_state": {
                    "cookies": [
                        {
                            "url": "https://example.org",
                            "name": "asdf",
                            "value": "qwerty",
                        },
                    ],
                },
            },
        }
        expected_message = (
            "The PLAYWRIGHT_CONTEXT_ARGS setting is deprecated, please use"
            " PLAYWRIGHT_CONTEXTS instead. Keyword arguments defined in"
            " PLAYWRIGHT_CONTEXT_ARGS will be used when creating the 'default' context"
        )
        with warnings.catch_warnings(record=True) as warning_list:
            # Make sure the warning is recorded regardless of the ambient
            # filters or of an earlier emission of the same warning.
            warnings.simplefilter("always", DeprecationWarning)
            async with make_handler(settings) as handler:
                # Search the recorded warnings instead of indexing the first
                # one: an empty list then fails the assertion rather than
                # raising IndexError, and unrelated warnings emitted earlier
                # do not break the check.
                deprecation_messages = [
                    str(recorded.message)
                    for recorded in warning_list
                    if recorded.category is DeprecationWarning
                ]
                assert expected_message in deprecation_messages

                with StaticMockServer() as server:
                    meta = {
                        "playwright": True,
                        "playwright_include_page": True,
                    }
                    req = Request(server.urljoin("/index.html"), meta=meta)
                    resp = await handler._download_request(req, Spider("foo"))

                page = resp.meta["playwright_page"]
                storage_state = await page.context.storage_state()
                await page.close()
                cookie = storage_state["cookies"][0]
                assert cookie["name"] == "asdf"
                assert cookie["value"] == "qwerty"
                assert cookie["domain"] == "example.org"
예제 #12
0
    async def test_user_agent(self):
        """Check which user agent ends up in the ``/headers`` response.

        The default context is configured with a user agent and the
        ``USER_AGENT`` setting is ``None``. A request without a
        ``User-Agent`` header must report the context's user agent; a
        request that sets one must report that value instead.
        """
        settings = {
            "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
            "PLAYWRIGHT_CONTEXTS": {
                "default": {"user_agent": self.browser_type},
            },
            "USER_AGENT": None,
        }
        # (extra Request keyword arguments, expected user agent)
        scenarios = (
            # if Scrapy's user agent is None, use the one from the Browser
            ({}, self.browser_type),
            # if Scrapy's user agent is set to some value, use it
            ({"headers": {"User-Agent": "foobar"}}, "foobar"),
        )
        async with make_handler(settings) as handler:
            with MockServer() as server:
                for extra_kwargs, expected_agent in scenarios:
                    request = Request(
                        url=server.urljoin("/headers"),
                        meta={"playwright": True},
                        **extra_kwargs,
                    )
                    response = await handler._download_request(
                        request, Spider("foo")
                    )
                    parsed = json.loads(response.css("pre::text").get())
                    # Lower-case the names so the lookup is
                    # case-insensitive.
                    received = {
                        name.lower(): val for name, val in parsed.items()
                    }
                    assert received["user-agent"] == expected_agent
예제 #13
0
    async def test_page_coroutine_screenshot(self):
        """Take a PNG screenshot through a page coroutine.

        The screenshot is written to a temporary file; the file contents
        must equal the ``result`` stored on the coroutine object, and the
        detected MIME type must be ``image/png``.
        """
        handler_settings = {"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}
        async with make_handler(handler_settings) as handler:
            with NamedTemporaryFile(mode="w+b") as png_file:
                with StaticMockServer() as server:
                    take_screenshot = PageCoro(
                        "screenshot",
                        path=png_file.name,
                        type="png",
                    )
                    request = Request(
                        url=server.urljoin("/index.html"),
                        meta={
                            "playwright": True,
                            "playwright_page_coroutines": {
                                "png": take_screenshot,
                            },
                        },
                    )
                    await handler._download_request(request, Spider("foo"))

                # Rewind before reading what was written to the file.
                png_file.file.seek(0)
                file_bytes = png_file.file.read()
                coroutines = request.meta["playwright_page_coroutines"]
                assert file_bytes == coroutines["png"].result
                assert get_mimetype(png_file) == "image/png"
예제 #14
0
    async def test_event_handler_dialog_missing(self, caplog):
        """Reference a ``dialog`` handler that the spider does not define.

        A warning about the missing attribute must be logged under the
        ``scrapy-playwright`` logger, and the spider must not have a
        non-``None`` ``dialog_message`` afterwards.
        """
        handler_settings = {"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}
        request_meta = {
            "playwright": True,
            "playwright_page_event_handlers": {
                "dialog": "missing_method",
            },
        }
        async with make_handler(handler_settings) as handler:
            with StaticMockServer() as server:
                spider = DialogSpider()
                request = Request(
                    url=server.urljoin("/index.html"),
                    meta=request_meta,
                )
                await handler._download_request(request, spider)

        # These checks run after the handler context has exited.
        expected_record = (
            "scrapy-playwright",
            logging.WARNING,
            "Spider 'dialog' does not have a 'missing_method' attribute,"
            " ignoring handler for event 'dialog'",
        )
        assert expected_record in caplog.record_tuples
        assert getattr(spider, "dialog_message", None) is None
예제 #15
0
    async def test_page_coroutine_navigation(self):
        """Click a link through a page coroutine and check the final page.

        After clicking ``a.lorem_ipsum`` on ``/index.html``, the response
        must describe ``/lorem_ipsum.html`` while still being tied to the
        original request.
        """
        handler_settings = {"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}
        async with make_handler(handler_settings) as handler:
            with StaticMockServer() as server:
                follow_link = PageCoro("click", "a.lorem_ipsum")
                request = Request(
                    url=server.urljoin("/index.html"),
                    meta={
                        "playwright": True,
                        "playwright_page_coroutines": [follow_link],
                    },
                )
                response = await handler._download_request(
                    request, Spider("foo")
                )

            assert isinstance(response, HtmlResponse)
            assert response.request is request
            assert response.url == server.urljoin("/lorem_ipsum.html")
            assert response.status == 200
            assert "playwright" in response.flags
            assert response.css("title::text").get() == "Lorem Ipsum"

            first_paragraph = response.css("p::text").get()
            assert first_paragraph == (
                "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
            )
예제 #16
0
    async def test_page_coroutine_pdf(self):
        """Generate a PDF through a page coroutine.

        Skipped unless the browser type is ``chromium``. The PDF is written
        to a temporary file; the file contents must equal the ``result``
        stored on the coroutine object, and the detected MIME type must be
        ``application/pdf``.
        """
        if self.browser_type != "chromium":
            pytest.skip("PDF generation is supported only in Chromium")

        handler_settings = {"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}
        async with make_handler(handler_settings) as handler:
            with NamedTemporaryFile(mode="w+b") as pdf_file:
                with StaticMockServer() as server:
                    make_pdf = PageCoro("pdf", path=pdf_file.name)
                    request = Request(
                        url=server.urljoin("/index.html"),
                        meta={
                            "playwright": True,
                            "playwright_page_coroutines": {"pdf": make_pdf},
                        },
                    )
                    await handler._download_request(request, Spider("foo"))

                # Rewind before reading what was written to the file.
                pdf_file.file.seek(0)
                file_bytes = pdf_file.file.read()
                coroutines = request.meta["playwright_page_coroutines"]
                assert file_bytes == coroutines["pdf"].result
                assert get_mimetype(pdf_file) == "application/pdf"
예제 #17
0
    async def test_page_coroutine_infinite_scroll(self):
        """Scroll a page twice through page coroutines.

        The coroutines wait for the first quote, then twice scroll by the
        body height and wait for the 11th and 21st quote respectively. The
        final response must contain 30 ``div.quote`` elements.
        """
        scroll_down = "window.scrollBy(0, document.body.scrollHeight)"
        later_selectors = (
            "div.quote:nth-child(11)",
            "div.quote:nth-child(21)",
        )

        # Each step gets its own PageCoro instance.
        page_coroutines = [PageCoro("wait_for_selector", selector="div.quote")]
        for quote_selector in later_selectors:
            page_coroutines.append(PageCoro("evaluate", scroll_down))
            page_coroutines.append(
                PageCoro("wait_for_selector", selector=quote_selector)
            )

        handler_settings = {"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}
        async with make_handler(handler_settings) as handler:
            with StaticMockServer() as server:
                request = Request(
                    url=server.urljoin("/scroll.html"),
                    headers={"User-Agent": "scrapy-playwright"},
                    meta={
                        "playwright": True,
                        "playwright_page_coroutines": page_coroutines,
                    },
                )
                response = await handler._download_request(
                    request, Spider("foo")
                )

            assert isinstance(response, HtmlResponse)
            assert response.request is request
            assert response.url == server.urljoin("/scroll.html")
            assert response.status == 200
            assert "playwright" in response.flags
            assert len(response.css("div.quote")) == 30