Example #1
    @inlineCallbacks
    def test_errors(self):
        view = AutologinAPI()
        with MockServer():
            request = api_request(username='******', password='******')
            with pytest.raises(HttpError) as excinfo:
                yield render(view, request)
            assert excinfo.value.args[0] == 'Missing required field "url"'

            request = api_request(url='http://example.com', foo='bar')
            with pytest.raises(HttpError) as excinfo:
                yield render(view, request)
            assert excinfo.value.args[0] == 'Arguments foo not supported'
Example #2
@inlineCallbacks
def crawl_items(spider_cls, resource_cls, settings, spider_kwargs=None):
    """Use spider_cls to crawl resource_cls. The URL of the resource is
    passed to the spider as the ``url`` argument.
    Return an ``(items, resource_url, crawler)`` tuple.
    """
    spider_kwargs = {} if spider_kwargs is None else spider_kwargs
    crawler = make_crawler(spider_cls, settings)
    with MockServer(resource_cls) as s:
        root_url = s.root_url
        yield crawler.crawl(url=root_url, **spider_kwargs)
    result = crawler.spider.collected_items, root_url, crawler
    returnValue(result)
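A test would typically drive this helper from another inlineCallbacks generator; a minimal sketch, where MySpider and MyResource are hypothetical stand-ins for a spider class and a mock-server resource:

    @defer.inlineCallbacks
    def test_crawl_items(self):
        # MySpider and MyResource are illustrative names, not from the excerpt.
        items, url, crawler = yield crawl_items(MySpider, MyResource, settings={})
        self.assertTrue(items)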
Example #3
    def setUp(self):
        self.mockserver = MockServer()
        self.mockserver.__enter__()
        self._oldenv = os.environ.copy()

        self._proxy = HTTPSProxy()
        self._proxy.start()

        # Wait for the proxy to start.
        time.sleep(1.0)
        os.environ['https_proxy'] = self._proxy.http_address()
        os.environ['http_proxy'] = self._proxy.http_address()
Example #4
    def setUp(self):
        self.mockserver = MockServer()
        self.mockserver.__enter__()

        # prepare a directory for storing files
        self.tmpmediastore = self.mktemp()
        os.mkdir(self.tmpmediastore)
        self.settings = {
            'ITEM_PIPELINES': {self.pipeline_class: 1},
            self.store_setting_key: self.tmpmediastore,
        }
        self.runner = CrawlerRunner(self.settings)
        self.items = []
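The excerpt does not include the matching cleanup; a minimal tearDown sketch for this fixture would exit the mock server and delete the temporary media store:

    def tearDown(self):
        self.mockserver.__exit__(None, None, None)
        shutil.rmtree(self.tmpmediastore)  # assumes shutil is imported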
Example #5
    @inlineCallbacks
    def test_login_wrong_pw(self):
        url = 'http://localhost:{}{}'.format(PORT, Login.url)
        view = AutologinAPI()
        with MockServer():
            request = api_request(url=url, username='******', password='******')
            yield render(view, request)
            result = api_result(request)
            assert result['status'] == 'error'
            assert 'response' in result
            response = result['response']
            assert 'cookies' in response
            assert 'text' in response
            assert 'headers' in response
Example #6
    def test_proxy(self):
        assert 'localhost' not in self.url, 'proxy_bypass bypasses localhost'
        with MockServer('tests.proxy'):
            with pytest.raises(AutoLoginException):
                self.al.auth_cookies_from_url(
                    self.url_check_proxy, 'admin', 'secret')
            cookies = self.al.auth_cookies_from_url(
                self.url_check_proxy, 'admin', 'secret',
                settings={
                    'HTTP_PROXY': 'http://127.0.0.1:{}'.format(PROXY_PORT)
                },
            )
        assert {c.name: c.value for c in cookies} == {'_auth': 'yes'}
Example #7
    def setUp(self):
        try:
            import mitmproxy  # noqa: F401
        except ImportError:
            self.skipTest('mitmproxy is not installed')

        self.mockserver = MockServer()
        self.mockserver.__enter__()
        self._oldenv = os.environ.copy()

        self._proxy = MitmProxy()
        proxy_url = self._proxy.start()
        os.environ['https_proxy'] = proxy_url
        os.environ['http_proxy'] = proxy_url
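The corresponding tearDown is not shown; a sketch, assuming the MitmProxy helper exposes a stop() method, would restore the saved environment and close the mock server:

    def tearDown(self):
        os.environ = self._oldenv  # restore the environment captured in setUp
        self._proxy.stop()         # assumed shutdown method on the proxy helper
        self.mockserver.__exit__(None, None, None)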
Example #8
    async def test_post_request(self):
        async with make_handler({"PLAYWRIGHT_BROWSER_TYPE":
                                 self.browser_type}) as handler:
            with MockServer() as server:
                req = FormRequest(server.urljoin("/delay/2"),
                                  meta={"playwright": True},
                                  formdata={"foo": "bar"})
                resp = await handler._download_request(req, Spider("foo"))

            assert resp.request is req
            assert resp.url == req.url
            assert resp.status == 200
            assert "playwright" in resp.flags
            assert "Request body: foo=bar" in resp.text
Example #9
    @inlineCallbacks
    def test_login1(self):
        url = 'http://localhost:{}{}'.format(PORT, Login.url)
        view = AutologinAPI()
        with MockServer():
            request = api_request(url=url, username='******', password='******')
            yield render(view, request)
            result = api_result(request)
            assert result['status'] == 'solved'
            assert result['start_url'] == url
            assert {c['name']: c['value'] for c in result['cookies']} == \
                   {'_auth': 'yes'}
            assert 'response' in result
            response = result['response']
            assert response['cookies'] == result['cookies']
            assert 'text' in response
            assert 'headers' in response
Example #10
    async def test_timeout(self):
        handler = ScrapyPlaywrightDownloadHandler(
            get_crawler(
                settings_dict={
                    "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
                    "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT": 1000,
                }
            )
        )
        await handler._launch_browser()

        with MockServer() as server:
            req = Request(server.urljoin("/index.html"), meta={"playwright": True})
            with pytest.raises(TimeoutError):
                await handler._download_request(req, Spider("foo"))

        await handler.browser.close()
Example #11
    @defer.inlineCallbacks
    def run_and_export(self, spider_cls, settings=None):
        """ Run spider with specified settings; return exported data. """
        tmpdir = tempfile.mkdtemp()
        res_name = tmpdir + '/res'
        defaults = {
            'FEED_URI': 'file://' + res_name,
            'FEED_FORMAT': 'csv',
        }
        defaults.update(settings or {})
        try:
            with MockServer() as s:
                runner = CrawlerRunner(Settings(defaults))
                yield runner.crawl(spider_cls)

            with open(res_name, 'rb') as f:
                defer.returnValue(f.read())

        finally:
            shutil.rmtree(tmpdir)
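Callers drive this helper with a yield as well; a minimal usage sketch, where MySpider is a hypothetical spider class:

    @defer.inlineCallbacks
    def test_csv_export(self):
        data = yield self.run_and_export(MySpider, {'FEED_FORMAT': 'csv'})
        self.assertTrue(data)  # raw bytes of the exported CSV feed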
Example #12
    async def test_post_request(self):
        handler = ScrapyPlaywrightDownloadHandler(
            get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type})
        )
        await handler._launch_browser()

        with MockServer() as server:
            req = FormRequest(
                server.urljoin("/"), meta={"playwright": True}, formdata={"foo": "bar"}
            )
            resp = await handler._download_request(req, Spider("foo"))

        assert resp.request is req
        assert resp.url == req.url
        assert resp.status == 200
        assert "playwright" in resp.flags
        assert "Request body: foo=bar" in resp.text

        await handler.browser.close()
Example #13
    async def test_user_agent(self):
        settings_dict = {
            "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
            "PLAYWRIGHT_CONTEXTS": {
                "default": {
                    "user_agent": self.browser_type
                }
            },
            "USER_AGENT": None,
        }
        async with make_handler(settings_dict) as handler:
            with MockServer() as server:
                # if Scrapy's user agent is None, use the one from the Browser
                req = Request(
                    url=server.urljoin("/headers"),
                    meta={"playwright": True},
                )
                resp = await handler._download_request(req, Spider("foo"))
                headers = json.loads(resp.css("pre::text").get())
                headers = {
                    key.lower(): value
                    for key, value in headers.items()
                }
                assert headers["user-agent"] == self.browser_type

                # if Scrapy's user agent is set to some value, use it
                req = Request(
                    url=server.urljoin("/headers"),
                    meta={"playwright": True},
                    headers={"User-Agent": "foobar"},
                )
                resp = await handler._download_request(req, Spider("foo"))
                headers = json.loads(resp.css("pre::text").get())
                headers = {
                    key.lower(): value
                    for key, value in headers.items()
                }
                assert headers["user-agent"] == "foobar"
Example #14
    @defer.inlineCallbacks
    def run_and_export(self, spider_cls, settings=None):
        """ Run spider with specified settings; return exported data. """
        tmpdir = tempfile.mkdtemp()
        res_path = os.path.join(tmpdir, 'res')
        res_uri = urljoin('file:', pathname2url(res_path))
        defaults = {
            'FEED_URI': res_uri,
            'FEED_FORMAT': 'csv',
        }
        defaults.update(settings or {})
        try:
            with MockServer() as s:
                runner = CrawlerRunner(Settings(defaults))
                spider_cls.start_urls = [s.url('/')]
                yield runner.crawl(spider_cls)

            with open(res_path, 'rb') as f:
                content = f.read()

        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)

        defer.returnValue(content)
Example #15
    @defer.inlineCallbacks
    def run_and_export(self, spider_cls, settings):
        """ Run spider with specified settings; return exported data. """
        def path_to_url(path):
            return urljoin('file:', pathname2url(str(path)))

        def printf_escape(string):
            return string.replace('%', '%%')

        FEEDS = settings.get('FEEDS') or {}
        settings['FEEDS'] = {
            printf_escape(path_to_url(file_path)): feed
            for file_path, feed in FEEDS.items()
        }

        content = {}
        try:
            with MockServer() as s:
                runner = CrawlerRunner(Settings(settings))
                spider_cls.start_urls = [s.url('/')]
                yield runner.crawl(spider_cls)

            for file_path, feed in FEEDS.items():
                if not os.path.exists(str(file_path)):
                    continue

                with open(str(file_path), 'rb') as f:
                    content[feed['format']] = f.read()

        finally:
            for file_path in FEEDS.keys():
                if not os.path.exists(str(file_path)):
                    continue

                os.remove(str(file_path))

        return content
Example #16
from urllib.parse import urlparse

from twisted.internet import reactor
from twisted.names.client import createResolver

from scrapy import Request, Spider
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
from tests.mockserver import MockServer, MockDNSServer


class LocalhostSpider(Spider):
    name = "localhost_spider"

    def start_requests(self):
        yield Request(self.url)

    def parse(self, response):
        netloc = urlparse(response.url).netloc
        self.logger.info("Host: %s" % netloc.split(":")[0])
        self.logger.info("Type: %s" % type(response.ip_address))
        self.logger.info("IP address: %s" % response.ip_address)


if __name__ == "__main__":
    with MockServer() as mock_http_server, MockDNSServer() as mock_dns_server:
        port = urlparse(mock_http_server.http_address).port
        url = "http://not.a.real.domain:{port}/echo".format(port=port)

        servers = [(mock_dns_server.host, mock_dns_server.port)]
        reactor.installResolver(createResolver(servers=servers))

        configure_logging()
        runner = CrawlerRunner()
        d = runner.crawl(LocalhostSpider, url=url)
        d.addBoth(lambda _: reactor.stop())
        reactor.run()
Example #17
    def setUp(self):
        self.mockserver = MockServer()
        self.mockserver.__enter__()
Example #18
    def setUp(self):
        self.mockserver = MockServer()
        self.mockserver.__enter__()
        self.runner = CrawlerRunner()
Example #19
    @classmethod
    def setUpClass(cls):
        cls.mockserver = MockServer()
        cls.mockserver.__enter__()
Example #20
    def setUp(self):
        self.al = AutoLogin()
        self.mockserver = MockServer()
        self.mockserver.__enter__()
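All of these fixtures enter the MockServer context by hand, so each test class needs the matching exit in tearDown; a minimal sketch of that counterpart (for the setUpClass variant in Example #19, the same call belongs in a @classmethod tearDownClass):

    def tearDown(self):
        # Mirror setUp: leave the context entered via __enter__().
        self.mockserver.__exit__(None, None, None)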