def test_errors(self):
    """A missing 'url' field and unknown arguments both raise HttpError."""
    view = AutologinAPI()
    with MockServer():
        # Request without the mandatory 'url' field.
        bad_request = api_request(username='******', password='******')
        with pytest.raises(HttpError) as excinfo:
            yield render(view, bad_request)
        assert excinfo.value.args[0] == 'Missing required field "url"'
        # Request carrying an argument the API does not accept.
        bad_request = api_request(url='http://example.com', foo='bar')
        with pytest.raises(HttpError) as excinfo:
            yield render(view, bad_request)
        assert excinfo.value.args[0] == 'Arguments foo not supported'
def crawl_items(spider_cls, resource_cls, settings, spider_kwargs=None):
    """Use ``spider_cls`` to crawl ``resource_cls``.

    The URL of the resource is passed to the spider as the ``url``
    argument.  Fires back a ``(items, resource_url, crawler)`` tuple
    via ``returnValue``.
    """
    if spider_kwargs is None:
        spider_kwargs = {}
    crawler = make_crawler(spider_cls, settings)
    with MockServer(resource_cls) as server:
        root_url = server.root_url
        yield crawler.crawl(url=root_url, **spider_kwargs)
        returnValue((crawler.spider.collected_items, server.root_url, crawler))
def setUp(self):
    """Start the mock server plus an HTTPS-capable proxy, and point
    both proxy environment variables at the proxy."""
    self.mockserver = MockServer()
    self.mockserver.__enter__()
    # Snapshot the environment so it can be restored afterwards.
    self._oldenv = os.environ.copy()
    self._proxy = HTTPSProxy()
    self._proxy.start()
    # Wait for the proxy to start.
    time.sleep(1.0)
    proxy_address = self._proxy.http_address()
    os.environ['https_proxy'] = proxy_address
    os.environ['http_proxy'] = proxy_address
def setUp(self):
    """Start the mock server and set up a temporary media store
    wired into the pipeline under test."""
    self.mockserver = MockServer()
    self.mockserver.__enter__()
    # prepare a directory for storing files
    self.tmpmediastore = self.mktemp()
    os.mkdir(self.tmpmediastore)
    settings = {'ITEM_PIPELINES': {self.pipeline_class: 1}}
    settings[self.store_setting_key] = self.tmpmediastore
    self.settings = settings
    self.runner = CrawlerRunner(self.settings)
    self.items = []
def test_login_wrong_pw(self):
    """A failed login yields status 'error' but still carries the response."""
    url = 'http://localhost:{}{}'.format(PORT, Login.url)
    view = AutologinAPI()
    with MockServer():
        request = api_request(url=url, username='******', password='******')
        yield render(view, request)
        result = api_result(request)
        assert result['status'] == 'error'
        assert 'response' in result
        response = result['response']
        # The response payload exposes cookies, body text and headers.
        for key in ('cookies', 'text', 'headers'):
            assert key in response
def test_proxy(self):
    """The proxy-only URL is unreachable directly, but login succeeds
    once an explicit HTTP_PROXY setting is supplied.
    """
    # proxy_bypass() skips proxies for localhost, which would make
    # this test pass vacuously — guard against that.
    assert 'localhost' not in self.url, 'proxy_bypass bypasses localhost'
    with MockServer('tests.proxy'):
        # Without proxy settings the request must fail.
        # (The `as e` binding was unused and has been dropped.)
        with pytest.raises(AutoLoginException):
            self.al.auth_cookies_from_url(
                self.url_check_proxy, 'admin', 'secret')
        # With the proxy configured, authentication succeeds.
        cookies = self.al.auth_cookies_from_url(
            self.url_check_proxy, 'admin', 'secret',
            settings={
                'HTTP_PROXY': 'http://127.0.0.1:{}'.format(PROXY_PORT)
            },
        )
        assert {c.name: c.value for c in cookies} == {'_auth': 'yes'}
def setUp(self):
    """Start the mock server and a mitmproxy instance, routing all
    HTTP(S) traffic through the proxy; skip if mitmproxy is absent."""
    try:
        import mitmproxy  # noqa: F401
    except ImportError:
        self.skipTest('mitmproxy is not installed')
    self.mockserver = MockServer()
    self.mockserver.__enter__()
    # Remember the original environment so tearDown can restore it.
    self._oldenv = os.environ.copy()
    self._proxy = MitmProxy()
    proxy_url = self._proxy.start()
    os.environ['http_proxy'] = proxy_url
    os.environ['https_proxy'] = proxy_url
async def test_post_request(self):
    """Form data POSTed through the Playwright handler reaches the server."""
    settings = {"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}
    async with make_handler(settings) as handler:
        with MockServer() as server:
            req = FormRequest(
                server.urljoin("/delay/2"),
                meta={"playwright": True},
                formdata={"foo": "bar"},
            )
            resp = await handler._download_request(req, Spider("foo"))
            assert resp.request is req
            assert resp.url == req.url
            assert resp.status == 200
            assert "playwright" in resp.flags
            # The mock server echoes the request body back.
            assert "Request body: foo=bar" in resp.text
def test_login1(self):
    """A successful login reports 'solved' and returns the auth cookie."""
    url = 'http://localhost:{}{}'.format(PORT, Login.url)
    view = AutologinAPI()
    with MockServer():
        request = api_request(url=url, username='******', password='******')
        yield render(view, request)
        result = api_result(request)
        assert result['status'] == 'solved'
        assert result['start_url'] == url
        cookie_map = {c['name']: c['value'] for c in result['cookies']}
        assert cookie_map == {'_auth': 'yes'}
        assert 'response' in result
        response = result['response']
        # Response-level cookies mirror the top-level ones.
        assert response['cookies'] == result['cookies']
        assert 'text' in response
        assert 'headers' in response
async def test_timeout(self):
    """A navigation slower than PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT
    raises TimeoutError.
    """
    handler = ScrapyPlaywrightDownloadHandler(
        get_crawler(
            settings_dict={
                "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
                "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT": 1000,
            }
        )
    )
    await handler._launch_browser()
    try:
        with MockServer() as server:
            req = Request(server.urljoin("/index.html"), meta={"playwright": True})
            with pytest.raises(TimeoutError):
                await handler._download_request(req, Spider("foo"))
    finally:
        # Close the browser even if the request unexpectedly succeeds
        # (pytest.raises failing), otherwise a browser process leaks.
        await handler.browser.close()
def run_and_export(self, spider_cls, settings=None):
    """ Run spider with specified settings; return exported data. """
    tmpdir = tempfile.mkdtemp()
    res_name = tmpdir + '/res'
    # NOTE(review): 'file://' + path only forms a valid URI for POSIX
    # paths; consider pathname2url for portability — confirm target OS.
    feed_settings = {
        'FEED_URI': 'file://' + res_name,
        'FEED_FORMAT': 'csv',
    }
    feed_settings.update(settings or {})
    try:
        with MockServer():
            runner = CrawlerRunner(Settings(feed_settings))
            yield runner.crawl(spider_cls)
            with open(res_name, 'rb') as f:
                defer.returnValue(f.read())
    finally:
        shutil.rmtree(tmpdir)
async def test_post_request(self):
    """Form data POSTed through the Playwright handler reaches the server."""
    handler = ScrapyPlaywrightDownloadHandler(
        get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type})
    )
    await handler._launch_browser()
    try:
        with MockServer() as server:
            req = FormRequest(
                server.urljoin("/"), meta={"playwright": True}, formdata={"foo": "bar"}
            )
            resp = await handler._download_request(req, Spider("foo"))
            assert resp.request is req
            assert resp.url == req.url
            assert resp.status == 200
            assert "playwright" in resp.flags
            assert "Request body: foo=bar" in resp.text
    finally:
        # Close the browser even when an assertion above fails;
        # without the finally the browser process leaked across tests.
        await handler.browser.close()
async def test_user_agent(self):
    """The browser context's user_agent is used when USER_AGENT is None;
    an explicit per-request header always wins."""

    def lowered_headers(resp):
        # Parse the /headers echo page into a case-normalized dict.
        parsed = json.loads(resp.css("pre::text").get())
        return {key.lower(): value for key, value in parsed.items()}

    settings_dict = {
        "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
        "PLAYWRIGHT_CONTEXTS": {
            "default": {
                "user_agent": self.browser_type
            }
        },
        "USER_AGENT": None,
    }
    async with make_handler(settings_dict) as handler:
        with MockServer() as server:
            # if Scrapy's user agent is None, use the one from the Browser
            req = Request(
                url=server.urljoin("/headers"),
                meta={"playwright": True},
            )
            resp = await handler._download_request(req, Spider("foo"))
            assert lowered_headers(resp)["user-agent"] == self.browser_type

            # if Scrapy's user agent is set to some value, use it
            req = Request(
                url=server.urljoin("/headers"),
                meta={"playwright": True},
                headers={"User-Agent": "foobar"},
            )
            resp = await handler._download_request(req, Spider("foo"))
            assert lowered_headers(resp)["user-agent"] == "foobar"
def run_and_export(self, spider_cls, settings=None):
    """ Run spider with specified settings; return exported data. """
    tmpdir = tempfile.mkdtemp()
    res_path = os.path.join(tmpdir, 'res')
    # Build a portable file:// URI for the feed export target.
    res_uri = urljoin('file:', pathname2url(res_path))
    feed_settings = {'FEED_URI': res_uri, 'FEED_FORMAT': 'csv'}
    feed_settings.update(settings or {})
    try:
        with MockServer() as s:
            runner = CrawlerRunner(Settings(feed_settings))
            spider_cls.start_urls = [s.url('/')]
            yield runner.crawl(spider_cls)
            with open(res_path, 'rb') as f:
                content = f.read()
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)
    defer.returnValue(content)
def run_and_export(self, spider_cls, settings):
    """ Run spider with specified settings; return exported data.

    Returns a mapping of feed format -> exported bytes, one entry per
    feed in ``settings['FEEDS']`` that actually produced a file.
    """
    def path_to_url(path):
        # Portable file:// URI for a local path.
        return urljoin('file:', pathname2url(str(path)))

    def printf_escape(string):
        # Feed URIs undergo %-style interpolation; escape literal '%'.
        return string.replace('%', '%%')

    FEEDS = settings.get('FEEDS') or {}
    # Work on a shallow copy so the caller's settings dict is not
    # mutated (the original rewrote settings['FEEDS'] in place).
    settings = dict(settings)
    settings['FEEDS'] = {
        printf_escape(path_to_url(file_path)): feed
        for file_path, feed in FEEDS.items()
    }
    content = {}
    try:
        with MockServer() as s:
            runner = CrawlerRunner(Settings(settings))
            spider_cls.start_urls = [s.url('/')]
            yield runner.crawl(spider_cls)

        for file_path, feed in FEEDS.items():
            if not os.path.exists(str(file_path)):
                continue
            with open(str(file_path), 'rb') as f:
                content[feed['format']] = f.read()
    finally:
        # Always remove whatever feed files were written.
        for file_path in FEEDS.keys():
            if not os.path.exists(str(file_path)):
                continue
            os.remove(str(file_path))
    return content
from tests.mockserver import MockServer, MockDNSServer


class LocalhostSpider(Spider):
    """Spider that fetches a single URL and logs host/IP details."""

    name = "localhost_spider"

    def start_requests(self):
        yield Request(self.url)

    def parse(self, response):
        netloc = urlparse(response.url).netloc
        # Pass lazy %-style arguments to the logger instead of
        # pre-formatting with '%' so formatting is skipped when the
        # record is filtered out.
        self.logger.info("Host: %s", netloc.split(":")[0])
        self.logger.info("Type: %s", type(response.ip_address))
        self.logger.info("IP address: %s", response.ip_address)


if __name__ == "__main__":
    with MockServer() as mock_http_server, MockDNSServer() as mock_dns_server:
        port = urlparse(mock_http_server.http_address).port
        url = "http://not.a.real.domain:{port}/echo".format(port=port)
        # Route DNS lookups through the mock DNS server so the fake
        # domain resolves to the mock HTTP server.
        servers = [(mock_dns_server.host, mock_dns_server.port)]
        reactor.installResolver(createResolver(servers=servers))
        configure_logging()
        runner = CrawlerRunner()
        d = runner.crawl(LocalhostSpider, url=url)
        d.addBoth(lambda _: reactor.stop())
        reactor.run()
def setUp(self):
    # Start the mock server by entering its context manager manually;
    # the matching __exit__ presumably happens in tearDown (not
    # visible in this chunk).
    self.mockserver = MockServer()
    self.mockserver.__enter__()
def setUp(self):
    # Start the mock server (entered manually; __exit__ presumably in
    # tearDown — not visible here) and create a fresh CrawlerRunner.
    self.mockserver = MockServer()
    self.mockserver.__enter__()
    self.runner = CrawlerRunner()
def setUpClass(cls):
    # Start one mock server shared by all tests in the class.
    # NOTE(review): unittest requires setUpClass to be a @classmethod;
    # the decorator is not visible in this chunk — confirm it exists.
    cls.mockserver = MockServer()
    cls.mockserver.__enter__()
def setUp(self):
    # Create the AutoLogin under test and start the mock server
    # (entered manually; __exit__ presumably in tearDown — not
    # visible in this chunk).
    self.al = AutoLogin()
    self.mockserver = MockServer()
    self.mockserver.__enter__()