import os import sys from twisted.trial.unittest import TestCase, SkipTest from scrapy.downloadermiddlewares.httpproxy import HttpProxyMiddleware from scrapy.exceptions import NotConfigured from scrapy.http import Response, Request from scrapy.spider import Spider spider = Spider('foo') class TestDefaultHeadersMiddleware(TestCase): failureException = AssertionError def setUp(self): self._oldenv = os.environ.copy() def tearDown(self): os.environ = self._oldenv def test_no_proxies(self): os.environ = {} self.assertRaises(NotConfigured, HttpProxyMiddleware) def test_no_enviroment_proxies(self): os.environ = {'dummy_proxy': 'reset_env_and_do_not_raise'} mw = HttpProxyMiddleware() for url in ('http://e.com', 'https://e.com', 'file:///tmp/a'):
def setUp(self):
    """Build the media pipeline fixture: a pipeline wired to the mocked
    download function, opened for a dummy spider."""
    spider = Spider('media.com')
    pipeline = self.pipeline_class(download_func=_mocked_download_func)
    pipeline.open_spider(spider)
    self.spider = spider
    self.pipe = pipeline
    # open_spider() created the per-spider info object; keep a handle to it
    self.info = pipeline.spiderinfo
def setUp(self):
    """Provide a fresh DecompressionMiddleware and a dummy spider per test."""
    self.spider = Spider('foo')
    self.mw = DecompressionMiddleware()
def setUp(self):
    """Build an S3 download handler backed by the mock HTTP handler and
    expose its download_request callable for the tests."""
    handler = S3DownloadHandler(
        Settings(),
        self.AWS_ACCESS_KEY_ID,
        self.AWS_SECRET_ACCESS_KEY,
        httpdownloadhandler=HttpDownloadHandlerMock,
    )
    self.download_request = handler.download_request
    self.spider = Spider('foo')
def setUp(self):
    """Create a RetryMiddleware from a fresh crawler, capped at two retries."""
    self.spider = Spider('foo')
    self.mw = RetryMiddleware.from_crawler(get_crawler())
    # keep retries small so tests stay fast
    self.mw.max_retry_times = 2
def get_spider_and_mw(self, default_useragent):
    """Return a (spider, middleware) pair configured with *default_useragent*."""
    crawler = get_crawler({'USER_AGENT': default_useragent})
    test_spider = Spider('foo')
    test_spider.set_crawler(crawler)
    middleware = UserAgentMiddleware.from_crawler(crawler)
    return test_spider, middleware
def test_download_head(self):
    """A HEAD request must yield a response with an empty body.

    Returns the Deferred so trial waits for the download to finish.
    """
    request = Request(self.getURL('file'), method='HEAD')
    d = self.download_request(request, Spider('foo'))
    d.addCallback(lambda r: r.body)
    # assertEquals is a deprecated alias; use the canonical assertEqual
    d.addCallback(self.assertEqual, '')
    return d
def setUp(self):
    """Fresh RedirectMiddleware and dummy spider for each test."""
    self.spider = Spider('foo')
    self.mw = RedirectMiddleware.from_crawler(get_crawler())
def setUp(self):
    """Fresh MetaRefreshMiddleware and dummy spider for each test."""
    self.spider = Spider('foo')
    self.mw = MetaRefreshMiddleware.from_crawler(get_crawler())
def test_msg_spider(self):
    """A message logged with a spider is prefixed with the spider name."""
    log.msg("Hello", spider=Spider("myspider"))
    self.assertEqual(self.logged(), "[myspider] INFO: Hello")
def test_msg_level_spider(self):
    """An explicit level appears in the formatted output alongside the
    spider-name prefix."""
    log.msg("Hello", level=log.WARNING, spider=Spider("myspider"))
    self.assertEqual(self.logged(), "[myspider] WARNING: Hello")
def setUp(self):
    """Dummy spider plus the minimal Crawlera credential settings."""
    self.settings = {
        'CRAWLERA_USER': '******',
        'CRAWLERA_PASS': '******',
    }
    self.spider = Spider('foo')
def test_msg_ignore_spider(self):
    """A message for a spider this observer does not track is dropped."""
    spider = Spider("myspider")
    log.msg("Hello", spider=spider)
    # failIf is a deprecated alias; use the canonical assertFalse
    self.assertFalse(self.logged())
def setUp(self):
    """Fresh LogFormatter and a default-named spider for each test."""
    self.spider = Spider('default')
    self.formatter = LogFormatter()
def _get_spider(self):
    """Factory for the plain spider used by these tests."""
    spider = Spider('foo')
    return spider
def setUp(self):
    """Spider with one start URL plus an empty HTML response fixture."""
    start_url = "http://example.com"
    self.spider = Spider('myspider', start_urls=[start_url])
    self.response = HtmlResponse(url="http://www.example.com",
                                 body="<html></html>")
def _get_spider(self):
    """Spider whose allowed_domains holds junk entries (None and whatever
    hostname urlparse extracts from a malformed URL)."""
    weird_hostname = urlparse('http:////scrapytest.org').hostname
    domains = ['scrapytest.org', None, weird_hostname]
    return Spider('foo', allowed_domains=domains)
def setUp(self):
    """Enable AJAXCRAWL and build the middleware from that crawler."""
    crawler = get_crawler({'AJAXCRAWL_ENABLED': True})
    self.mw = AjaxCrawlMiddleware.from_crawler(crawler)
    self.spider = Spider('foo')
def test_download(self):
    """Downloading the fixture file yields its known ten-byte body.

    Returns the Deferred so trial waits for the download to finish.
    """
    request = Request(self.getURL('file'))
    d = self.download_request(request, Spider('foo'))
    d.addCallback(lambda r: r.body)
    # assertEquals is a deprecated alias; use the canonical assertEqual
    d.addCallback(self.assertEqual, "0123456789")
    return d
def setUp(self):
    """Fresh RefererMiddleware and dummy spider for each test."""
    self.mw = RefererMiddleware()
    self.spider = Spider('foo')
def test_redirect_status_head(self):
    """A HEAD request to the redirect URL must report status 302.

    Returns the Deferred so trial waits for the download to finish.
    """
    request = Request(self.getURL('redirect'), method='HEAD')
    d = self.download_request(request, Spider('foo'))
    d.addCallback(lambda r: r.status)
    # assertEquals is a deprecated alias; use the canonical assertEqual
    d.addCallback(self.assertEqual, 302)
    return d
def setUp(self):
    """Middleware with HTTPERROR_ALLOW_ALL set, plus canned 200/404/402
    responses for a single request."""
    self.spider = Spider('foo')
    settings = Settings({'HTTPERROR_ALLOW_ALL': True})
    self.mw = HttpErrorMiddleware(settings)
    self.req = Request('http://scrapytest.org')
    statuses = [200, 404, 402]
    self.res200, self.res404, self.res402 = _responses(self.req, statuses)
def test_non_existent(self):
    """Downloading a file:// URL that points nowhere must fail with IOError."""
    missing_path = self.mktemp()
    request = Request('file://%s' % missing_path)
    deferred = self.download_request(request, Spider('foo'))
    return self.assertFailure(deferred, IOError)
def setUp(self):
    """Middleware with default settings, plus canned 200/404 responses
    for a single request."""
    self.spider = Spider('foo')
    self.mw = HttpErrorMiddleware(Settings({}))
    self.req = Request('http://scrapytest.org')
    statuses = [200, 404]
    self.res200, self.res404 = _responses(self.req, statuses)
def setUp(self):
    """Dummy spider plus the minimal hubproxy credential settings."""
    self.settings = {
        'HUBPROXY_USER': '******',
        'HUBPROXY_PASS': '******',
    }
    self.spider = Spider('foo')
def _get_spider(self):
    """Spider restricted to the two test domains."""
    allowed = ['scrapytest.org', 'scrapy.org']
    return Spider('foo', allowed_domains=allowed)
def setUp(self):
    """Fresh CookiesMiddleware and dummy spider for each test."""
    self.mw = CookiesMiddleware()
    self.spider = Spider('foo')
def _get_spider(self):
    """Spider with domain filtering explicitly disabled."""
    spider = Spider('foo', allowed_domains=None)
    return spider
def setUp(self):
    """Dupefilter fixture: a temp sqlite db path plus a stats collector."""
    self.spider = Spider('df_tests')
    self.temp_dir = tempfile.gettempdir()
    self.db_path = os.path.join(self.temp_dir, 'df_tests.db')
    self.stats = StatsCollector(get_crawler(Spider))
def setUp(self):
    """Connect to the test Redis server and build the queue under test.

    The queue key is namespaced under the spider name.
    """
    self.spider = Spider('myspider')
    self.key = 'scrapy_redis:tests:%s:queue' % self.spider.name
    self.server = redis.Redis(REDIS_HOST, REDIS_PORT)
    # Reuse self.spider rather than constructing a second, distinct
    # Spider('myspider'): the original created two separate instances, so
    # the queue's spider was not the same object as self.spider.
    self.q = self.queue_cls(self.server, self.spider, self.key)