Example #1
import os
import sys
from twisted.trial.unittest import TestCase, SkipTest

from scrapy.downloadermiddlewares.httpproxy import HttpProxyMiddleware
from scrapy.exceptions import NotConfigured
from scrapy.http import Response, Request
from scrapy.spider import Spider

spider = Spider('foo')


class TestHttpProxyMiddleware(TestCase):

    failureException = AssertionError

    def setUp(self):
        self._oldenv = os.environ.copy()

    def tearDown(self):
        os.environ = self._oldenv

    def test_no_proxies(self):
        os.environ = {}
        self.assertRaises(NotConfigured, HttpProxyMiddleware)

    def test_no_environment_proxies(self):
        os.environ = {'dummy_proxy': 'reset_env_and_do_not_raise'}
        mw = HttpProxyMiddleware()

        for url in ('http://e.com', 'https://e.com', 'file:///tmp/a'):
            # With no proxies configured, requests must pass through untouched.
            req = Request(url)
            assert mw.process_request(req, spider) is None
            self.assertEqual(req.url, url)
            self.assertEqual(req.headers, {})
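The snippet above only exercises the no-proxy path. A hedged companion check (the proxy URL and test name are assumptions, not part of the original) would verify that a proxy found in the environment lands in request.meta:

    def test_environment_proxy_is_picked_up(self):
        # Hypothetical positive case: HttpProxyMiddleware reads proxies from
        # the environment at construction time and attaches the matching one
        # to each request's meta.
        os.environ = {'http_proxy': 'https://proxy.example.com:3128'}
        mw = HttpProxyMiddleware()
        req = Request('http://scrapytest.org')
        assert mw.process_request(req, spider) is None
        self.assertEqual(req.meta.get('proxy'), 'https://proxy.example.com:3128')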
Example #2
 def setUp(self):
     self.spider = Spider('media.com')
     self.pipe = self.pipeline_class(download_func=_mocked_download_func)
     self.pipe.open_spider(self.spider)
     self.info = self.pipe.spiderinfo
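This fixture passes a _mocked_download_func that the listing never defines. A plausible sketch, assuming the helper answers each request from data planted in request.meta so the media pipeline runs without network I/O:

 def _mocked_download_func(request, info):
     # Hedged stand-in for the missing helper: resolve the canned response
     # stashed on the request instead of downloading anything.
     response = request.meta.get('response')
     return response() if callable(response) else response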
Example #3
 def setUp(self):
     self.mw = DecompressionMiddleware()
     self.spider = Spider('foo')
Example #4
 def setUp(self):
     s3reqh = S3DownloadHandler(Settings(), self.AWS_ACCESS_KEY_ID,
                                self.AWS_SECRET_ACCESS_KEY,
                                httpdownloadhandler=HttpDownloadHandlerMock)
     self.download_request = s3reqh.download_request
     self.spider = Spider('foo')
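HttpDownloadHandlerMock is likewise absent from the listing. A minimal stand-in, assuming the test only needs the signed request echoed back so its Authorization header can be inspected:

 class HttpDownloadHandlerMock(object):
     # Hypothetical mock: skip the actual HTTP download and return the
     # request itself, which by now carries the S3 signature to assert on.
     def __init__(self, settings):
         pass

     def download_request(self, request, spider):
         return request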
Example #5
 def setUp(self):
     crawler = get_crawler()
     self.spider = Spider('foo')
     self.mw = RetryMiddleware.from_crawler(crawler)
     self.mw.max_retry_times = 2
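A hedged usage sketch for this fixture (URL, status, and test name are assumptions): a retryable status such as 503 should come back as a fresh Request until max_retry_times is exhausted:

 def test_503_is_retried(self):
     req = Request('http://scrapytest.org/503')
     rsp = Response('http://scrapytest.org/503', body='', status=503)
     # The middleware signals a retry by returning a new Request object.
     retried = self.mw.process_response(req, rsp, self.spider)
     self.assertIsInstance(retried, Request)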
Example #6
 def get_spider_and_mw(self, default_useragent):
     crawler = get_crawler({'USER_AGENT': default_useragent})
     spider = Spider('foo')
     spider.set_crawler(crawler)
     return spider, UserAgentMiddleware.from_crawler(crawler)
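How the helper might be used (header value and test name are assumptions): the middleware should stamp the configured default User-Agent onto outgoing requests:

 def test_default_agent(self):
     spider, mw = self.get_spider_and_mw('default_useragent')
     req = Request('http://scrapytest.org/')
     assert mw.process_request(req, spider) is None
     self.assertEqual(req.headers['User-Agent'], 'default_useragent')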
Example #7
 def test_download_head(self):
     request = Request(self.getURL('file'), method='HEAD')
     d = self.download_request(request, Spider('foo'))
     d.addCallback(lambda r: r.body)
     d.addCallback(self.assertEqual, '')
     return d
Example #8
 def setUp(self):
     crawler = get_crawler()
     self.spider = Spider('foo')
     self.mw = RedirectMiddleware.from_crawler(crawler)
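A hedged companion for this fixture (URLs and test name are assumptions): a 301 response carrying a Location header should be rewritten into a Request for the target URL:

 def test_redirect_301(self):
     req = Request('http://scrapytest.org/a')
     rsp = Response('http://scrapytest.org/a', status=301,
                    headers={'Location': 'http://scrapytest.org/b'})
     # RedirectMiddleware swaps the response for a follow-up request.
     req2 = self.mw.process_response(req, rsp, self.spider)
     self.assertIsInstance(req2, Request)
     self.assertEqual(req2.url, 'http://scrapytest.org/b')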
Example #9
 def setUp(self):
     crawler = get_crawler()
     self.spider = Spider('foo')
     self.mw = MetaRefreshMiddleware.from_crawler(crawler)
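Similarly, a hedged check for meta-refresh handling (markup, URLs, and test name are assumptions; HtmlResponse as imported in Example #16): a refresh tag within the delay threshold should yield a redirected Request:

 def test_meta_refresh(self):
     body = '<html><head><meta http-equiv="refresh" content="5;url=http://example.org/newpage"></head></html>'
     req = Request('http://example.org')
     rsp = HtmlResponse(req.url, body=body)
     # Refresh delays under METAREFRESH_MAXDELAY are followed as redirects.
     req2 = self.mw.process_response(req, rsp, self.spider)
     self.assertIsInstance(req2, Request)
     self.assertEqual(req2.url, 'http://example.org/newpage')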
Example #10
 def test_msg_spider(self):
     spider = Spider("myspider")
     log.msg("Hello", spider=spider)
     self.assertEqual(self.logged(), "[myspider] INFO: Hello")
Example #11
 def test_msg_level_spider(self):
     spider = Spider("myspider")
     log.msg("Hello", spider=spider, level=log.WARNING)
     self.assertEqual(self.logged(), "[myspider] WARNING: Hello")
Example #12
 def setUp(self):
     self.spider = Spider('foo')
     self.settings = {'CRAWLERA_USER': '******', 'CRAWLERA_PASS': '******'}
Example #13
 def test_msg_ignore_spider(self):
     spider = Spider("myspider")
     log.msg("Hello", spider=spider)
     self.failIf(self.logged())
Example #14
 def setUp(self):
     self.formatter = LogFormatter()
     self.spider = Spider('default')
Example #15
 def _get_spider(self):
     return Spider('foo')
Example #16
 def setUp(self):
     self.spider = Spider('myspider',
                          start_urls=["http://example.com"])
     self.response = HtmlResponse(body="<html></html>",
                                  url="http://www.example.com")
Example #17
 def _get_spider(self):
     bad_hostname = urlparse('http:////scrapytest.org').hostname
     return Spider('foo',
                   allowed_domains=['scrapytest.org', None, bad_hostname])
Example #18
 def setUp(self):
     self.spider = Spider('foo')
     crawler = get_crawler({'AJAXCRAWL_ENABLED': True})
     self.mw = AjaxCrawlMiddleware.from_crawler(crawler)
Example #19
 def test_download(self):
     request = Request(self.getURL('file'))
     d = self.download_request(request, Spider('foo'))
     d.addCallback(lambda r: r.body)
     d.addCallback(self.assertEqual, "0123456789")
     return d
Example #20
 def setUp(self):
     self.spider = Spider('foo')
     self.mw = RefererMiddleware()
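A hedged usage sketch (URLs and test name are assumptions): requests yielded while processing a response should receive that response's URL as their Referer header:

 def test_referer_is_set(self):
     rsp = Response('http://scrapytest.org')
     reqs = [Request('http://scrapytest.org/next')]
     out = list(self.mw.process_spider_output(rsp, reqs, self.spider))
     self.assertEqual(out[0].headers.get('Referer'), 'http://scrapytest.org')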
Example #21
 def test_redirect_status_head(self):
     request = Request(self.getURL('redirect'), method='HEAD')
     d = self.download_request(request, Spider('foo'))
     d.addCallback(lambda r: r.status)
     d.addCallback(self.assertEqual, 302)
     return d
Example #22
 def setUp(self):
     self.spider = Spider('foo')
     self.mw = HttpErrorMiddleware(Settings({'HTTPERROR_ALLOW_ALL': True}))
     self.req = Request('http://scrapytest.org')
     self.res200, self.res404, self.res402 = _responses(
         self.req, [200, 404, 402])
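This HttpErrorMiddleware fixture (and Example #24 below) leans on a _responses helper the listing never shows. A plausible sketch, assuming it fabricates one Response per status code and ties each back to the originating request:

 def _responses(request, status_codes):
     # Hedged stand-in for the missing helper.
     responses = []
     for code in status_codes:
         response = Response(request.url, status=code)
         response.request = request
         responses.append(response)
     return responses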
Example #23
 def test_non_existent(self):
     request = Request('file://%s' % self.mktemp())
     d = self.download_request(request, Spider('foo'))
     return self.assertFailure(d, IOError)
Example #24
 def setUp(self):
     self.spider = Spider('foo')
     self.mw = HttpErrorMiddleware(Settings({}))
     self.req = Request('http://scrapytest.org')
     self.res200, self.res404 = _responses(self.req, [200, 404])
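Under default settings the middleware should pass the 200 through and reject the 404. A hedged check (test name assumed; HttpError taken to be the exception raised by Scrapy's httperror spider middleware):

 def test_process_spider_input(self):
     # 2xx responses pass through (None); others raise HttpError.
     self.assertEqual(None, self.mw.process_spider_input(self.res200, self.spider))
     self.assertRaises(HttpError, self.mw.process_spider_input,
                       self.res404, self.spider)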
Example #25
 def setUp(self):
     self.spider = Spider('foo')
     self.settings = {'HUBPROXY_USER': '******', 'HUBPROXY_PASS': '******'}
Example #26
 def _get_spider(self):
     return Spider('foo', allowed_domains=['scrapytest.org', 'scrapy.org'])
Example #27
 def setUp(self):
     self.spider = Spider('foo')
     self.mw = CookiesMiddleware()
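A hedged round-trip for this fixture (cookie name, paths, and test name are assumptions): a Set-Cookie received on one response should be echoed on the next request to the same site:

 def test_cookie_roundtrip(self):
     req = Request('http://scrapytest.org/')
     assert self.mw.process_request(req, self.spider) is None
     rsp = Response('http://scrapytest.org/',
                    headers={'Set-Cookie': 'C1=value1; path=/'})
     # The middleware stores the cookie from the response...
     assert self.mw.process_response(req, rsp, self.spider) is rsp
     req2 = Request('http://scrapytest.org/sub1/')
     assert self.mw.process_request(req2, self.spider) is None
     # ...and replays it on the follow-up request.
     self.assertEqual(req2.headers.get('Cookie'), 'C1=value1')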
Example #28
 def _get_spider(self):
     return Spider('foo', allowed_domains=None)
Example #29
 def setUp(self):
     self.spider = Spider('df_tests')
     self.temp_dir = tempfile.gettempdir()
     self.db_path = os.path.join(self.temp_dir, 'df_tests.db')
     crawler = get_crawler(Spider)
     self.stats = StatsCollector(crawler)
Example #30
 def setUp(self):
     self.spider = Spider('myspider')
     self.key = 'scrapy_redis:tests:%s:queue' % self.spider.name
     self.server = redis.Redis(REDIS_HOST, REDIS_PORT)
     self.q = self.queue_cls(self.server, self.spider, self.key)
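A hedged smoke test for this fixture, assuming the scrapy_redis queue classes expose push() and pop() for whole Request objects:

 def test_roundtrip(self):
     req = Request('http://example.com/page')
     self.q.push(req)
     out = self.q.pop()
     # The request should survive serialization through Redis intact.
     self.assertEqual(out.url, req.url)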