class UserAgentMiddlewareTest(TestCase):
    """Check which User-Agent header ``process_request`` leaves on a request.

    The cases below assert this precedence: a ``User-Agent`` header already
    present on the request, then the spider's ``user_agent`` attribute, then
    the middleware's ``default_useragent``. When none of them is set, no
    header is added.
    """

    def setUp(self):
        """Create a fresh spider and middleware for every test."""
        self.spider = BaseSpider()
        self.mw = UserAgentMiddleware()

    def tearDown(self):
        """Drop the middleware created in ``setUp``."""
        del self.mw

    # NOTE: ``process_request`` is called through ``assertIsNone`` rather than
    # a bare ``assert`` so the call still runs (and is still checked) when
    # Python is started with ``-O``, which strips ``assert`` statements.

    def test_default_agent(self):
        """The middleware default is used when the spider supplies nothing."""
        self.mw.default_useragent = 'default_useragent'

        # Spider without a ``user_agent`` value set by this test.
        req = Request('http://scrapytest.org/')
        self.assertIsNone(self.mw.process_request(req, self.spider))
        self.assertEqual(req.headers['User-Agent'], 'default_useragent')

        # None or not present user_agent attribute is the same
        self.spider.user_agent = None
        req = Request('http://scrapytest.org/')
        self.assertIsNone(self.mw.process_request(req, self.spider))
        self.assertEqual(req.headers['User-Agent'], 'default_useragent')

    def test_spider_agent(self):
        """The spider's ``user_agent`` overrides the middleware default."""
        self.mw.default_useragent = 'default_useragent'
        self.spider.user_agent = 'spider_useragent'
        req = Request('http://scrapytest.org/')
        self.assertIsNone(self.mw.process_request(req, self.spider))
        self.assertEqual(req.headers['User-Agent'], 'spider_useragent')

    def test_header_agent(self):
        """A header already on the request is left untouched."""
        self.mw.default_useragent = 'default_useragent'
        self.spider.user_agent = 'spider_useragent'
        req = Request('http://scrapytest.org/',
                      headers={'User-Agent': 'header_useragent'})
        self.assertIsNone(self.mw.process_request(req, self.spider))
        self.assertEqual(req.headers['User-Agent'], 'header_useragent')

    def test_no_agent(self):
        """No header is added when neither default nor spider value is set."""
        self.mw.default_useragent = None
        self.spider.user_agent = None
        req = Request('http://scrapytest.org/')
        self.assertIsNone(self.mw.process_request(req, self.spider))
        self.assertNotIn('User-Agent', req.headers)
def __init__(self, user_agent=''):
    """Run the base middleware initialiser, then record ``user_agent``.

    :param user_agent: value stored on the instance as ``self.user_agent``;
        defaults to the empty string.
    """
    # The base initialiser is called without arguments; the attribute is
    # assigned afterwards so the value passed here is the one that sticks.
    UserAgentMiddleware.__init__(self)
    self.user_agent = user_agent
def get_spider_and_mw(self, default_useragent):
    """Return a ``(spider, middleware)`` pair sharing one crawler.

    :param default_useragent: value passed to ``get_crawler`` under the
        ``'USER_AGENT'`` key (presumably a settings override -- confirm
        against ``get_crawler``).
    :returns: a tuple of the ``Spider`` named ``'foo'`` attached to the
        crawler, and the ``UserAgentMiddleware`` built from that crawler.
    """
    overrides = {'USER_AGENT': default_useragent}
    test_crawler = get_crawler(overrides)

    foo_spider = Spider('foo')
    foo_spider.set_crawler(test_crawler)

    # The middleware is built only after the spider has been attached,
    # matching the original call order.
    middleware = UserAgentMiddleware.from_crawler(test_crawler)
    return foo_spider, middleware
def setUp(self):
    """Create the spider and middleware fixtures used by each test.

    Stores a new ``BaseSpider`` as ``self.spider`` and a new
    ``UserAgentMiddleware`` as ``self.mw``; both are built with no arguments.
    """
    self.spider = BaseSpider()
    self.mw = UserAgentMiddleware()