from unittest import mock

from twisted.internet import error, reactor
from twisted.internet.defer import Deferred
from twisted.python import failure

from scrapy.downloadermiddlewares.robotstxt import RobotsTxtMiddleware
from scrapy.http import Request


def test_robotstxt_error(self):
    self.crawler.settings.set('ROBOTSTXT_OBEY', True)
    err = error.DNSLookupError('Robotstxt address not found')

    # Stub the engine so the robots.txt download fails asynchronously
    # with a DNS lookup error.
    def return_failure(request, spider):
        deferred = Deferred()
        reactor.callFromThread(deferred.errback, failure.Failure(err))
        return deferred

    self.crawler.engine.download.side_effect = return_failure

    # Wrap _logerror so the real handler still runs while calls are recorded.
    middleware = RobotsTxtMiddleware(self.crawler)
    middleware._logerror = mock.MagicMock(side_effect=middleware._logerror)
    deferred = middleware.process_request(Request('http://site.local'), None)
    deferred.addCallback(lambda _: self.assertTrue(middleware._logerror.called))
    return deferred
def test_robotstxt_garbage(self):
    # A garbage (unparseable) robots.txt response should be discarded,
    # which is equivalent to 'allow all'.
    middleware = RobotsTxtMiddleware(self._get_garbage_crawler())
    middleware._logerror = mock.MagicMock()
    middleware.process_request(Request('http://site.local'), None)
    self.assertNotIgnored(Request('http://site.local'), middleware)

    def test(r):
        self.assertNotIgnored(Request('http://site.local/allowed'), middleware)
        self.assertNotIgnored(Request('http://site.local/admin/main'), middleware)
        self.assertNotIgnored(Request('http://site.local/static/'), middleware)

    deferred = Deferred()
    deferred.addCallback(test)
    deferred.addErrback(
        lambda _: self.assertIsNone(middleware._logerror.assert_any_call()))
    reactor.callFromThread(deferred.callback, None)
    return deferred
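
# The tests above assume a fixture along these lines: self.crawler is a
# mocked crawler carrying real Settings and a stubbable engine.download.
# The setUp and _get_garbage_crawler() below are minimal sketches of that
# fixture under those assumptions, not necessarily the suite's exact code;
# the binary body (a GIF header) is illustrative garbage for the parser.
def setUp(self):
    from scrapy.settings import Settings  # real settings on a mocked crawler

    self.crawler = mock.MagicMock()
    self.crawler.settings = Settings()
    self.crawler.engine.download = mock.MagicMock()


def _get_garbage_crawler(self):
    from scrapy.http import Response  # only this helper builds a Response

    crawler = self.crawler
    crawler.settings.set('ROBOTSTXT_OBEY', True)
    # Serve a binary blob where robots.txt is expected, so the middleware's
    # robots parser receives unparseable input.
    response = Response('http://site.local/robots.txt',
                        body=b'GIF89a\xd3\x00\xfe\x00\xa2')

    def return_response(request, spider):
        deferred = Deferred()
        reactor.callFromThread(deferred.callback, response)
        return deferred

    crawler.engine.download.side_effect = return_response
    return crawler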