def test_robotstxt_error(self):
        self.crawler.settings.set('ROBOTSTXT_OBEY', True)
        err = error.DNSLookupError('Robotstxt address not found')
        def return_failure(request, spider):
            deferred = Deferred()
            reactor.callFromThread(deferred.errback, failure.Failure(err))
            return deferred
        self.crawler.engine.download.side_effect = return_failure

        middleware = RobotsTxtMiddleware(self.crawler)
        middleware._logerror = mock.MagicMock(side_effect=middleware._logerror)
        deferred = middleware.process_request(Request('http://site.local'), None)
        deferred.addCallback(lambda _: self.assertTrue(middleware._logerror.called))
        return deferred
    def test_robotstxt_error(self):
        self.crawler.settings.set('ROBOTSTXT_OBEY', True)
        err = error.DNSLookupError('Robotstxt address not found')
        def return_failure(request, spider):
            deferred = Deferred()
            reactor.callFromThread(deferred.errback, failure.Failure(err))
            return deferred
        self.crawler.engine.download.side_effect = return_failure

        middleware = RobotsTxtMiddleware(self.crawler)
        middleware._logerror = mock.MagicMock(side_effect=middleware._logerror)
        deferred = middleware.process_request(Request('http://site.local'), None)
        deferred.addCallback(lambda _: self.assertTrue(middleware._logerror.called))
        return deferred
Esempio n. 3
0
    def test_robotstxt_garbage(self):
        # garbage response should be discarded, equal 'allow all'
        middleware = RobotsTxtMiddleware(self._get_garbage_crawler())
        middleware._logerror = mock.MagicMock()
        middleware.process_request(Request('http://site.local'), None)
        self.assertNotIgnored(Request('http://site.local'), middleware)

        def test(r):
            self.assertNotIgnored(Request('http://site.local/allowed'),
                                  middleware)
            self.assertNotIgnored(Request('http://site.local/admin/main'),
                                  middleware)
            self.assertNotIgnored(Request('http://site.local/static/'),
                                  middleware)

        deferred = Deferred()
        deferred.addCallback(test)
        deferred.addErrback(lambda _: self.assertIsNone(middleware._logerror.
                                                        assert_any_call()))
        reactor.callFromThread(deferred.callback, None)
        return deferred