class LoggingContribTest(unittest.TestCase):
    """Tests for the single-string messages produced by LogFormatter."""

    def setUp(self):
        self.formatter = LogFormatter()
        self.spider = BaseSpider('default')

    def test_crawled(self):
        # No referer header: the message should end with "(referer: None)".
        request = Request("http://www.example.com")
        response = Response("http://www.example.com")
        message = self.formatter.crawled(request, response, self.spider)
        self.assertEqual(
            message,
            "Crawled (200) <GET http://www.example.com> (referer: None)")

        # Referer header set plus a 'cached' response flag: both appear.
        request = Request("http://www.example.com",
                          headers={'referer': 'http://example.com'})
        response = Response("http://www.example.com", flags=['cached'])
        message = self.formatter.crawled(request, response, self.spider)
        self.assertEqual(
            message,
            "Crawled (200) <GET http://www.example.com> (referer: http://example.com) ['cached']")

    def test_dropped(self):
        # The dropped-item line combines the item repr and the exception text.
        error = Exception(u"\u2018")
        message = self.formatter.dropped({}, error, self.spider)
        self.assertEqual(message, u"Dropped {} - \u2018")
# Example #2
class LoggingContribTest(unittest.TestCase):
    """Tests for LogFormatter messages that span multiple lines."""

    def setUp(self):
        self.formatter = LogFormatter()
        self.spider = BaseSpider('default')

    def test_crawled(self):
        # Plain request: referer defaults to None, no flags rendered.
        request = Request("http://www.example.com")
        response = Response("http://www.example.com")
        message = self.formatter.crawled(request, response, self.spider)
        self.assertEqual(
            message,
            "Crawled (200) <GET http://www.example.com> (referer: None)")

        # Referer header plus a 'cached' response flag both show up.
        request = Request("http://www.example.com",
                          headers={'referer': 'http://example.com'})
        response = Response("http://www.example.com", flags=['cached'])
        message = self.formatter.crawled(request, response, self.spider)
        self.assertEqual(
            message,
            "Crawled (200) <GET http://www.example.com> (referer: http://example.com) ['cached']")

    def test_dropped(self):
        # dropped() takes the response here; output is exception line + item repr.
        error = Exception(u"\u2018")
        response = Response("http://www.example.com")
        rendered = self.formatter.dropped({}, error, response, self.spider)
        lines = rendered.splitlines()
        # Every line must already be unicode (Python 2 era codebase).
        for line in lines:
            self.assertTrue(isinstance(line, unicode))
        self.assertEqual(lines, [u"Dropped: \u2018", '{}'])

    def test_scraped(self):
        # Non-ASCII item values must survive formatting as unicode.
        item = CustomItem()
        item['name'] = u'\xa3'
        response = Response("http://www.example.com")
        rendered = self.formatter.scraped(item, response, self.spider)
        lines = rendered.splitlines()
        for line in lines:
            self.assertTrue(isinstance(line, unicode))
        self.assertEqual(
            lines,
            [u"Scraped from <200 http://www.example.com>", u'name: \xa3'])
class LoggingContribTest(unittest.TestCase):
    """Tests for LogFormatter's crawled/dropped message strings."""

    def setUp(self):
        self.formatter = LogFormatter()
        self.spider = BaseSpider('default')

    def test_crawled(self):
        # Without a referer header the message reports "referer: None".
        request = Request("http://www.example.com")
        response = Response("http://www.example.com")
        self.assertEqual(
            self.formatter.crawled(request, response, self.spider),
            "Crawled (200) <GET http://www.example.com> (referer: None)")

        # With a referer header and a flagged response, both are appended.
        request = Request("http://www.example.com",
                          headers={'referer': 'http://example.com'})
        response = Response("http://www.example.com", flags=['cached'])
        self.assertEqual(
            self.formatter.crawled(request, response, self.spider),
            "Crawled (200) <GET http://www.example.com> (referer: http://example.com) ['cached']")

    def test_dropped(self):
        # Item repr and exception text are joined on a single line.
        error = Exception(u"\u2018")
        self.assertEqual(
            self.formatter.dropped({}, error, self.spider),
            u"Dropped {} - \u2018")
class LogFormatterTestCase(unittest.TestCase):
    """Tests for the modern LogFormatter API, whose methods return a dict of
    logging kwargs ('msg' and 'args') instead of a pre-rendered string.
    """

    def setUp(self):
        self.formatter = LogFormatter()
        self.spider = Spider('default')

    def _render(self, logkws):
        # Render the final log line exactly as the logging machinery would.
        return logkws['msg'] % logkws['args']

    def test_crawled_without_referer(self):
        # NOTE: this test and the next one previously had their names
        # swapped — this request carries NO Referer header, so the
        # rendered line must say "(referer: None)".
        req = Request("http://www.example.com")
        res = Response("http://www.example.com")
        logkws = self.formatter.crawled(req, res, self.spider)
        self.assertEqual(
            self._render(logkws),
            "Crawled (200) <GET http://www.example.com> (referer: None)")

    def test_crawled_with_referer(self):
        # Referer header set and a 'cached' response flag: both rendered.
        req = Request("http://www.example.com",
                      headers={'referer': 'http://example.com'})
        res = Response("http://www.example.com", flags=['cached'])
        logkws = self.formatter.crawled(req, res, self.spider)
        self.assertEqual(
            self._render(logkws),
            "Crawled (200) <GET http://www.example.com> (referer: http://example.com) ['cached']"
        )

    def test_flags_in_request(self):
        # Request flags are rendered right after the request, before the referer.
        req = Request("http://www.example.com", flags=['test', 'flag'])
        res = Response("http://www.example.com")
        logkws = self.formatter.crawled(req, res, self.spider)
        self.assertEqual(
            self._render(logkws),
            "Crawled (200) <GET http://www.example.com> ['test', 'flag'] (referer: None)"
        )

    def test_dropped(self):
        # Output spans two lines: exception text, then the item repr.
        item = {}
        exception = Exception(u"\u2018")
        response = Response("http://www.example.com")
        logkws = self.formatter.dropped(item, exception, response, self.spider)
        lines = self._render(logkws).splitlines()
        self.assertTrue(all(isinstance(x, str) for x in lines))
        self.assertEqual(lines, [u"Dropped: \u2018", '{}'])

    def test_item_error(self):
        # In practice, the complete traceback is shown by passing the
        # 'exc_info' argument to the logging function.
        item = {'key': 'value'}
        exception = Exception()
        response = Response("http://www.example.com")
        logkws = self.formatter.item_error(item, exception, response,
                                           self.spider)
        self.assertEqual(self._render(logkws),
                         u"Error processing {'key': 'value'}")

    def test_spider_error(self):
        # In practice, the complete traceback is shown by passing the
        # 'exc_info' argument to the logging function.
        failure = Failure(Exception())
        request = Request("http://www.example.com",
                          headers={'Referer': 'http://example.org'})
        response = Response("http://www.example.com", request=request)
        logkws = self.formatter.spider_error(failure, request, response,
                                             self.spider)
        self.assertEqual(
            self._render(logkws),
            "Spider error processing <GET http://www.example.com> (referer: http://example.org)"
        )

    def test_download_error_short(self):
        # In practice, the complete traceback is shown by passing the
        # 'exc_info' argument to the logging function.
        failure = Failure(Exception())
        request = Request("http://www.example.com")
        logkws = self.formatter.download_error(failure, request, self.spider)
        self.assertEqual(self._render(logkws),
                         "Error downloading <GET http://www.example.com>")

    def test_download_error_long(self):
        # The optional extra message is appended after a colon.
        failure = Failure(Exception())
        request = Request("http://www.example.com")
        logkws = self.formatter.download_error(failure, request, self.spider,
                                               "Some message")
        self.assertEqual(
            self._render(logkws),
            "Error downloading <GET http://www.example.com>: Some message")

    def test_scraped(self):
        # Non-ASCII item values must survive formatting intact.
        item = CustomItem()
        item['name'] = u'\xa3'
        response = Response("http://www.example.com")
        logkws = self.formatter.scraped(item, response, self.spider)
        lines = self._render(logkws).splitlines()
        self.assertTrue(all(isinstance(x, str) for x in lines))
        self.assertEqual(
            lines,
            [u"Scraped from <200 http://www.example.com>", u'name: \xa3'])