Example #1
0
    def test_save_scraped_document__ignore_directories(self):
        html = lxml.html.document_fromstring(self.test_html)
        with test_utils.tempdir() as tmp:
            response = mock.MagicMock()
            response.url = '/doc/to/'
            response.status = 200
            response.read.return_value = self.test_html.encode('utf8')
            response.info().get_content_type.return_value = 'text/html'

            bake.save_scraped_document(tmp, response.url, response,
                                       response.read.return_value, html, set())
            self.assertEqual([], os.listdir(tmp))
Example #2
0
    def test_save_scraped_document__file(self):
        html = lxml.html.document_fromstring(self.test_html)
        with test_utils.tempdir() as tmp:
            response = mock.MagicMock()
            response.url = '/path/to/file'
            response.status = 200
            response.read.return_value = self.test_html.encode('utf8')
            response.info().get_content_type.return_value = 'text/html'

            bake.save_scraped_document(tmp, response.url, response,
                                       response.read.return_value, html, set())
            filename = path.join(tmp, 'path/to/file.html')
            self.assertTrue(path.exists(filename))
            self.assertLines(open(filename).read(), self.expected_html)
Example #3
0
    def test_save_scraped_document__binary_content(self):
        html = lxml.html.document_fromstring(self.test_html)
        with test_utils.tempdir() as tmp:
            response = mock.MagicMock()
            response.url = '/resources/something.png'
            response.status = 200
            expected_contents = 'IMAGE!'.encode('utf8')
            response.read.return_value = expected_contents
            response.info().get_content_type.return_value = 'image/png'

            bake.save_scraped_document(
                tmp, response.url, response, expected_contents, html, set())
            actual_contents = open(path.join(tmp, 'resources/something.png'), 'rb').read()
            self.assertEqual(expected_contents, actual_contents)