def test_save_scraped_document__ignore_directories(self):
    """Saving a response whose URL ends in '/' must leave the output dir empty."""
    parsed_doc = lxml.html.document_fromstring(self.test_html)
    directory_url = '/doc/to/'
    payload = self.test_html.encode('utf8')

    with test_utils.tempdir() as tmp:
        # Fake HTTP response describing an HTML page served at a directory URL.
        fake_response = mock.MagicMock()
        fake_response.url = directory_url
        fake_response.status = 200
        fake_response.read.return_value = payload
        fake_response.info().get_content_type.return_value = 'text/html'

        bake.save_scraped_document(
            tmp,
            directory_url,
            fake_response,
            payload,
            parsed_doc,
            set(),
        )

        # Nothing should have been written for the directory-style URL.
        self.assertEqual([], os.listdir(tmp))
def test_save_scraped_document__file(self):
    """Check that an HTML response for '/path/to/file' is saved as a file.

    A 'text/html' response for the URL '/path/to/file' is expected to end
    up at 'path/to/file.html' below the output directory, with contents
    matching ``self.expected_html`` (compared via ``self.assertLines``).
    """
    html = lxml.html.document_fromstring(self.test_html)
    with test_utils.tempdir() as tmp:
        # Fake HTTP response carrying the test HTML document.
        response = mock.MagicMock()
        response.url = '/path/to/file'
        response.status = 200
        response.read.return_value = self.test_html.encode('utf8')
        response.info().get_content_type.return_value = 'text/html'

        bake.save_scraped_document(
            tmp, response.url, response, response.read.return_value,
            html, set())

        filename = path.join(tmp, 'path/to/file.html')
        self.assertTrue(path.exists(filename))

        # Read through a context manager so the handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(filename) as saved_file:
            saved_contents = saved_file.read()
        self.assertLines(saved_contents, self.expected_html)
def test_save_scraped_document__binary_content(self):
    """Check that a non-HTML ('image/png') response is stored byte-for-byte.

    The response for '/resources/something.png' is expected to be written
    to 'resources/something.png' below the output directory with exactly
    the bytes that were passed in.
    """
    html = lxml.html.document_fromstring(self.test_html)
    with test_utils.tempdir() as tmp:
        # Fake HTTP response carrying an opaque binary payload.
        response = mock.MagicMock()
        response.url = '/resources/something.png'
        response.status = 200
        expected_contents = 'IMAGE!'.encode('utf8')
        response.read.return_value = expected_contents
        response.info().get_content_type.return_value = 'image/png'

        bake.save_scraped_document(
            tmp, response.url, response, expected_contents, html, set())

        # Binary mode avoids any text decoding; the context manager makes
        # sure the handle is closed before the temp directory is cleaned up.
        saved_path = path.join(tmp, 'resources/something.png')
        with open(saved_path, 'rb') as saved_file:
            actual_contents = saved_file.read()
        self.assertEqual(expected_contents, actual_contents)