def test_aborts_large_downloads(self, get_mock):
    # Verifies that download() raises DownloadException when the response
    # announces a content-length one byte above the permitted maximum.
    # get_mock: presumably the patched HTTP GET callable (the patch
    # decorator is not visible from here).
    limit = 1000
    get_mock.return_value = Mock(headers={'content-length': limit + 1})

    with self.assertRaises(download.DownloadException) as caught:
        download.download(EXAMPLE_COM, limit)

    # The error message has to name the offending header.
    self.assertIn('content-length', caught.exception.message)
def test_aborts_large_downloads(get_mock):
    """Check that an oversized content-length aborts the download.

    The mocked response announces one byte more than the allowed maximum;
    ``download.download`` is expected to raise ``DownloadException`` with a
    message mentioning the ``content-length`` header.

    ``get_mock`` is presumably the patched HTTP GET callable (the fixture or
    patch that provides it is not visible from here).
    """
    limit = 1000
    get_mock.return_value = Mock(headers={'content-length': limit + 1})

    with pytest.raises(download.DownloadException) as caught:
        download.download(EXAMPLE_COM, limit)

    assert 'content-length' in caught.value.message
def test_aborts_large_downloads(self, get_mock):
    # A response whose content-length header exceeds the maximum passed to
    # download() must be rejected with a DownloadException.
    # get_mock: presumably the patched HTTP GET callable (patch not
    # visible from here).
    allowed_bytes = 1000
    oversized_response = Mock(headers={'content-length': allowed_bytes + 1})
    get_mock.return_value = oversized_response

    with self.assertRaises(download.DownloadException) as raised:
        download.download(EXAMPLE_COM, allowed_bytes)

    # The message is expected to reference the header that caused the abort.
    self.assertIn('content-length', raised.exception.message)
def test_aborts_with_invalid_headers(self, get_mock):
    # Verifies that a non-numeric content-length header makes download()
    # raise a DownloadException whose `parent` is the ValueError.
    # get_mock: presumably the patched HTTP GET callable (patch not
    # visible from here).
    get_mock.return_value = Mock(headers={'content-length': "invalid"})

    with self.assertRaises(download.DownloadException) as caught:
        download.download(EXAMPLE_COM)

    error = caught.exception
    # The message names both the header and the failed conversion.
    self.assertIn('content-length', error.message)
    self.assertIn('convert', error.message)
    self.assertIsInstance(error.parent, ValueError)
def test_aborts_with_invalid_headers(get_mock):
    """Check that an unparsable content-length header aborts the download.

    The mocked response carries the header value ``"invalid"``. The raised
    ``DownloadException`` must mention ``content-length`` and ``convert`` in
    its message and expose a ``ValueError`` as its ``parent``.

    ``get_mock`` is presumably the patched HTTP GET callable (the fixture or
    patch that provides it is not visible from here).
    """
    get_mock.return_value = Mock(headers={'content-length': "invalid"})

    with pytest.raises(download.DownloadException) as caught:
        download.download(EXAMPLE_COM)

    error = caught.value
    assert 'content-length' in error.message
    assert 'convert' in error.message
    assert isinstance(error.parent, ValueError)
def test_only_downloads_up_to_a_maximum_length(get_mock):
    """Check that the maximum length is forwarded to ``iter_content``.

    The response is prepared through the ``_mock_content`` helper (defined
    elsewhere in the file). The test asserts that ``iter_content`` on the
    mocked response was called with the maximum length and that the returned
    object exposes the mocked content unchanged.
    """
    payload = Mock()
    limit = 1
    _mock_content(get_mock, content=payload, content_length=limit)

    result = download.download(EXAMPLE_COM, max_content_length=limit)

    get_mock.return_value.iter_content.assert_called_with(limit)
    assert result.content == payload
def test_decodes_text_content(get_mock):
    """Check that text/html content is decoded with the response encoding.

    Content and encoding are both mocks, so the test can assert that
    ``content.decode`` was called with that encoding and ``errors='ignore'``,
    and that the decoded value ends up in ``text`` on the result.
    """
    payload = Mock()
    charset = Mock()
    payload.decode.return_value = 'text'
    _mock_content(
        get_mock,
        content=payload,
        content_type='text/html',
        encoding=charset,
    )

    result = download.download(EXAMPLE_COM)

    payload.decode.assert_called_with(charset, errors='ignore')
    assert 'text' == result.text
def test_only_decodes_text_content(self, get_mock):
    # Verifies that a response with a non-text content type yields no text.
    # get_mock: presumably the patched HTTP GET callable (patch not
    # visible from here); it is configured through self._mock_content.
    content = Mock()
    self._mock_content(get_mock, content=content, content_type="something/else")

    ret = download.download(EXAMPLE_COM)

    # The content type is not a text type, so no text is expected at all:
    # the result carries None, not an empty string.
    self.assertIsNone(ret.text)
def test_can_handle_empty_content(get_mock):
    """Check that a response delivering no content chunks has no text.

    The mocked response declares ``text/html`` but its ``iter_content``
    returns an empty iterator; ``download.download`` is expected to return
    an object whose ``text`` is ``None``.

    ``get_mock`` is presumably the patched HTTP GET callable (the fixture or
    patch that provides it is not visible from here).
    """
    response = Mock(headers={'content-type': 'text/html'})
    response.iter_content.return_value = iter([])
    get_mock.return_value = response

    ret = download.download(EXAMPLE_COM)

    # No chunks were delivered, so there is no text at all (None, not '').
    assert ret.text is None
def fetch_article(self):
    """
    Download the page behind ``self.url`` and store its title and readable
    article on this instance.

    The download is capped at ``settings.PYPO_MAX_CONTENT_LENGTH``; the
    downloaded content type, text and raw content are handed to ``parse``.

    NOTE: nothing is caught here, so any exception raised by ``download``
    or ``parse`` propagates to the caller.
    """
    response = download(
        self.url,
        max_content_length=settings.PYPO_MAX_CONTENT_LENGTH,
    )
    parsed = parse(
        self,
        content_type=response.content_type,
        text=response.text,
        content=response.content,
    )
    self.title, self.readable_article = parsed
def test_only_downloads_up_to_a_maximum_length(self, get_mock):
    # Verifies that the maximum content length given to download() is the
    # argument passed to iter_content on the mocked response, and that the
    # mocked content is returned unchanged.
    # get_mock is configured through self._mock_content (defined elsewhere).
    payload = Mock()
    limit = 1
    self._mock_content(get_mock, content=payload, content_length=limit)

    result = download.download(EXAMPLE_COM, max_content_length=limit)

    get_mock.return_value.iter_content.assert_called_with(limit)
    self.assertEqual(result.content, payload)
def test_ignores_invalid_decode(self, get_mock):
    # Verifies that bytes which cannot be decoded with the announced
    # encoding do not raise: UTF-8 encoded umlauts are served as 'ascii'
    # and the resulting text is expected to be the empty string.
    raw_bytes = "üöä".encode('utf-8')
    announced_encoding = 'ascii'
    self._mock_content(
        get_mock,
        content=raw_bytes,
        content_type='text/html',
        encoding=announced_encoding,
    )

    result = download.download(EXAMPLE_COM)

    # Every character was undecodable, so nothing is left of the text.
    self.assertEqual('', result.text)
def test_decodes_text_content(self, get_mock):
    # Verifies that text/html content is decoded with the response's
    # encoding and errors='ignore', and that the decoded value is exposed
    # as `text` on the result.
    # get_mock is configured through self._mock_content (defined elsewhere).
    payload = Mock()
    charset = Mock()
    payload.decode.return_value = 'text'
    self._mock_content(
        get_mock,
        content=payload,
        content_type='text/html',
        encoding=charset,
    )

    result = download.download(EXAMPLE_COM)

    payload.decode.assert_called_with(charset, errors='ignore')
    self.assertEqual('text', result.text)
def fetch_article(self):
    """
    Download the page behind ``self.url`` and store its title and readable
    article on this instance.

    The download is capped at ``settings.PYPO_MAX_CONTENT_LENGTH``. When it
    fails with a ``DownloadException`` the url itself becomes the title and
    the readable article is set to ``None``; otherwise both values come
    from ``parse``.
    """
    try:
        response = download(
            self.url,
            max_content_length=settings.PYPO_MAX_CONTENT_LENGTH,
        )
    except DownloadException:
        # TODO show a message that the download failed?
        self.title = self.url
        self.readable_article = None
        return

    parsed = parse(
        self,
        content_type=response.content_type,
        text=response.text,
        content=response.content,
    )
    self.title, self.readable_article = parsed
def test_uses_request_to_start_the_download(self, get_mock):
    # Verifies that download() issues the GET call with stream=True and
    # converts a requests.RequestException into a DownloadException.
    # get_mock: presumably the patched requests.get (patch not visible
    # from here).
    get_mock.side_effect = requests.RequestException

    with self.assertRaises(download.DownloadException):
        download.download(EXAMPLE_COM)

    # Checked after the failure: the request must still have been started.
    get_mock.assert_called_with(EXAMPLE_COM, stream=True)
def test_only_decodes_text_content(get_mock):
    """Check that a response with a non-text content type yields no text.

    The response is prepared through the ``_mock_content`` helper (defined
    elsewhere in the file) with the content type ``something/else``;
    ``download.download`` is expected to return an object whose ``text`` is
    ``None``.
    """
    content = Mock()
    _mock_content(get_mock, content=content, content_type="something/else")

    ret = download.download(EXAMPLE_COM)

    # The content type is not a text type, so no text is expected at all:
    # the result carries None, not an empty string.
    assert ret.text is None
def test_ignores_invalid_decode(get_mock):
    """Check that undecodable bytes produce an empty text, not an error.

    UTF-8 encoded umlauts are served with the announced encoding ``ascii``;
    the text of the download result is expected to be the empty string.
    """
    raw_bytes = "üöä".encode('utf-8')
    announced_encoding = 'ascii'
    _mock_content(
        get_mock,
        content=raw_bytes,
        content_type='text/html',
        encoding=announced_encoding,
    )

    result = download.download(EXAMPLE_COM)

    # Every character was undecodable, so nothing is left of the text.
    assert '' == result.text
def test_guess_encoding_from_content(get_mock):
    """Check that a charset declared in the document wins over the header.

    The body is UTF-8 encoded and says so in a ``<meta charset>`` tag, while
    the mocked response announces ``latin1``. The text of the download
    result is expected to equal the original, correctly decoded string.
    """
    html = '<meta charset="UTF-8"/>fübar'
    utf8_bytes = html.encode('utf-8')
    _mock_content(
        get_mock,
        content=utf8_bytes,
        content_type='text/html',
        encoding='latin1',
    )

    result = download.download(EXAMPLE_COM)

    assert html == result.text
def test_uses_request_to_start_the_download(get_mock):
    """Check how the GET call is issued and how its failure is reported.

    ``get_mock`` (presumably the patched ``requests.get``; the fixture or
    patch is not visible from here) raises ``requests.RequestException``.
    ``download.download`` must turn that into a ``DownloadException`` and
    must have called the mock with ``stream=True`` and ``verify=False``.
    """
    get_mock.side_effect = requests.RequestException

    with pytest.raises(download.DownloadException):
        download.download(EXAMPLE_COM)

    # Checked after the failure: the request must still have been started.
    get_mock.assert_called_with(EXAMPLE_COM, stream=True, verify=False)