def test_download_get_raises_some_RequestException(self, caplog):
    """Check that a request error on the first location falls through to the next.

    The first ``session.get`` call raises ``ConnectionError`` and the second
    returns a ``MockResponse200``. The test verifies that both locations were
    requested, that the successful response was handed to ``write_file``, and
    that both outcomes show up in the captured log.
    """
    caplog.set_level(INFO)
    session = requests.Session()
    ok_response = MockResponse200('')
    # Raise a subclass of RequestException on the first download attempt;
    # hand back a successful response on the second attempt.
    get_outcomes = [requests.exceptions.ConnectionError(), ok_response]

    with patch.object(session, 'get', side_effect=get_outcomes) as mock_get:
        with patch('wasapi_client.write_file') as mock_write_file:
            wc.download_file(self.data_file, session, self.filename)

    # Both locations must have been requested, in order.
    expected_calls = [call(self.locations[i], stream=True) for i in (0, 1)]
    mock_get.assert_has_calls(expected_calls)
    mock_write_file.assert_called_once_with(ok_response, self.filename)

    # The caught requests exception and the later success are both logged.
    logged = caplog.text
    assert 'Error downloading http://loc1/blah.warc.gz:' in logged
    assert 'http://loc2/blah.warc.gz: 200 OK' in logged
def test_download_file_not_200(self):
    """Check that ``download_file`` raises when every location returns a 403 mock.

    The raised ``WASAPIDownloadError`` message must mention the locations and
    the filename, and ``session.get`` must have been called for both
    locations.
    """
    session = requests.Session()
    forbidden_response = MockResponse403()

    with patch.object(session, 'get', return_value=forbidden_response) as mock_get:
        with pytest.raises(wc.WASAPIDownloadError) as err:
            wc.download_file(self.data_file, session, self.filename)

    # The error message identifies what failed to download and from where.
    error_text = err.value.args[0]
    assert str(self.locations) in error_text
    assert self.filename in error_text

    # Check all locations were tried.
    first_location = self.locations[0]
    second_location = self.locations[1]
    mock_get.assert_has_calls([
        call(first_location, stream=True),
        call(second_location, stream=True),
    ])
def test_download_file_OSError(self):
    """Check that an ``OSError`` from ``write_file`` surfaces as ``WASAPIDownloadError``.

    The error message must mention the locations and the filename. Only the
    first location is requested, and ``write_file`` is invoked exactly once.
    """
    session = requests.Session()
    ok_response = MockResponse200('')

    with patch.object(session, 'get', return_value=ok_response) as mock_get:
        # Writing the downloaded content to disk fails.
        with patch('wasapi_client.write_file',
                   side_effect=OSError) as mock_write_file:
            with pytest.raises(wc.WASAPIDownloadError) as err:
                wc.download_file(self.data_file, session, self.filename)

    error_text = err.value.args[0]
    assert str(self.locations) in error_text
    assert self.filename in error_text

    # No further location is requested once the write has failed.
    mock_get.assert_called_once_with(self.locations[0], stream=True)
    mock_write_file.assert_called_once_with(ok_response, self.filename)
def test_download_uses_pre_signed_url(self):
    """Test that an s3 pre-signed URL is fetched via requests.get, not a session."""
    filename = 'blah.warc.gz'
    signed_url = 'https://data.s3.amazonaws.com/warcs/blah.warc.gz?Signature=xyz'
    locations = [signed_url, 'http://loc2/blah.warc.gz']
    data_file = wc.DataFile(
        locations,
        filename,
        {'md5': '72b484a2610cb54ec22e48c8104ba3bd'},
        123456,
    )
    ok_response = MockResponse200('')

    with patch('requests.get', return_value=ok_response) as mock_get:
        with patch('wasapi_client.write_file') as mock_write_file:
            wc.download_file(data_file, requests.Session(), filename)

    # Exactly one download went through requests.get, and its response
    # was passed on to write_file.
    mock_get.assert_called_once_with(signed_url, stream=True)
    mock_write_file.assert_called_once_with(ok_response, filename)
def test_download_check_exists_true(self):
    """Test that nothing is downloaded when check_exists reports the file exists."""
    exists_patch = patch('wasapi_client.check_exists', return_value=True)
    session_patch = patch('requests.Session', autospec=True)

    with exists_patch, session_patch as mock_session:
        result = wc.download_file(self.data_file, mock_session, self.filename)

    # The returned object must be flagged as verified.
    assert result.verified
    # No GET request may have been issued.
    assert not mock_session.get.called
def test_download_file_200(self):
    """Check that a ``MockResponse200`` from the first location is written once.

    Only the first location is requested, the response is passed to
    ``write_file`` together with the target filename, and the returned
    object is not flagged as verified.
    """
    session = requests.Session()
    ok_response = MockResponse200('')

    with patch.object(session, 'get', return_value=ok_response) as mock_get:
        with patch('wasapi_client.write_file') as mock_write_file:
            result = wc.download_file(self.data_file, session, self.filename)

    # Downloading stops at the first successful location.
    mock_get.assert_called_once_with(self.locations[0], stream=True)
    mock_write_file.assert_called_once_with(ok_response, self.filename)
    assert not result.verified