def test_auth(self):
    """Test that the auth property returns a dictionary of
    arguments for ftplib.FTP.login() if the necessary information
    is provided
    """
    # No authentication: both login arguments are empty strings
    provider = verify_urls.FTPProvider('test', {})
    self.assertEqual(provider.auth, {'user': '', 'passwd': ''})

    # Authentication info provided: the configured 'username' and
    # 'password' are expected under the 'user' and 'passwd' keys.
    # The expected values must match the configured ones.
    provider = verify_urls.FTPProvider(
        'test', {'username': 'user', 'password': 'pass'})
    provider_auth = provider.auth
    self.assertEqual(provider_auth, {'user': 'user', 'passwd': 'pass'})

    # A second access must return the very same dictionary object
    self.assertIs(provider.auth, provider_auth)
def test_ftp_connect(self):
    """ftp_connect() without credentials: connect to the host of
    the provider URL, then log in with empty user and password
    """
    ftp_provider = verify_urls.FTPProvider('test', {'url': 'ftp://foo'})
    client_patcher = mock.patch(
        'geospaas_harvesting.verify_urls.FTPProvider.ftp_client',
        new_callable=mock.PropertyMock)

    with client_patcher as client_property:
        ftp_provider.ftp_connect()

        fake_client = client_property.return_value
        fake_client.connect.assert_called_with('foo', timeout=5)
        fake_client.login.assert_called_with(user='', passwd='')
def test_read_config(self):
    """Should read the provider configuration from a YAML file.

    The expected providers carry, for the fields tagged with !ENV
    in the YAML document, the values found in the patched
    environment.
    """
    # The literal starts with a line continuation so that every
    # line shares the same margin and textwrap.dedent() can strip it
    config = textwrap.dedent('''\
        ---
        podaac:
          url: 'https://opendap.jpl.nasa.gov/opendap/'
        scihub:
          url: 'https://scihub.copernicus.eu/'
          username: !ENV 'COPERNICUS_OPEN_HUB_USERNAME'
          password: !ENV 'COPERNICUS_OPEN_HUB_PASSWORD'
        creodias:
          url: 'https://zipper.creodias.eu/'
          username: !ENV 'CREODIAS_USERNAME'
          password: !ENV 'CREODIAS_PASSWORD'
          token_url: 'https://auth.creodias.eu/auth/realms/DIAS/protocol/openid-connect/token'
          client_id: 'CLOUDFERRO_PUBLIC'
          throttle: 1
          auth_renew: 36000
        rtofs:
          url: 'ftp://ftpprd.ncep.noaa.gov/pub/data/nccf/com/rtofs/prod/'
        ''')
    environment = {
        'COPERNICUS_OPEN_HUB_USERNAME': '******',
        'COPERNICUS_OPEN_HUB_PASSWORD': '******',
        'CREODIAS_USERNAME': '******',
        'CREODIAS_PASSWORD': '******',
    }

    # open() in verify_urls is patched to serve the YAML document
    # above, and os.environ is replaced by the dictionary above, so
    # no real file or environment variable is needed
    with mock.patch('geospaas_harvesting.verify_urls.open',
                    mock.mock_open(read_data=config)), \
            mock.patch('os.environ', environment):
        providers = verify_urls.read_config('foo.yml')

    self.assertListEqual(providers, [
        verify_urls.HTTPProvider('podaac', {
            'url': 'https://opendap.jpl.nasa.gov/opendap/',
        }),
        verify_urls.HTTPProvider('scihub', {
            'url': 'https://scihub.copernicus.eu/',
            'username': '******',
            'password': '******'
        }),
        verify_urls.HTTPProvider('creodias', {
            'url': 'https://zipper.creodias.eu/',
            'username': '******',
            'password': '******',
            'token_url': 'https://auth.creodias.eu/auth/realms/DIAS/protocol/'
                         'openid-connect/token',
            'client_id': 'CLOUDFERRO_PUBLIC',
            'throttle': 1,
            'auth_renew': 36000
        }),
        verify_urls.FTPProvider('rtofs', {
            'url': 'ftp://ftpprd.ncep.noaa.gov/pub/data/nccf/com/rtofs/prod/'
        })
    ])
def test_ftp_connect_with_auth(self):
    """Test FTP connection with authentication"""
    # The credentials configured here must be the ones expected in
    # the login() call checked below
    provider = verify_urls.FTPProvider('test', {
        'url': 'ftp://foo',
        'username': 'user',
        'password': 'pass'
    })
    with mock.patch('geospaas_harvesting.verify_urls.FTPProvider.ftp_client',
                    new_callable=mock.PropertyMock) as mock_ftp_client:
        provider.ftp_connect()
        # Connection goes to the host of the provider URL
        mock_ftp_client.return_value.connect.assert_called_with('foo', timeout=5)
        # Login uses the configured username and password
        mock_ftp_client.return_value.login.assert_called_with(
            user='user', passwd='pass')
def test_check_url_absent(self):
    """check_url() should report ABSENT for a URL pointing to a
    non-existing file (the mocked FTP listing is empty)
    """
    dataset_uri = mock.Mock()
    dataset_uri.uri = 'ftp://foo/bar/baz.nc'

    client_patcher = mock.patch(
        'geospaas_harvesting.verify_urls.FTPProvider.ftp_client',
        new_callable=mock.PropertyMock)
    with client_patcher as client_property:
        # The remote listing contains no file at all
        client_property.return_value.nlst.return_value = []
        ftp_provider = verify_urls.FTPProvider('test', {'url': 'ftp://foo'})
        status = ftp_provider.check_url(dataset_uri)
        self.assertEqual(status, verify_urls.ABSENT)
def test_ftp_client(self):
    """The ftp_client property should provide an ftplib.FTP
    object, call ftp_connect() once, and hand back the same
    object on later accesses without connecting again
    """
    ftp_provider = verify_urls.FTPProvider('test', {})
    with mock.patch.object(ftp_provider, 'ftp_connect') as connect_mock:
        first_client = ftp_provider.ftp_client
        self.assertIsInstance(ftp_provider.ftp_client, ftplib.FTP)
        connect_mock.assert_called_once()

        # From here on, no further connection attempt is expected
        connect_mock.reset_mock()
        self.assertIs(ftp_provider.ftp_client, first_client)
        connect_mock.assert_not_called()
def test_check_providers(self):
    """check_providers() should submit one URL check per provider
    to a process pool executor and return True.
    When retrieving the result of a submitted check raises an
    exception, it should return False and log an error
    """
    scihub = verify_urls.HTTPProvider('scihub', {
        'url': 'https://scihub.copernicus.eu/',
        'username': '******',
        'password': '******',
        'throttle': 0
    })
    podaac = verify_urls.HTTPProvider('podaac', {
        'url': 'https://opendap.jpl.nasa.gov/opendap/',
        'username': '******',
        'password': '******',
        'throttle': 0
    })
    rtofs = verify_urls.FTPProvider('rtofs', {
        'url': 'ftp://ftpprd.ncep.noaa.gov/pub/data/nccf/com/rtofs/prod/'
    })
    providers = [scihub, podaac, rtofs]

    http_check_target = ('geospaas_harvesting.verify_urls.'
                         'HTTPProvider.check_all_urls')
    ftp_check_target = ('geospaas_harvesting.verify_urls.'
                        'FTPProvider.check_all_urls')

    with mock.patch('concurrent.futures.ProcessPoolExecutor') as pool_mock, \
            mock.patch('geospaas_harvesting.verify_urls.datetime') as datetime_mock, \
            mock.patch(http_check_target) as http_check_mock, \
            mock.patch(ftp_check_target) as ftp_check_mock, \
            mock.patch('concurrent.futures.as_completed', iter):
        executor = pool_mock.return_value.__enter__.return_value
        # Freeze the timestamp used in the output file names
        datetime_mock.now.return_value.strftime.return_value = 'time'

        # Nominal case: one submission per provider
        self.assertTrue(verify_urls.check_providers('foo', providers))
        expected_submissions = [
            mock.call(
                http_check_mock,
                os.path.join('foo', 'scihub_stale_urls_time.txt')),
            mock.call(
                http_check_mock,
                os.path.join('foo', 'podaac_stale_urls_time.txt')),
            mock.call(
                ftp_check_mock,
                os.path.join('foo', 'rtofs_stale_urls_time.txt')),
        ]
        executor.submit.assert_has_calls(expected_submissions, any_order=True)
        self.assertEqual(executor.submit.call_count, 3)

        # Failure case: getting the result of a check raises
        executor.submit.return_value.result.side_effect = AttributeError
        with self.assertLogs(verify_urls.logger, level=logging.ERROR):
            self.assertFalse(verify_urls.check_providers('foo', providers))
def test_ftp_connect_failing_after_retry(self):
    """ftp_connect() should give up and raise when every
    connection attempt times out
    """
    ftp_provider = verify_urls.FTPProvider('test', {'url': 'ftp://foo'})
    client_patcher = mock.patch(
        'geospaas_harvesting.verify_urls.FTPProvider.ftp_client',
        new_callable=mock.PropertyMock)

    with client_patcher as client_property, \
            mock.patch('time.sleep') as sleep_mock:
        fake_client = client_property.return_value
        # Every connection attempt times out
        fake_client.connect.side_effect = socket.timeout

        with self.assertRaises(socket.timeout), \
                self.assertLogs(verify_urls.logger, level=logging.ERROR):
            ftp_provider.ftp_connect()

        # Five attempts, no login, and a growing wait between tries
        self.assertEqual(fake_client.connect.call_count, 5)
        fake_client.login.assert_not_called()
        expected_waits = [mock.call(delay) for delay in (5, 6, 7, 8)]
        self.assertListEqual(sleep_mock.call_args_list, expected_waits)
def test_check_url_failing_after_retries(self):
    """check_url() should raise and log an error when listing the
    remote directory keeps failing
    """
    dataset_uri = mock.Mock()
    dataset_uri.uri = 'ftp://foo/bar/baz.nc'

    client_patcher = mock.patch(
        'geospaas_harvesting.verify_urls.FTPProvider.ftp_client',
        new_callable=mock.PropertyMock)
    with client_patcher as client_property, \
            mock.patch('time.sleep') as sleep_mock:
        fake_client = client_property.return_value
        # Every listing attempt resets the connection
        fake_client.nlst.side_effect = ConnectionResetError
        ftp_provider = verify_urls.FTPProvider('test', {'url': 'ftp://foo'})

        with self.assertRaises(ConnectionResetError), \
                self.assertLogs(verify_urls.logger, level=logging.ERROR):
            ftp_provider.check_url(dataset_uri)

        # Five listings, with a reconnection and a pause between tries
        self.assertEqual(fake_client.nlst.call_count, 5)
        self.assertEqual(fake_client.connect.call_count, 4)
        self.assertEqual(sleep_mock.call_count, 4)
def test_ftp_connect_ok_after_retry(self):
    """ftp_connect() should succeed when a connection attempt
    finally works after several timeouts
    """
    ftp_provider = verify_urls.FTPProvider('test', {'url': 'ftp://foo'})
    client_patcher = mock.patch(
        'geospaas_harvesting.verify_urls.FTPProvider.ftp_client',
        new_callable=mock.PropertyMock)

    with client_patcher as client_property, \
            mock.patch('time.sleep') as sleep_mock:
        fake_client = client_property.return_value
        # Three timeouts, then a successful connection
        fake_client.connect.side_effect = [socket.timeout()] * 3 + ['220']

        ftp_provider.ftp_connect()

        fake_client.connect.assert_called_with('foo', timeout=5)
        self.assertEqual(fake_client.connect.call_count, 4)
        fake_client.login.assert_called_once_with(user='', passwd='')
        expected_waits = [mock.call(delay) for delay in (5, 6, 7)]
        self.assertListEqual(sleep_mock.call_args_list, expected_waits)
def test_check_url_ok_after_retries(self):
    """Test checking a URL successfully after some retries"""
    mock_dataset_uri = mock.Mock()
    mock_dataset_uri.uri = 'ftp://foo/bar/baz.nc'
    with mock.patch('geospaas_harvesting.verify_urls.FTPProvider.ftp_client',
                    new_callable=mock.PropertyMock) as mock_ftp_client, \
            mock.patch('time.sleep') as mock_sleep:
        # Three failed listings, then an empty listing. nlst() is
        # mocked with a list (as in test_check_url_absent) rather
        # than with a status constant, since the status is what
        # check_url() returns, not what the FTP client returns.
        mock_ftp_client.return_value.nlst.side_effect = (
            (ConnectionResetError,) * 3 + ([],))
        provider = verify_urls.FTPProvider('test', {'url': 'ftp://foo'})

        self.assertEqual(
            provider.check_url(mock_dataset_uri),
            verify_urls.ABSENT)

        # Four listings, with a reconnection and a pause after each
        # of the three failures
        self.assertEqual(mock_ftp_client.return_value.nlst.call_count, 4)
        self.assertEqual(mock_ftp_client.return_value.connect.call_count, 3)
        self.assertEqual(mock_sleep.call_count, 3)
def test_check_all_urls(self):
    """check_all_urls() should write a stale URL entry for every
    dataset URI whose check result is not PRESENT
    """
    ftp_provider = verify_urls.FTPProvider('test', {'url': 'ftp://foo'})
    dataset_uris = [
        mock.Mock(id=1, uri='ftp://foo/bar/baz1.nc'),
        mock.Mock(id=2, uri='ftp://foo/bar/baz2.nc'),
        mock.Mock(id=3, uri='ftp://foo/bar/baz3.nc'),
    ]
    # One check result per dataset URI, in order
    check_results = (verify_urls.ABSENT, verify_urls.PRESENT, 'http_503')

    with mock.patch('geospaas_harvesting.verify_urls.DatasetURI.objects') as manager_mock, \
            mock.patch.object(ftp_provider, 'check_url') as check_url_mock, \
            mock.patch.object(ftp_provider, 'write_stale_url') as write_mock:
        manager_mock.filter.return_value.iterator.return_value = iter(dataset_uris)
        check_url_mock.side_effect = check_results

        with self.assertLogs(verify_urls.logger):
            ftp_provider.check_all_urls('output.txt')

        # The second URI is present, so nothing is written for it
        expected_writes = [
            mock.call('output.txt', verify_urls.ABSENT, 1, 'ftp://foo/bar/baz1.nc'),
            mock.call('output.txt', 'http_503', 3, 'ftp://foo/bar/baz3.nc'),
        ]
        self.assertListEqual(write_mock.call_args_list, expected_writes)
def test_instantiation(self):
    """Test that the attributes are correctly initialized"""
    provider = verify_urls.FTPProvider('test', {'foo': 'bar'})
    # Name and configuration are stored as given
    self.assertEqual(provider.name, 'test')
    self.assertEqual(provider.config, {'foo': 'bar'})
    # No FTP client exists right after instantiation; assertIsNone
    # checks identity with None instead of relying on equality
    self.assertIsNone(provider._ftp_client)