def poke(self, context):
    """Ask WebHDFS whether ``self.filepath`` is present.

    The hook class is imported inside the method body rather than at
    module import time.

    :param context: value handed in by the caller; not read by this method.
    :return: the result of ``WebHDFSHook.check_for_path`` for ``self.filepath``.
    """
    from airflow.providers.apache.hdfs.hooks.webhdfs import WebHDFSHook

    # Build the hook first, then log, then query the path.
    webhdfs = WebHDFSHook(self.webhdfs_conn_id)
    self.log.info('Poking for file %s', self.filepath)
    path_found = webhdfs.check_for_path(hdfs_path=self.filepath)
    return path_found
class TestWebHDFSHook(unittest.TestCase):
    """Unit tests for ``WebHDFSHook`` with its HDFS clients and socket module mocked."""

    def setUp(self):
        # Every test starts from a hook built with default arguments.
        self.webhdfs_hook = WebHDFSHook()

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.InsecureClient')
    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_connections',
        return_value=[
            Connection(host='host_1', port=123),
            Connection(host='host_2', port=321, login='******'),
        ],
    )
    @patch("airflow.providers.apache.hdfs.hooks.webhdfs.socket")
    def test_get_conn(self, socket_mock, mock_get_connections, mock_insecure_client):
        """A failing first client must be skipped in favour of the second one."""
        # First InsecureClient(...) call raises HdfsError, the second one
        # returns the shared mock client.
        mock_insecure_client.side_effect = [
            HdfsError('Error'),
            mock_insecure_client.return_value,
        ]
        # connect_ex() == 0 is the socket "success" code; presumably this lets
        # the hook treat every host as reachable -- confirm against the hook.
        socket_mock.socket.return_value.connect_ex.return_value = 0

        conn = self.webhdfs_hook.get_conn()

        # One client construction per configured connection, in order.
        mock_insecure_client.assert_has_calls(
            [
                call(
                    f'http://{connection.host}:{connection.port}',
                    user=connection.login,
                )
                for connection in mock_get_connections.return_value
            ]
        )
        mock_insecure_client.return_value.status.assert_called_once_with('/')
        assert conn == mock_insecure_client.return_value

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.KerberosClient', create=True)
    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_connections',
        return_value=[Connection(host='host_1', port=123)],
    )
    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs._kerberos_security_mode',
        return_value=True,
    )
    @patch("airflow.providers.apache.hdfs.hooks.webhdfs.socket")
    def test_get_conn_kerberos_security_mode(
        self,
        socket_mock,
        mock_kerberos_security_mode,
        mock_get_connections,
        mock_kerberos_client,
    ):
        """With ``_kerberos_security_mode`` patched to ``True`` a ``KerberosClient`` is built."""
        socket_mock.socket.return_value.connect_ex.return_value = 0

        conn = self.webhdfs_hook.get_conn()

        connection = mock_get_connections.return_value[0]
        mock_kerberos_client.assert_called_once_with(
            f'http://{connection.host}:{connection.port}'
        )
        assert conn == mock_kerberos_client.return_value

    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook._find_valid_server',
        return_value=None,
    )
    def test_get_conn_no_connection_found(self, mock_find_valid_server):
        """``get_conn`` raises when ``_find_valid_server`` returns ``None``."""
        with pytest.raises(AirflowWebHDFSHookException):
            self.webhdfs_hook.get_conn()

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_conn')
    def test_check_for_path(self, mock_get_conn):
        """``check_for_path`` calls ``status(path, strict=False)`` and returns its truthiness."""
        hdfs_path = 'path'

        exists_path = self.webhdfs_hook.check_for_path(hdfs_path)

        mock_get_conn.assert_called_once_with()
        mock_status = mock_get_conn.return_value.status
        mock_status.assert_called_once_with(hdfs_path, strict=False)
        assert exists_path == bool(mock_status.return_value)

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_conn')
    def test_load_file(self, mock_get_conn):
        """``load_file`` forwards to the client's ``upload`` with the expected keyword arguments."""
        source = 'source'
        destination = 'destination'

        self.webhdfs_hook.load_file(source, destination)

        mock_get_conn.assert_called_once_with()
        mock_upload = mock_get_conn.return_value.upload
        mock_upload.assert_called_once_with(
            hdfs_path=destination, local_path=source, overwrite=True, n_threads=1
        )

    def test_simple_init(self):
        """Without arguments the hook has no proxy user."""
        hook = WebHDFSHook()
        assert hook.proxy_user is None

    def test_init_proxy_user(self):
        """A ``proxy_user`` passed to the constructor is stored on the hook."""
        # Use one value for both construction and the expectation so the two
        # cannot drift apart.
        proxy_user = 'someone'
        hook = WebHDFSHook(proxy_user=proxy_user)
        assert hook.proxy_user == proxy_user
def setUp(self):
    """Attach a default-constructed ``WebHDFSHook`` to the instance as ``webhdfs_hook``."""
    default_hook = WebHDFSHook()
    self.webhdfs_hook = default_hook
def test_init_proxy_user(self):
    """A ``proxy_user`` passed to the constructor is stored on the hook."""
    # The constructor argument and the expected value must be the same string;
    # sharing one variable keeps them from drifting apart.
    proxy_user = 'someone'
    hook = WebHDFSHook(proxy_user=proxy_user)
    assert hook.proxy_user == proxy_user
def test_simple_init(self):
    """A hook created without arguments reports ``None`` as its proxy user."""
    assert WebHDFSHook().proxy_user is None
def test_init_proxy_user(self):
    """A ``proxy_user`` passed to the constructor is stored on the hook."""
    # The constructor argument and the expected value must be the same string;
    # sharing one variable keeps them from drifting apart.
    proxy_user = 'someone'
    hook = WebHDFSHook(proxy_user=proxy_user)
    self.assertEqual(proxy_user, hook.proxy_user)
def test_simple_init(self):
    """A hook created without arguments reports ``None`` as its proxy user."""
    self.assertIsNone(WebHDFSHook().proxy_user)
class TestWebHDFSHook(unittest.TestCase):
    """Unit tests for ``WebHDFSHook`` with its HDFS client classes mocked."""

    def setUp(self):
        # Every test starts from a hook built with default arguments.
        self.webhdfs_hook = WebHDFSHook()

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.InsecureClient')
    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_connections',
        return_value=[
            Connection(host='host_1', port=123),
            Connection(host='host_2', port=321, login='******'),
        ],
    )
    def test_get_conn(self, mock_get_connections, mock_insecure_client):
        """A failing first client must be skipped in favour of the second one."""
        # First InsecureClient(...) call raises HdfsError, the second one
        # returns the shared mock client.
        mock_insecure_client.side_effect = [
            HdfsError('Error'),
            mock_insecure_client.return_value,
        ]

        conn = self.webhdfs_hook.get_conn()

        # One client construction per configured connection, in order.
        mock_insecure_client.assert_has_calls(
            [
                call(
                    f'http://{connection.host}:{connection.port}',
                    user=connection.login,
                )
                for connection in mock_get_connections.return_value
            ]
        )
        mock_insecure_client.return_value.status.assert_called_once_with('/')
        self.assertEqual(conn, mock_insecure_client.return_value)

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.KerberosClient', create=True)
    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_connections',
        return_value=[Connection(host='host_1', port=123)],
    )
    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs._kerberos_security_mode',
        return_value=True,
    )
    def test_get_conn_kerberos_security_mode(
        self,
        mock_kerberos_security_mode,
        mock_get_connections,
        mock_kerberos_client,
    ):
        """With ``_kerberos_security_mode`` patched to ``True`` a ``KerberosClient`` is built."""
        conn = self.webhdfs_hook.get_conn()

        connection = mock_get_connections.return_value[0]
        mock_kerberos_client.assert_called_once_with(
            f'http://{connection.host}:{connection.port}'
        )
        self.assertEqual(conn, mock_kerberos_client.return_value)

    @patch(
        'airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_connections',
        return_value=[],
    )
    def test_get_conn_no_connection_found(self, mock_get_connections):
        """``get_conn`` raises when ``get_connections`` yields an empty list."""
        with self.assertRaises(AirflowWebHDFSHookException):
            self.webhdfs_hook.get_conn()

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_conn')
    def test_check_for_path(self, mock_get_conn):
        """``check_for_path`` calls ``status(path, strict=False)`` and returns its truthiness."""
        hdfs_path = 'path'

        exists_path = self.webhdfs_hook.check_for_path(hdfs_path)

        mock_get_conn.assert_called_once_with()
        mock_status = mock_get_conn.return_value.status
        mock_status.assert_called_once_with(hdfs_path, strict=False)
        self.assertEqual(exists_path, bool(mock_status.return_value))

    @patch('airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook.get_conn')
    def test_load_file(self, mock_get_conn):
        """``load_file`` forwards to the client's ``upload`` with the expected keyword arguments."""
        source = 'source'
        destination = 'destination'

        self.webhdfs_hook.load_file(source, destination)

        mock_get_conn.assert_called_once_with()
        mock_upload = mock_get_conn.return_value.upload
        mock_upload.assert_called_once_with(
            hdfs_path=destination, local_path=source, overwrite=True, n_threads=1
        )

    def test_simple_init(self):
        """Without arguments the hook has no proxy user."""
        hook = WebHDFSHook()
        self.assertIsNone(hook.proxy_user)

    def test_init_proxy_user(self):
        """A ``proxy_user`` passed to the constructor is stored on the hook."""
        # Use one value for both construction and the expectation so the two
        # cannot drift apart.
        proxy_user = 'someone'
        hook = WebHDFSHook(proxy_user=proxy_user)
        self.assertEqual(proxy_user, hook.proxy_user)