def test_mysql_to_s3_json(mocker, mysql, s3, s3_init):
    """Export a MySQL query to S3 as JSON and compare it with testdata.csv.

    Runs a `DbToFsOperator` that executes ``SELECT * FROM dummy`` through a
    `MysqlHook` with ``output_filetype="json"``, reads the resulting
    ``s3://testbucket/test.json`` back and asserts that its records equal the
    rows of the local ``testdata.csv`` file next to this module.

    NOTE(review): the connection ids say they will be mocked, but this test
    never uses ``mocker``; presumably the fixtures provide the connections -
    confirm.
    """
    task = DbToFsOperator(
        task_id="test_id",
        src_db_hook=MysqlHook(conn_id="this_conn_will_be_mocked"),
        src_query="SELECT * FROM dummy",
        output_filetype="json",
        dest_path="s3://testbucket/test.json",
        dest_fs_hook=S3Hook(conn_id="this_conn_will_be_mocked_2"),
    )
    task.execute(context={})

    # Assert output
    testdata_path = os.path.join(os.path.dirname(__file__), "testdata.csv")
    # The csv module expects files opened with newline="" so that line endings
    # embedded in quoted fields are parsed correctly.
    with open(testdata_path, newline="") as local_file:
        csv_reader = csv.reader(local_file)
        header = next(csv_reader)
        # Convert every data row to a dict keyed by the header columns.
        local_data = [dict(zip(header, row)) for row in csv_reader]

    s3fs = S3Hook().get_conn()
    with s3fs.open("s3://testbucket/test.json", mode="r") as s3_file:
        s3_data = json.load(s3_file)

    # CSV reader makes everything string, so cast all columns to string for comparing values
    s3_data = [{str(k): str(v) for k, v in row.items()} for row in s3_data]
    assert local_data == s3_data
def test_postgres_to_s3_json(mocker, postgres, s3, s3_init):
    """Export a Postgres query to S3 as JSON and compare it with testdata.csv.

    `PostgresHook.get_connection` is patched to return a localhost connection
    on the port taken from the ``postgres`` fixture, and `S3Hook.get_conn` is
    patched to return an `S3FileSystem` whose endpoint uses the port taken
    from the ``s3`` fixture. A `DbToFsOperator` then writes
    ``SELECT * FROM dummy`` to ``s3://testbucket/test.json``, and the result is
    asserted to equal the rows of the local ``testdata.csv`` file.
    """
    mocker.patch.object(
        PostgresHook,
        "get_connection",
        return_value=Connection(
            conn_id="test",
            conn_type="postgres",
            host="localhost",
            login="******",
            password="******",
            port=postgres.ports["5432/tcp"][0],
        ),
    )
    mocker.patch.object(
        S3Hook,
        "get_conn",
        return_value=S3FileSystem(
            key="secretaccess",
            secret="secretkey",
            client_kwargs={
                "endpoint_url": f'http://localhost:{s3.ports["9000/tcp"][0]}'
            },
        ),
    )

    task = DbToFsOperator(
        task_id="test_id",
        src_db_hook=PostgresHook(conn_id="this_conn_will_be_mocked"),
        src_query="SELECT * FROM dummy",
        output_filetype="json",
        dest_path="s3://testbucket/test.json",
        dest_fs_hook=S3Hook(conn_id="this_conn_will_be_mocked_2"),
    )
    task.execute(context={})

    # Assert output
    testdata_path = os.path.join(os.path.dirname(__file__), "testdata.csv")
    # The csv module expects files opened with newline="" so that line endings
    # embedded in quoted fields are parsed correctly.
    with open(testdata_path, newline="") as local_file:
        csv_reader = csv.reader(local_file)
        header = next(csv_reader)
        # Convert every data row to a dict keyed by the header columns.
        local_data = [dict(zip(header, row)) for row in csv_reader]

    s3fs = S3Hook().get_conn()
    with s3fs.open("s3://testbucket/test.json", mode="r") as s3_file:
        s3_data = json.load(s3_file)

    # CSV reader makes everything string, so cast all columns to string for comparing values
    s3_data = [{str(k): str(v) for k, v in row.items()} for row in s3_data]
    assert local_data == s3_data
def test_postgres_to_s3_csv(mocker, postgres, s3, s3_init):
    """Export a Postgres query to S3 as CSV and compare it with testdata.csv.

    `PostgresHook.get_connection` is patched to return a localhost connection
    on the port taken from the ``postgres`` fixture, and `S3Hook.get_conn` is
    patched to return an `S3FileSystem` whose endpoint uses the port taken
    from the ``s3`` fixture. A `DbToFsOperator` then writes
    ``SELECT * FROM dummy`` to ``s3://testbucket/test.csv``, and the parsed
    rows (header included) are asserted to equal those of the local
    ``testdata.csv`` file.
    """
    mocker.patch.object(
        PostgresHook,
        "get_connection",
        return_value=Connection(
            conn_id="test",
            conn_type="postgres",
            host="localhost",
            login="******",
            password="******",
            port=postgres.ports["5432/tcp"][0],
        ),
    )
    mocker.patch.object(
        S3Hook,
        "get_conn",
        return_value=S3FileSystem(
            key="secretaccess",
            secret="secretkey",
            client_kwargs={
                "endpoint_url": f'http://localhost:{s3.ports["9000/tcp"][0]}'
            },
        ),
    )

    task = DbToFsOperator(
        task_id="test_id",
        src_db_hook=PostgresHook(conn_id="this_conn_will_be_mocked"),
        src_query="SELECT * FROM dummy",
        output_filetype="csv",
        dest_path="s3://testbucket/test.csv",
        dest_fs_hook=S3Hook(conn_id="this_conn_will_be_mocked_2"),
    )
    task.execute(context={})

    # Assert output
    testdata_path = os.path.join(os.path.dirname(__file__), "testdata.csv")
    # The csv module expects files opened with newline="" so that line endings
    # embedded in quoted fields are parsed correctly.
    with open(testdata_path, newline="") as local_file:
        local_data = list(csv.reader(local_file))

    s3fs = S3Hook().get_conn()
    with s3fs.open("s3://testbucket/test.csv", mode="r") as s3_file:
        s3_data = list(csv.reader(s3_file))

    assert local_data == s3_data
def test_walk(self, s3_client, s3_mock_dir, mock_data_dir):
    """Compare the hook's `walk` output with `os.walk` over the local data dir."""
    with S3Hook() as hook:
        # Materialise the walk while the hook is still open.
        walked = [entry for entry in hook.walk(s3_mock_dir)]

    expected = os.walk(mock_data_dir)
    pytest.helpers.assert_walk_equal(walked, expected)
def test_exists(self, s3_mock_dir):
    """Check `exists` for two present entries and one missing entry."""
    subdir_path = posixpath.join(s3_mock_dir, "subdir")
    present_file = posixpath.join(s3_mock_dir, "test.txt")
    missing_file = posixpath.join(s3_mock_dir, "non-existing.txt")

    with S3Hook() as hook:
        assert hook.exists(subdir_path)
        assert hook.exists(present_file)
        assert not hook.exists(missing_file)
def test_makedirs(self, s3_client, s3_temp_dir):
    """Check that `makedirs`, called with a mode, creates a nested path."""
    nested_parts = ("some", "nested", "dir")
    nested_dir = posixpath.join(s3_temp_dir, *nested_parts)

    with S3Hook() as hook:
        hook.makedirs(nested_dir, mode=0o750)

    # Verify through the independent client rather than the hook under test.
    assert s3_client.exists(nested_dir)
def test_open_read(self, s3_mock_dir):
    """Read `test.txt` through the hook's `open` and check its bytes."""
    source_path = posixpath.join(s3_mock_dir, "test.txt")

    with S3Hook() as hook, hook.open(source_path) as handle:
        payload = handle.read()

    assert payload == b"Test file\n"
def test_mkdir(self, s3_client, s3_temp_dir):
    """Check that `mkdir`, called with a mode, creates the directory."""
    new_dir = posixpath.join(s3_temp_dir, "subdir")

    # Precondition: nothing exists at the target path yet.
    already_there = s3_client.exists(new_dir)
    assert not already_there

    with S3Hook() as hook:
        hook.mkdir(new_dir, mode=0o750)

    assert s3_client.exists(new_dir)
def test_rm(self, s3_client, s3_mock_dir):
    """Check that `rm` removes an existing file."""
    target = posixpath.join(s3_mock_dir, "test.txt")

    # Precondition: the file is present before removal.
    present_before = s3_client.exists(target)
    assert present_before

    with S3Hook() as hook:
        hook.rm(target)

    assert not s3_client.exists(target)
def test_open_write(self, s3_client, s3_temp_dir):
    """Write a new file through the hook's `open` and check that it exists."""
    target = posixpath.join(s3_temp_dir, "test2.txt")

    # Precondition: the file is absent before writing.
    present_before = s3_client.exists(target)
    assert not present_before

    with S3Hook() as hook, hook.open(target, "wb") as handle:
        handle.write(b"Test file\n")

    assert s3_client.exists(target)
def test_rmtree(self, s3_client, s3_mock_dir):
    """Check that `rmtree` removes the `subdir` entry."""
    target_dir = posixpath.join(s3_mock_dir, "subdir")

    # Precondition: the directory is present before removal.
    present_before = s3_client.exists(target_dir)
    assert present_before

    with S3Hook() as hook:
        hook.rmtree(target_dir)

    # Invalidate the client's cache for this path before re-checking.
    s3_client.invalidate_cache(target_dir)
    assert not s3_client.exists(target_dir)
def test_makedirs_exists(self, s3_client, s3_temp_dir):
    """Check the `exist_ok` parameter of `makedirs`.

    Re-creating an existing path with ``exist_ok=False`` is expected to raise
    ``IOError``; with ``exist_ok=True`` it is expected to succeed.
    """
    nested_parts = ("some", "nested", "dir")
    nested_dir = posixpath.join(s3_temp_dir, *nested_parts)

    with S3Hook() as hook:
        # First creation succeeds because the path does not exist yet.
        hook.makedirs(nested_dir, exist_ok=False)

        # Second creation without exist_ok must be rejected.
        with pytest.raises(IOError):
            hook.makedirs(nested_dir, exist_ok=False)

        s3_client.invalidate_cache(nested_dir)
        hook.makedirs(nested_dir, exist_ok=True)
def test_mkdir_exists(self, s3_client, s3_temp_dir):
    """Check the `exist_ok` parameter of `mkdir`.

    Re-creating an existing directory with ``exist_ok=False`` is expected to
    raise ``IOError``; with ``exist_ok=True`` it is expected to succeed.
    """
    new_dir = posixpath.join(s3_temp_dir, "subdir")

    # Precondition: nothing exists at the target path yet.
    present_before = s3_client.exists(new_dir)
    assert not present_before

    with S3Hook() as hook:
        # First creation succeeds because the directory does not exist yet.
        hook.mkdir(new_dir, exist_ok=False)

        # Second creation without exist_ok must be rejected.
        with pytest.raises(IOError):
            hook.mkdir(new_dir, exist_ok=False)

        hook.mkdir(new_dir, exist_ok=True)
def test_mysql_to_s3_csv(mocker, mysql, s3, s3_init):
    """Export a MySQL query to S3 as CSV and compare it with testdata.csv.

    Runs a `DbToFsOperator` that executes ``SELECT * FROM dummy`` through a
    `MysqlHook` with ``output_filetype="csv"``, reads the resulting
    ``s3://testbucket/test.csv`` back and asserts that its parsed rows (header
    included) equal those of the local ``testdata.csv`` file next to this
    module.

    NOTE(review): the connection ids say they will be mocked, but this test
    never uses ``mocker``; presumably the fixtures provide the connections -
    confirm.
    """
    task = DbToFsOperator(
        task_id="test_id",
        src_db_hook=MysqlHook(conn_id="this_conn_will_be_mocked"),
        src_query="SELECT * FROM dummy",
        output_filetype="csv",
        dest_path="s3://testbucket/test.csv",
        dest_fs_hook=S3Hook(conn_id="this_conn_will_be_mocked_2"),
    )
    task.execute(context={})

    # Assert output
    testdata_path = os.path.join(os.path.dirname(__file__), "testdata.csv")
    # The csv module expects files opened with newline="" so that line endings
    # embedded in quoted fields are parsed correctly.
    with open(testdata_path, newline="") as local_file:
        local_data = list(csv.reader(local_file))

    s3fs = S3Hook().get_conn()
    with s3fs.open("s3://testbucket/test.csv", mode="r") as s3_file:
        s3_data = list(csv.reader(s3_file))

    assert local_data == s3_data
def test_glob(self, s3_client, local_mock_dir, s3_temp_dir, test_dag):
    """Tests copying of files using glob pattern.

    Runs a `CopyFileOperator` whose source is the ``*.csv`` pattern inside
    ``local_mock_dir`` and whose destination is ``s3_temp_dir``, then checks
    that ``test.csv`` exists at the destination.
    """
    dest_hook = S3Hook()
    copied_path = posixpath.join(s3_temp_dir, "test.csv")

    # Guard against a vacuous pass: the file must not exist before the copy.
    assert not dest_hook.exists(copied_path)

    task = operators.CopyFileOperator(
        src_path=posixpath.join(local_mock_dir, "*.csv"),
        dest_path=s3_temp_dir,
        dest_hook=dest_hook,
        task_id="copy_task",
        dag=test_dag,
    )
    _run_task(task, test_dag)

    assert dest_hook.exists(copied_path)
def test_single(self, s3_client, s3_mock_dir, test_dag):
    """Run `DeleteTreeOperator` on one directory and check that it is gone."""
    fs_hook = S3Hook()
    target_dir = posixpath.join(s3_mock_dir, "subdir")

    # Precondition: the directory is present before the task runs.
    assert fs_hook.exists(target_dir)

    delete_task = operators.DeleteTreeOperator(
        path=target_dir,
        hook=fs_hook,
        task_id="copy_task",
        dag=test_dag,
    )
    _run_task(delete_task, test_dag)

    assert not fs_hook.exists(target_dir)
def test_single(self, s3_client, s3_mock_dir, test_dag):
    """Run `DeleteFileOperator` on one file and check that it is gone."""
    fs_hook = S3Hook()
    target_file = posixpath.join(s3_mock_dir, "test.txt")

    # Precondition: the file is present before the task runs.
    assert fs_hook.exists(target_file)

    delete_task = operators.DeleteFileOperator(
        path=target_file,
        hook=fs_hook,
        task_id="copy_task",
        dag=test_dag,
    )
    _run_task(delete_task, test_dag)

    assert not fs_hook.exists(target_file)
def test_glob(self, s3_client, s3_mock_dir, test_dag):
    """Run `DeleteTreeOperator` with the `sub*` glob and check `subdir` is gone."""
    fs_hook = S3Hook()
    subdir_path = posixpath.join(s3_mock_dir, "subdir")
    glob_pattern = posixpath.join(s3_mock_dir, "sub*")

    # Precondition: the matching directory is present before the task runs.
    assert fs_hook.exists(subdir_path)

    delete_task = operators.DeleteTreeOperator(
        path=glob_pattern,
        hook=fs_hook,
        task_id="copy_task",
        dag=test_dag,
    )
    _run_task(delete_task, test_dag)

    assert not fs_hook.exists(subdir_path)
def test_single(self, s3_client, local_mock_dir, s3_temp_dir, test_dag):
    """Run `CopyFileOperator` on one file and check it arrives at the destination."""
    target_hook = S3Hook()
    source_file = posixpath.join(local_mock_dir, "test.txt")
    target_file = posixpath.join(s3_temp_dir, "test.txt")

    # Precondition: the destination file is absent before the task runs.
    assert not target_hook.exists(target_file)

    copy_task = operators.CopyFileOperator(
        src_path=source_file,
        dest_path=target_file,
        dest_hook=target_hook,
        task_id="copy_task",
        dag=test_dag,
    )
    _run_task(copy_task, test_dag)

    assert target_hook.exists(target_file)
def test_glob(self, s3_client, s3_mock_dir, test_dag):
    """Run `DeleteFileOperator` with the `test.*` glob.

    Both `test.txt` and `test.csv` must be removed, while `other.txt`, which
    does not match the pattern, must remain.
    """
    fs_hook = S3Hook()
    txt_path = posixpath.join(s3_mock_dir, "test.txt")
    csv_path = posixpath.join(s3_mock_dir, "test.csv")
    glob_pattern = posixpath.join(s3_mock_dir, "test.*")

    # Precondition: both matching files are present before the task runs.
    assert fs_hook.exists(txt_path)
    assert fs_hook.exists(csv_path)

    delete_task = operators.DeleteFileOperator(
        path=glob_pattern,
        hook=fs_hook,
        task_id="copy_task",
        dag=test_dag,
    )
    _run_task(delete_task, test_dag)

    assert not fs_hook.exists(txt_path)
    assert not fs_hook.exists(csv_path)

    # Check if other file was not deleted.
    untouched_path = posixpath.join(s3_mock_dir, "other.txt")
    assert fs_hook.exists(untouched_path)
def test_isdir(self, s3_mock_dir):
    """Check that `isdir` accepts `subdir` and rejects `test.txt`."""
    subdir_path = posixpath.join(s3_mock_dir, "subdir")
    file_path = posixpath.join(s3_mock_dir, "test.txt")

    with S3Hook() as hook:
        assert hook.isdir(subdir_path)
        assert not hook.isdir(file_path)
def test_listdir(self, s3_mock_dir, mock_data_dir):
    """Compare the hook's `listdir` output with `os.listdir`, ignoring order."""
    with S3Hook() as hook:
        remote_names = set(hook.listdir(s3_mock_dir))
        local_names = set(os.listdir(mock_data_dir))
        assert remote_names == local_names