Example #1
0
def test_mysql_to_s3_json(mocker, mysql, s3, s3_init):
    """Export the ``dummy`` table from MySQL to S3 as JSON and verify it.

    The exported ``s3://testbucket/test.json`` is compared against the
    ``testdata.csv`` file that lives next to this test module.
    """
    source_hook = MysqlHook(conn_id="this_conn_will_be_mocked")
    target_hook = S3Hook(conn_id="this_conn_will_be_mocked_2")
    export_task = DbToFsOperator(
        task_id="test_id",
        src_db_hook=source_hook,
        src_query="SELECT * FROM dummy",
        output_filetype="json",
        dest_path="s3://testbucket/test.json",
        dest_fs_hook=target_hook,
    )
    export_task.execute(context={})

    # Expected records: one dict per CSV data row, keyed by the header row.
    fixture_path = os.path.join(os.path.dirname(__file__), "testdata.csv")
    with open(fixture_path) as fixture_file:
        reader = csv.reader(fixture_file)
        column_names = next(reader)
        expected = [
            {column_names[pos]: value for pos, value in enumerate(record)}
            for record in reader
        ]

    # Actual records: whatever the operator wrote to the bucket.
    filesystem = S3Hook().get_conn()
    with filesystem.open("s3://testbucket/test.json",
                         mode="r") as exported_file:
        exported = json.load(exported_file)

    # csv.reader only yields strings, so stringify the JSON keys and values
    # before comparing.
    actual = [
        {str(key): str(value) for key, value in item.items()}
        for item in exported
    ]
    assert expected == actual
Example #2
0
def test_postgres_to_s3_json(mocker, postgres, s3, s3_init):
    """Export the ``dummy`` table from Postgres to S3 as JSON and verify it.

    ``PostgresHook.get_connection`` and ``S3Hook.get_conn`` are patched to
    return objects pointing at the ports exposed by the ``postgres`` and
    ``s3`` fixtures. The exported ``s3://testbucket/test.json`` is compared
    against the ``testdata.csv`` file next to this test module.
    """
    # Point the Postgres hook at the fixture's published port.
    fake_connection = Connection(
        conn_id="test",
        conn_type="postgres",
        host="localhost",
        login="******",
        password="******",
        port=postgres.ports["5432/tcp"][0],
    )
    mocker.patch.object(PostgresHook,
                        "get_connection",
                        return_value=fake_connection)

    # Point the S3 hook at the fixture's endpoint.
    endpoint_url = f'http://localhost:{s3.ports["9000/tcp"][0]}'
    fake_filesystem = S3FileSystem(
        key="secretaccess",
        secret="secretkey",
        client_kwargs={"endpoint_url": endpoint_url},
    )
    mocker.patch.object(S3Hook, "get_conn", return_value=fake_filesystem)

    source_hook = PostgresHook(conn_id="this_conn_will_be_mocked")
    target_hook = S3Hook(conn_id="this_conn_will_be_mocked_2")
    export_task = DbToFsOperator(
        task_id="test_id",
        src_db_hook=source_hook,
        src_query="SELECT * FROM dummy",
        output_filetype="json",
        dest_path="s3://testbucket/test.json",
        dest_fs_hook=target_hook,
    )
    export_task.execute(context={})

    # Expected records: one dict per CSV data row, keyed by the header row.
    fixture_path = os.path.join(os.path.dirname(__file__), "testdata.csv")
    with open(fixture_path) as fixture_file:
        reader = csv.reader(fixture_file)
        column_names = next(reader)
        expected = [
            {column_names[pos]: value for pos, value in enumerate(record)}
            for record in reader
        ]

    # Actual records: whatever the operator wrote to the bucket.
    filesystem = S3Hook().get_conn()
    with filesystem.open("s3://testbucket/test.json",
                         mode="r") as exported_file:
        exported = json.load(exported_file)

    # csv.reader only yields strings, so stringify the JSON keys and values
    # before comparing.
    actual = [
        {str(key): str(value) for key, value in item.items()}
        for item in exported
    ]
    assert expected == actual
Example #3
0
def test_postgres_to_s3_csv(mocker, postgres, s3, s3_init):
    """Export the ``dummy`` table from Postgres to S3 as CSV and verify it.

    ``PostgresHook.get_connection`` and ``S3Hook.get_conn`` are patched to
    return objects pointing at the ports exposed by the ``postgres`` and
    ``s3`` fixtures. The parsed rows of ``s3://testbucket/test.csv`` must
    equal the parsed rows of ``testdata.csv`` next to this test module.
    """
    # Point the Postgres hook at the fixture's published port.
    fake_connection = Connection(
        conn_id="test",
        conn_type="postgres",
        host="localhost",
        login="******",
        password="******",
        port=postgres.ports["5432/tcp"][0],
    )
    mocker.patch.object(PostgresHook,
                        "get_connection",
                        return_value=fake_connection)

    # Point the S3 hook at the fixture's endpoint.
    endpoint_url = f'http://localhost:{s3.ports["9000/tcp"][0]}'
    fake_filesystem = S3FileSystem(
        key="secretaccess",
        secret="secretkey",
        client_kwargs={"endpoint_url": endpoint_url},
    )
    mocker.patch.object(S3Hook, "get_conn", return_value=fake_filesystem)

    source_hook = PostgresHook(conn_id="this_conn_will_be_mocked")
    target_hook = S3Hook(conn_id="this_conn_will_be_mocked_2")
    export_task = DbToFsOperator(
        task_id="test_id",
        src_db_hook=source_hook,
        src_query="SELECT * FROM dummy",
        output_filetype="csv",
        dest_path="s3://testbucket/test.csv",
        dest_fs_hook=target_hook,
    )
    export_task.execute(context={})

    # Expected rows come from the local fixture file (header included).
    fixture_path = os.path.join(os.path.dirname(__file__), "testdata.csv")
    with open(fixture_path) as fixture_file:
        expected_rows = list(csv.reader(fixture_file))

    # Actual rows come from the object the operator wrote to the bucket.
    filesystem = S3Hook().get_conn()
    with filesystem.open("s3://testbucket/test.csv",
                         mode="r") as exported_file:
        actual_rows = list(csv.reader(exported_file))

    assert expected_rows == actual_rows
Example #4
0
    def test_walk(self, s3_client, s3_mock_dir, mock_data_dir):
        """Checks `walk` on the S3 mock dir against `os.walk` on local data."""

        with S3Hook() as hook:
            # Materialise the walk while the hook is still open.
            remote_entries = [entry for entry in hook.walk(s3_mock_dir)]

        local_entries = os.walk(mock_data_dir)
        pytest.helpers.assert_walk_equal(remote_entries, local_entries)
Example #5
0
    def test_exists(self, s3_mock_dir):
        """Checks `exists` for a directory, a file and a missing path."""

        with S3Hook() as hook:
            # Both a sub-directory and a regular file must be reported.
            for present_name in ("subdir", "test.txt"):
                assert hook.exists(posixpath.join(s3_mock_dir, present_name))

            missing_path = posixpath.join(s3_mock_dir, "non-existing.txt")
            assert not hook.exists(missing_path)
Example #6
0
    def test_makedirs(self, s3_client, s3_temp_dir):
        """Tests the `makedirs` method with mode parameter."""

        nested_dir = posixpath.join(s3_temp_dir, "some", "nested", "dir")

        with S3Hook() as hook:
            hook.makedirs(nested_dir, mode=0o750)

        # Verify through the independent client fixture, not the hook.
        created = s3_client.exists(nested_dir)
        assert created
Example #7
0
    def test_open_read(self, s3_mock_dir):
        """Reads `test.txt` through the hook's `open` and checks its bytes."""

        target = posixpath.join(s3_mock_dir, "test.txt")

        with S3Hook() as hook, hook.open(target) as handle:
            payload = handle.read()

        assert payload == b"Test file\n"
Example #8
0
    def test_mkdir(self, s3_client, s3_temp_dir):
        """Creates a directory with `mkdir` (passing `mode`) and checks it."""

        new_dir = posixpath.join(s3_temp_dir, "subdir")

        # Precondition: the directory is not there yet.
        assert not s3_client.exists(new_dir)

        with S3Hook() as hook:
            hook.mkdir(new_dir, mode=0o750)

        assert s3_client.exists(new_dir)
Example #9
0
    def test_rm(self, s3_client, s3_mock_dir):
        """Removes `test.txt` with `rm` and checks that it is gone."""

        target = posixpath.join(s3_mock_dir, "test.txt")

        # Precondition: the file is present before removal.
        assert s3_client.exists(target)

        with S3Hook() as hook:
            hook.rm(target)

        assert not s3_client.exists(target)
Example #10
0
    def test_open_write(self, s3_client, s3_temp_dir):
        """Writes `test2.txt` through the hook's `open` and checks it exists."""

        target = posixpath.join(s3_temp_dir, "test2.txt")

        # Precondition: nothing has been written at the target path yet.
        assert not s3_client.exists(target)

        with S3Hook() as hook, hook.open(target, "wb") as handle:
            handle.write(b"Test file\n")

        assert s3_client.exists(target)
Example #11
0
    def test_rmtree(self, s3_client, s3_mock_dir):
        """Removes `subdir` with `rmtree` and checks that it is gone."""

        target_dir = posixpath.join(s3_mock_dir, "subdir")

        # Precondition: the directory is present before removal.
        assert s3_client.exists(target_dir)

        with S3Hook() as hook:
            hook.rmtree(target_dir)

        # Invalidate the client's cache for this path before re-checking;
        # presumably the earlier `exists` call would otherwise be served stale.
        s3_client.invalidate_cache(target_dir)
        assert not s3_client.exists(target_dir)
Example #12
0
    def test_makedirs_exists(self, s3_client, s3_temp_dir):
        """Tests the `makedirs` method with the exist_ok parameter."""

        nested_dir = posixpath.join(s3_temp_dir, "some", "nested", "dir")

        with S3Hook() as hook:
            # First creation succeeds even with exist_ok=False.
            hook.makedirs(nested_dir, exist_ok=False)

            # A second creation with exist_ok=False must raise.
            with pytest.raises(IOError):
                hook.makedirs(nested_dir, exist_ok=False)

            # With exist_ok=True the existing directory is accepted. The
            # client's cache for the path is invalidated first.
            s3_client.invalidate_cache(nested_dir)
            hook.makedirs(nested_dir, exist_ok=True)
Example #13
0
    def test_mkdir_exists(self, s3_client, s3_temp_dir):
        """Tests the `mkdir` method with the exist_ok parameter."""

        new_dir = posixpath.join(s3_temp_dir, "subdir")

        # Precondition: the directory is not there yet.
        assert not s3_client.exists(new_dir)

        with S3Hook() as hook:
            # First creation succeeds even with exist_ok=False.
            hook.mkdir(new_dir, exist_ok=False)

            # A second creation with exist_ok=False must raise.
            with pytest.raises(IOError):
                hook.mkdir(new_dir, exist_ok=False)

            # With exist_ok=True the existing directory is accepted.
            hook.mkdir(new_dir, exist_ok=True)
Example #14
0
def test_mysql_to_s3_csv(mocker, mysql, s3, s3_init):
    """Export the ``dummy`` table from MySQL to S3 as CSV and verify it.

    The parsed rows of ``s3://testbucket/test.csv`` must equal the parsed
    rows of the ``testdata.csv`` file next to this test module.
    """
    source_hook = MysqlHook(conn_id="this_conn_will_be_mocked")
    target_hook = S3Hook(conn_id="this_conn_will_be_mocked_2")
    export_task = DbToFsOperator(
        task_id="test_id",
        src_db_hook=source_hook,
        src_query="SELECT * FROM dummy",
        output_filetype="csv",
        dest_path="s3://testbucket/test.csv",
        dest_fs_hook=target_hook,
    )
    export_task.execute(context={})

    # Expected rows come from the local fixture file (header included).
    fixture_path = os.path.join(os.path.dirname(__file__), "testdata.csv")
    with open(fixture_path) as fixture_file:
        expected_rows = list(csv.reader(fixture_file))

    # Actual rows come from the object the operator wrote to the bucket.
    filesystem = S3Hook().get_conn()
    with filesystem.open("s3://testbucket/test.csv",
                         mode="r") as exported_file:
        actual_rows = list(csv.reader(exported_file))

    assert expected_rows == actual_rows
Example #15
0
    def test_glob(self, s3_client, local_mock_dir, s3_temp_dir, test_dag):
        """Copies local `*.csv` files to S3 using a glob source path."""

        s3_hook = S3Hook()
        csv_pattern = posixpath.join(local_mock_dir, "*.csv")

        copy_task = operators.CopyFileOperator(
            src_path=csv_pattern,
            dest_path=s3_temp_dir,
            dest_hook=s3_hook,
            task_id="copy_task",
            dag=test_dag,
        )
        _run_task(copy_task, test_dag)

        # The matching file must now be present in the destination dir.
        copied_path = posixpath.join(s3_temp_dir, "test.csv")
        assert s3_hook.exists(copied_path)
Example #16
0
    def test_single(self, s3_client, s3_mock_dir, test_dag):
        """Deletes one directory with `DeleteTreeOperator`."""

        s3_hook = S3Hook()
        target_dir = posixpath.join(s3_mock_dir, "subdir")

        # Precondition: the directory is present before the task runs.
        assert s3_hook.exists(target_dir)

        delete_task = operators.DeleteTreeOperator(
            path=target_dir,
            hook=s3_hook,
            task_id="copy_task",
            dag=test_dag,
        )
        _run_task(delete_task, test_dag)

        assert not s3_hook.exists(target_dir)
Example #17
0
    def test_single(self, s3_client, s3_mock_dir, test_dag):
        """Deletes one file with `DeleteFileOperator`."""

        s3_hook = S3Hook()
        target_file = posixpath.join(s3_mock_dir, "test.txt")

        # Precondition: the file is present before the task runs.
        assert s3_hook.exists(target_file)

        delete_task = operators.DeleteFileOperator(
            path=target_file,
            hook=s3_hook,
            task_id="copy_task",
            dag=test_dag,
        )
        _run_task(delete_task, test_dag)

        assert not s3_hook.exists(target_file)
Example #18
0
    def test_glob(self, s3_client, s3_mock_dir, test_dag):
        """Deletes directories matching a glob with `DeleteTreeOperator`."""

        s3_hook = S3Hook()

        # Precondition: the directory the pattern should match is present.
        assert s3_hook.exists(posixpath.join(s3_mock_dir, "subdir"))

        dir_pattern = posixpath.join(s3_mock_dir, "sub*")
        delete_task = operators.DeleteTreeOperator(
            path=dir_pattern,
            hook=s3_hook,
            task_id="copy_task",
            dag=test_dag,
        )
        _run_task(delete_task, test_dag)

        assert not s3_hook.exists(posixpath.join(s3_mock_dir, "subdir"))
Example #19
0
    def test_single(self, s3_client, local_mock_dir, s3_temp_dir, test_dag):
        """Copies one local file to S3 with `CopyFileOperator`."""

        s3_hook = S3Hook()
        target_path = posixpath.join(s3_temp_dir, "test.txt")

        # Precondition: the destination does not exist yet.
        assert not s3_hook.exists(target_path)

        source_path = posixpath.join(local_mock_dir, "test.txt")
        copy_task = operators.CopyFileOperator(
            src_path=source_path,
            dest_path=target_path,
            dest_hook=s3_hook,
            task_id="copy_task",
            dag=test_dag,
        )
        _run_task(copy_task, test_dag)

        assert s3_hook.exists(target_path)
Example #20
0
    def test_glob(self, s3_client, s3_mock_dir, test_dag):
        """Deletes files matching a glob with `DeleteFileOperator`."""

        s3_hook = S3Hook()
        matched_names = ("test.txt", "test.csv")

        # Precondition: every file the pattern should match is present.
        for name in matched_names:
            assert s3_hook.exists(posixpath.join(s3_mock_dir, name))

        file_pattern = posixpath.join(s3_mock_dir, "test.*")
        delete_task = operators.DeleteFileOperator(
            path=file_pattern,
            hook=s3_hook,
            task_id="copy_task",
            dag=test_dag,
        )
        _run_task(delete_task, test_dag)

        for name in matched_names:
            assert not s3_hook.exists(posixpath.join(s3_mock_dir, name))

        # A file that does not match the pattern must survive.
        assert s3_hook.exists(posixpath.join(s3_mock_dir, "other.txt"))
Example #21
0
    def test_isdir(self, s3_mock_dir):
        """Checks `isdir` for a directory path and for a file path."""

        with S3Hook() as hook:
            directory_path = posixpath.join(s3_mock_dir, "subdir")
            assert hook.isdir(directory_path)

            file_path = posixpath.join(s3_mock_dir, "test.txt")
            assert not hook.isdir(file_path)
Example #22
0
    def test_listdir(self, s3_mock_dir, mock_data_dir):
        """Checks `listdir` on the S3 mock dir against the local data dir."""

        with S3Hook() as hook:
            # Compare as sets so that listing order does not matter.
            remote_names = set(hook.listdir(s3_mock_dir))
            local_names = set(os.listdir(mock_data_dir))
            assert remote_names == local_names