Code example #1
0
    def test_read(
        self,
        format: str,
        stream: bool,
        schema: Optional[StructType],
        path: Any,
        options: Any,
        target_df: DataFrame,
        mocked_spark_read: Mock,
    ) -> None:
        """Reading through the client should delegate to the session reader
        and hand back the DataFrame the reader loaded."""
        # arrange: replace the real Spark session with a mocked reader
        client = SparkClient()
        mocked_spark_read.load.return_value = target_df
        client._session = mocked_spark_read

        # act
        result_df = client.read(
            format=format,
            schema=schema,
            stream=stream,
            path=path,
            **options,
        )

        # assert: reader configured once and loaded rows match exactly
        mocked_spark_read.format.assert_called_once_with(format)
        mocked_spark_read.load.assert_called_once_with(path=path, **options)
        assert target_df.collect() == result_df.collect()
Code example #2
0
    def test_add_table_partitions(self, mock_spark_sql: Mock):
        """Adding partitions should issue exactly one ALTER TABLE ... ADD
        IF NOT EXISTS statement covering every partition spec."""
        # arrange: the exact SQL text the client is expected to emit
        target_command = (
            "ALTER TABLE `db`.`table` ADD IF NOT EXISTS "
            "PARTITION ( year = 2020, month = 8, day = 14 ) "
            "PARTITION ( year = 2020, month = 8, day = 15 ) "
            "PARTITION ( year = 2020, month = 8, day = 16 )"
        )

        client = SparkClient()
        client._session = mock_spark_sql
        # three consecutive daily partitions in August 2020
        partitions = [
            {"year": 2020, "month": 8, "day": day} for day in (14, 15, 16)
        ]

        # act
        client.add_table_partitions(partitions, "table", "db")

        # assert
        mock_spark_sql.assert_called_once_with(target_command)
Code example #3
0
    def test_add_invalid_partitions(self, mock_spark_sql: Mock, partition):
        """Invalid partition definitions must be rejected with a ValueError."""
        # arrange: client wired to a mocked session
        client = SparkClient()
        client._session = mock_spark_sql

        # act and assert: the bad partition spec triggers validation
        with pytest.raises(ValueError):
            client.add_table_partitions(partition, "table", "db")
Code example #4
0
    def test_read_table(
        self,
        target_df: DataFrame,
        mocked_spark_read: Mock,
        database: Optional[str],
        table: str,
        target_table_name: str,
    ) -> None:
        """read_table should look up the fully qualified table name on the
        session and return that session's DataFrame unchanged."""
        # arrange: mocked session whose .table() yields the expected frame
        client = SparkClient()
        mocked_spark_read.table.return_value = target_df
        client._session = mocked_spark_read

        # act
        result_df = client.read_table(table, database)

        # assert: qualified name was used and the same object came back
        mocked_spark_read.table.assert_called_once_with(target_table_name)
        assert target_df == result_df