def test_read(
    self,
    format: str,
    stream: bool,
    schema: Optional[StructType],
    path: Any,
    options: Any,
    target_df: DataFrame,
    mocked_spark_read: Mock,
) -> None:
    """Read through a mocked session and verify the delegated calls.

    The session is replaced by a mock whose ``load`` yields ``target_df``,
    so the test checks both the returned rows and the exact arguments
    forwarded to ``format``/``load``.
    """
    # arrange: wire the mocked reader in as the client's session
    client = SparkClient()
    mocked_spark_read.load.return_value = target_df
    client._session = mocked_spark_read

    # act
    result_df = client.read(
        format=format, schema=schema, stream=stream, path=path, **options
    )

    # assert: the client must forward format/path/options untouched
    mocked_spark_read.format.assert_called_once_with(format)
    mocked_spark_read.load.assert_called_once_with(path=path, **options)
    assert result_df.collect() == target_df.collect()
def test_add_table_partitions(self, mock_spark_sql: Mock):
    """Adding partitions should issue a single well-formed ALTER TABLE.

    Three day-partitions for the same year/month are registered; the
    mocked session must receive exactly one command listing all three.
    """
    # arrange: expected command, one PARTITION clause per day
    days = (14, 15, 16)
    target_command = "ALTER TABLE `db`.`table` ADD IF NOT EXISTS " + " ".join(
        f"PARTITION ( year = 2020, month = 8, day = {day} )" for day in days
    )

    client = SparkClient()
    client._session = mock_spark_sql
    partitions = [{"year": 2020, "month": 8, "day": day} for day in days]

    # act
    client.add_table_partitions(partitions, "table", "db")

    # assert
    mock_spark_sql.assert_called_once_with(target_command)
def test_add_invalid_partitions(self, mock_spark_sql: Mock, partition):
    """An invalid partition spec must be rejected with ``ValueError``."""
    # arrange: mocked session so no real Spark call can happen
    client = SparkClient()
    client._session = mock_spark_sql

    # act and assert
    with pytest.raises(ValueError):
        client.add_table_partitions(partition, "table", "db")
def test_read_table(
    self,
    target_df: DataFrame,
    mocked_spark_read: Mock,
    database: Optional[str],
    table: str,
    target_table_name: str,
) -> None:
    """Reading a table should delegate to ``session.table``.

    The mocked session returns ``target_df``; the test verifies the
    qualified table name passed through and that the same frame object
    comes back.
    """
    # arrange
    client = SparkClient()
    mocked_spark_read.table.return_value = target_df
    client._session = mocked_spark_read

    # act
    result_df = client.read_table(table, database)

    # assert: identical object expected, since the mock returns it directly
    mocked_spark_read.table.assert_called_once_with(target_table_name)
    assert result_df == target_df