def test_get_batch_with_split_on_whole_table(test_df):
    """Splitting on the whole table should hand back the full dataframe unchanged."""
    batch = PandasExecutionEngine().get_batch_data(
        RuntimeDataBatchSpec(
            batch_data=test_df,
            splitter_method="_split_on_whole_table",
        )
    )
    # The whole-table splitter is a no-op partition: all 120 rows x 10 cols survive.
    assert batch.dataframe.shape == (120, 10)
def test_sample_using_random(test_df):
    """Random row sampling with a fixed seed should draw a reproducible subset."""
    # Seeding the module-level RNG makes the sampled row count deterministic.
    random.seed(1)
    batch = PandasExecutionEngine().get_batch_data(
        RuntimeDataBatchSpec(
            batch_data=test_df,
            sampling_method="_sample_using_random",
        )
    )
    assert batch.dataframe.shape == (13, 10)
def test_get_batch_data(test_sparkdf, basic_spark_df_execution_engine):
    """Fetching batch data for a Spark dataframe should preserve its dimensions."""
    result_df = basic_spark_df_execution_engine.get_batch_data(
        RuntimeDataBatchSpec(
            batch_data=test_sparkdf,
            data_asset_name="DATA_ASSET",
        )
    ).dataframe
    assert result_df.count() == 120
    assert len(result_df.columns) == 10