def test_gets_max_denies_text(self):
    """Check that get_max_partition_value rejects string and bool partitions.

    Sets up data through ``self.mock_publish`` with a ``string`` and a
    ``bool`` partition, then expects ``ValueError`` when the max value of
    either partition is requested.
    """
    key = "safekeyprefixname/safedatasetname"
    bucket = "safebucketname"
    part_types = {"string_col": "string", "bool_col": "bool"}

    # Only the returned bucket is used below; the remaining items are still
    # unpacked (so the 4-item return shape is checked) but are ignored.
    bucket, _df, _partitions, _published_files = self.mock_publish(
        bucket=bucket, key=key, partition_types=part_types)

    # The calls are expected to raise, so their results are never bound.
    with pytest.raises(ValueError):
        fetch_parq.get_max_partition_value(
            bucket=bucket, key=key, partition="string_col")

    with pytest.raises(ValueError):
        fetch_parq.get_max_partition_value(
            bucket=bucket, key=key, partition="bool_col")
def test_gets_max_denies_text():
    """Check that get_max_partition_value rejects string and bool partitions.

    Builds a grouped dataframe with the two partition columns plus an int
    ``metrics`` column, hands it to ``setup_partitioned_parquet``, then
    expects ``ValueError`` when the max value of either partition is
    requested.
    """
    key = "safekeyprefixname/safedatasetname"
    bucket = "safebucketname"
    part_types = {"string_col": "string", "bool_col": "bool"}

    # The dataframe carries one extra column that is not a partition.
    col_types = dict(part_types)
    col_types["metrics"] = "int"
    df = setup_grouped_dataframe(count=10, columns=col_types)

    # Only the returned bucket is used below; the second item is still
    # unpacked (so the 2-item return shape is checked) but is ignored.
    bucket, _parquet_paths = setup_partitioned_parquet(
        dataframe=df, bucket=bucket, key=key,
        partition_data_types=part_types)

    # The calls are expected to raise, so their results are never bound.
    with pytest.raises(ValueError):
        fetch_parq.get_max_partition_value(
            bucket=bucket, key=key, partition="string_col")

    with pytest.raises(ValueError):
        fetch_parq.get_max_partition_value(
            bucket=bucket, key=key, partition="bool_col")
def test_gets_max(self):
    """Check that the fetched max of ``int_col`` equals the dataframe's max.

    Data comes from ``self.mock_publish`` called with an ``int`` and a
    ``float`` partition; only the ``int_col`` partition is queried.
    """
    dataset_key = "safekeyprefixname/safedatasetname"
    partition_types = {"int_col": "int", "float_col": "float"}

    published = self.mock_publish(
        bucket="safebucketname",
        key=dataset_key,
        partition_types=partition_types,
    )
    # Unpack after the call; the last two items are not needed here.
    bucket_name, frame, _partitions, _published_files = published

    observed_max = fetch_parq.get_max_partition_value(
        bucket=bucket_name, key=dataset_key, partition="int_col")

    # Test max of column is max of the fetched partition
    assert frame["int_col"].max() == observed_max
def test_gets_max():
    """Check that the fetched max of ``int_col`` equals the dataframe's max.

    The dataframe is built with an ``int`` and a ``float`` column, but only
    ``int_col`` is passed as a partition to ``setup_partitioned_parquet``.
    """
    dataset_key = "safekeyprefixname/safedatasetname"
    column_types = {"int_col": "int", "float_col": "float"}
    frame = setup_grouped_dataframe(count=10, columns=column_types)

    setup_result = setup_partitioned_parquet(
        dataframe=frame,
        bucket="safebucketname",
        key=dataset_key,
        partition_data_types={"int_col": "int"},
    )
    # Unpack after the call; the returned paths are not needed here.
    bucket_name, _parquet_paths = setup_result

    observed_max = fetch_parq.get_max_partition_value(
        bucket=bucket_name, key=dataset_key, partition="int_col")

    # Test max of column is max of the fetched partition
    assert frame["int_col"].max() == observed_max
def get_max_partition_value(self, bucket: str, key: str, partition: str) -> any:
    """Forward the arguments to ``get_max_partition_value`` and return its result.

    Args:
        bucket: Passed through unchanged as ``bucket``.
        key: Passed through unchanged as ``key``.
        partition: Passed through unchanged as ``partition``.

    Returns:
        Whatever the delegated call returns.
    """
    # NOTE(review): presumably this is a method and the name below resolves
    # to a module-level function of the same name - confirm against the
    # enclosing class.
    # NOTE(review): the ``any`` return annotation is the builtin function;
    # ``typing.Any`` was probably intended - confirm before changing.
    max_value = get_max_partition_value(
        bucket=bucket,
        key=key,
        partition=partition,
    )
    return max_value