def schema():
    """Build the six-column schema used by the surrounding tests.

    Columns, in field-id order:
        1 "id"         required integer
        2 "no_stats"   optional integer
        3 "required"   required string
        4 "all_nulls"  optional string
        5 "some_nulls" optional string
        6 "no_nulls"   optional string

    Returns:
        A ``Schema`` holding the fields listed above.
    """
    columns = (
        NestedField.required(1, "id", IntegerType.get()),
        NestedField.optional(2, "no_stats", IntegerType.get()),
        NestedField.required(3, "required", StringType.get()),
        NestedField.optional(4, "all_nulls", StringType.get()),
        NestedField.optional(5, "some_nulls", StringType.get()),
        NestedField.optional(6, "no_nulls", StringType.get()),
    )
    return Schema(*columns)
def inc_man_spec():
    """Build a partition spec that identity-partitions on four columns.

    A local schema is created with the fields "id" (required integer, id 1)
    and "all_nulls", "some_nulls", "no_nulls" (optional strings, ids 4-6).
    The returned spec has spec id 0 and one identity partition field per
    column, added in that order.

    Returns:
        The ``PartitionSpec`` produced by the builder.
    """
    partition_schema = Schema(
        NestedField.required(1, "id", IntegerType.get()),
        NestedField.optional(4, "all_nulls", StringType.get()),
        NestedField.optional(5, "some_nulls", StringType.get()),
        NestedField.optional(6, "no_nulls", StringType.get()),
    )

    spec_builder = PartitionSpec.builder_for(partition_schema).with_spec_id(0)
    # Re-bind on every step so this works exactly like the fluent chain,
    # whatever object each identity() call hands back.
    for column_name in ("id", "all_nulls", "some_nulls", "no_nulls"):
        spec_builder = spec_builder.identity(column_name)
    return spec_builder.build()
def test_partition_spec(self):
    """Check that partition specs are equal to their round-tripped copies.

    Builds one single-field spec per (transform, column) combination below
    and asserts that ``TestHelpers.round_trip_serialize(spec)`` compares
    equal to the spec it was given:

    * identity and bucket(128) on every column of the schema,
    * year/month/day on "d" and year/month/day/hour on "ts",
    * truncate(10) on "i", "l", "dec" and "s",
    * two specs added with the transform name "unsupported" on source
      id 6, one without and one with an explicit field id (1111).
    """
    schema = Schema(
        NestedField.required(1, "i", IntegerType.get()),
        NestedField.required(2, "l", LongType.get()),
        NestedField.required(3, "d", DateType.get()),
        NestedField.required(4, "t", TimeType.get()),
        NestedField.required(5, "ts", TimestampType.without_timezone()),
        NestedField.required(6, "dec", DecimalType.of(9, 2)),
        NestedField.required(7, "s", StringType.get()),
        NestedField.required(8, "u", UUIDType.get()),
        NestedField.required(9, "f", FixedType.of_length(3)),
        NestedField.required(10, "b", BinaryType.get()),
    )

    def new_builder():
        # Every spec starts from its own fresh builder over the same schema.
        return PartitionSpec.builder_for(schema)

    every_column = ("i", "l", "d", "t", "ts", "dec", "s", "u", "f", "b")
    truncatable_columns = ("i", "l", "dec", "s")

    specs = [new_builder().identity(name).build() for name in every_column]
    specs += [new_builder().bucket(name, 128).build() for name in every_column]
    specs += [
        new_builder().year("d").build(),
        new_builder().month("d").build(),
        new_builder().day("d").build(),
        new_builder().year("ts").build(),
        new_builder().month("ts").build(),
        new_builder().day("ts").build(),
        new_builder().hour("ts").build(),
    ]
    specs += [
        new_builder().truncate(name, 10).build() for name in truncatable_columns
    ]
    specs += [
        new_builder().add_without_field_id(
            6, "dec_unsupported", "unsupported").build(),
        new_builder().add(6, 1111, "dec_unsupported", "unsupported").build(),
    ]

    # All specs are constructed before the first comparison runs.
    for candidate in specs:
        self.assertEqual(candidate,
                         TestHelpers.round_trip_serialize(candidate))
import io import pickle import uuid from iceberg.api import DataFile from iceberg.api.expressions import (BoundPredicate, Expressions, ExpressionVisitors, Literal, Operation, UnboundPredicate) from iceberg.api.schema import Schema from iceberg.api.struct_like import StructLike from iceberg.api.types import (BinaryType, Conversions, DateType, DecimalType, FixedType, IntegerType, NestedField, StringType, TimestampType, TimeType) import pytest exp_schema = Schema(NestedField.optional(34, "a", IntegerType.get())) class TestHelpers(object): @staticmethod def assert_all_references_bound(message, expr): ExpressionVisitors.visit(expr, TestHelpers.CheckReferencesBound(message)) @staticmethod def assert_and_unwrap(expr, expected=None): if expected is not None: assert isinstance(expr, expected) else: assert isinstance(expr, BoundPredicate)