def test_table_scan_honors_select_without_case_sensitivity(ts_table):
    scan1 = ts_table.new_scan().case_sensitive(False).select(["ID"])
    # order of refinements shouldn't matter
    scan2 = ts_table.new_scan().select(["ID"]).case_sensitive(False)

    expected_schema = Schema([NestedField.required(1, "id", IntegerType.get())])

    assert scan1.schema.as_struct() == expected_schema.as_struct()
    assert scan2.schema.as_struct() == expected_schema.as_struct()
def bind(self, struct: StructType, case_sensitive: bool = True) -> BoundReference:
    from iceberg.api import Schema
    schema = Schema(struct.fields)
    field = schema.find_field(self.name) if case_sensitive else schema.case_insensitive_find_field(self.name)

    ValidationException.check(field is not None,
                              "Cannot find field '%s' in struct: %s", (self.name, schema.as_struct()))

    return BoundReference(struct, field)
def test_column_rename(primitive_type_test_file):
    expected_schema = Schema([NestedField.required(1, "int_col", IntegerType.get()),
                              NestedField.optional(2, "bigint_col", LongType.get()),
                              NestedField.optional(3, "string_col", StringType.get()),
                              NestedField.optional(4, "float_col", FloatType.get()),
                              NestedField.optional(5, "dbl_col", DoubleType.get())])

    input_file = FileSystemInputFile(get_fs(primitive_type_test_file, conf={}),
                                     primitive_type_test_file, {})
    reader = ParquetReader(input_file, expected_schema, {}, Expressions.always_true(), True)

    pyarrow_array = [pa.array([1, 2, 3, 4, 5], type=pa.int32()),
                     pa.array([1, 2, 3, None, 5], type=pa.int64()),
                     pa.array(['us', 'can', 'us', 'us', 'can'], type=pa.string()),
                     pa.array([1.0, 2.0, 3.0, 4.0, 5.0], type=pa.float32()),
                     pa.array([1.0, 2.0, 3.0, 4.0, 5.0], type=pa.float64())]

    schema = pa.schema([pa.field("int_col", pa.int32(), False),
                        pa.field("bigint_col", pa.int64(), True),
                        pa.field("string_col", pa.string(), True),
                        pa.field("float_col", pa.float32(), True),
                        pa.field("dbl_col", pa.float64(), True)])

    source_table = pa.table(pyarrow_array, schema=schema)

    target_table = reader.read()
    assert source_table == target_table
def expected_metadata_sorting():
    spec_schema = Schema(NestedField.required(1, "x", LongType.get()),
                         NestedField.required(2, "y", LongType.get()),
                         NestedField.required(3, "z", LongType.get()))

    spec = PartitionSpec \
        .builder_for(spec_schema) \
        .with_spec_id(5) \
        .build()

    random.seed(1234)
    previous_snapshot_id = int(time.time()) - random.randint(0, 3600)
    previous_snapshot = BaseSnapshot(ops, previous_snapshot_id, None, timestamp_millis=previous_snapshot_id,
                                     manifests=[GenericManifestFile(file=Files.local_input("file:/tmp/manfiest.1.avro"),
                                                                    spec_id=spec.spec_id)])

    current_snapshot_id = int(time.time())
    current_snapshot = BaseSnapshot(ops, current_snapshot_id, previous_snapshot_id, timestamp_millis=current_snapshot_id,
                                    manifests=[GenericManifestFile(file=Files.local_input("file:/tmp/manfiest.2.avro"),
                                                                   spec_id=spec.spec_id)])

    reversed_snapshot_log = list()
    metadata = TableMetadata(ops, None, "s3://bucket/test/location",
                             int(time.time()), 3, spec_schema, 5, [spec],
                             {"property": "value"}, current_snapshot_id,
                             [previous_snapshot, current_snapshot], reversed_snapshot_log)

    reversed_snapshot_log.append(SnapshotLogEntry(current_snapshot.timestamp_millis, current_snapshot.snapshot_id))
    reversed_snapshot_log.append(SnapshotLogEntry(previous_snapshot.timestamp_millis, previous_snapshot.snapshot_id))

    return metadata
def convert_avro_schema_to_iceberg(avro_schema):
    if avro_schema.get(AvroToIceberg.FIELD_TYPE_PROP) != "record":
        raise RuntimeError("Cannot convert avro schema to iceberg %s" % avro_schema)

    struct = AvroToIceberg.convert_type(avro_schema, None)

    return Schema(struct[0].fields)
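# Hedged usage sketch (illustration only, not from the source): the converter expects a
# parsed Avro record schema as a plain dict and rejects anything whose top-level type is
# not "record". The "field-id" attribute below is an assumption about how field ids are
# carried through AvroToIceberg.convert_type.
example_avro_schema = {"type": "record",
                       "name": "example",
                       "fields": [{"name": "id", "type": "long", "field-id": 1},
                                  {"name": "data", "type": ["null", "string"], "field-id": 2}]}
example_iceberg_schema = convert_avro_schema_to_iceberg(example_avro_schema)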
def test_schema_evolution_filter(primitive_type_test_file):
    expected_schema = Schema([NestedField.required(1, "int_col", IntegerType.get()),
                              NestedField.optional(2, "bigint_col", LongType.get()),
                              NestedField.optional(16, "other_new_col", LongType.get()),
                              NestedField.optional(4, "float_col", FloatType.get()),
                              NestedField.optional(5, "dbl_col", DoubleType.get()),
                              NestedField.optional(3, "string_col", StringType.get()),
                              NestedField.optional(15, "new_col", StringType.get())])

    input_file = FileSystemInputFile(get_fs(primitive_type_test_file, conf={}),
                                     primitive_type_test_file, {})
    reader = ParquetReader(input_file, expected_schema, {}, Expressions.not_null("new_col"), True)

    schema = pa.schema([pa.field("int_col", pa.int32(), nullable=False),
                        pa.field("bigint_col", pa.int64(), nullable=True),
                        pa.field("other_new_col", pa.int64(), nullable=True),
                        pa.field("float_col", pa.float32(), nullable=True),
                        pa.field("dbl_col", pa.float64(), nullable=True),
                        pa.field("string_col", pa.string(), nullable=True),
                        pa.field("new_col", pa.string(), nullable=True)])

    pyarrow_not_null_array = [pa.array([], type=pa.int32()),
                              pa.array([], type=pa.int64()),
                              pa.array([], type=pa.int32()),
                              pa.array([], type=pa.float32()),
                              pa.array([], type=pa.float64()),
                              pa.array([], type=pa.string()),
                              pa.array([], type=pa.string())]

    not_null_table = pa.table(pyarrow_not_null_array, schema=schema)

    pyarrow_null_array = [pa.array([1, 2, 3, 4, 5], type=pa.int32()),
                          pa.array([1, 2, 3, None, 5], type=pa.int64()),
                          pa.array([None, None, None, None, None], type=pa.int64()),
                          pa.array([1.0, 2.0, 3.0, 4.0, 5.0], type=pa.float32()),
                          pa.array([1.0, 2.0, 3.0, 4.0, 5.0], type=pa.float64()),
                          pa.array(['us', 'can', 'us', 'us', 'can'], type=pa.string()),
                          pa.array([None, None, None, None, None], type=pa.string())]

    null_table = pa.table(pyarrow_null_array, schema=schema)

    target_table = reader.read()
    assert not_null_table == target_table

    reader = ParquetReader(input_file, expected_schema, {}, Expressions.is_null("new_col"), True)
    target_table = reader.read()
    assert null_table == target_table
def from_json(json_obj):
    if isinstance(json_obj, str):
        type_var = SchemaParser.type_from_dict(json.loads(json_obj))
    else:
        type_var = SchemaParser.type_from_dict(json_obj)

    if type_var is not None and type_var.is_nested_type() and type_var.as_nested_type().is_struct_type():
        return Schema(type_var.as_nested_type().as_struct_type().fields)
    else:
        raise RuntimeError("Cannot create schema, not a struct type: %s" % type_var)
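# Hedged usage sketch (illustration only, not from the source): from_json accepts either
# a JSON string or an already-parsed dict, provided the top-level type is a struct. The
# field layout below follows the Iceberg schema JSON format.
example_schema_dict = {"type": "struct",
                       "fields": [{"id": 1, "name": "id", "required": True, "type": "long"},
                                  {"id": 2, "name": "data", "required": False, "type": "string"}]}
example_schema = from_json(example_schema_dict)  # or from_json(json.dumps(example_schema_dict))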
def test_to_json_conversion():
    spec_schema = Schema(NestedField.required(1, "id", IntegerType.get()),
                         NestedField.required(2, "data", StringType.get()))
    spec = PartitionSpec \
        .builder_for(spec_schema) \
        .identity("id") \
        .bucket("data", 16) \
        .build()

    expected = '{"spec-id": 0, "fields": [' \
               '{"name": "id", "transform": "identity", "source-id": 1}, ' \
               '{"name": "data_bucket", "transform": "bucket[16]", "source-id": 2}]}'

    assert expected == PartitionSpecParser.to_json(spec)
def test_raise_exception_with_invalid_json():
    spec_schema = Schema(NestedField.required(1, "id", IntegerType.get()),
                         NestedField.required(2, "data", StringType.get()),
                         NestedField.required(3, "num", DecimalType.of(9, 2)))

    spec_string = '{"spec-id": 0, "fields": [' \
                  '{"name": "id", "transform": "identity", "source-id": 1, "field-id": 1000}, ' \
                  '{"name": "data_bucket", "transform": "bucket[16]", "source-id": 2, "field-id": 1001}, ' \
                  '{"name": "data1", "transform": "bucket[16]", "source-id": 2}, ' \
                  '{"name": "data2", "transform": "bucket[8]", "source-id": 2}, ' \
                  '{"name": "num_bucket", "transform": "bucket[8]", "source-id": 3}]}'

    with pytest.raises(RuntimeError):
        PartitionSpecParser.from_json(spec_schema, spec_string)
def test_column_upcast(primitive_type_test_file):
    expected_schema = Schema([NestedField.required(1, "int_col", LongType.get())])

    input_file = FileSystemInputFile(get_fs(primitive_type_test_file, conf={}),
                                     primitive_type_test_file, {})
    reader = ParquetReader(input_file, expected_schema, {}, Expressions.always_true(), True)

    pyarrow_array = [pa.array([1, 2, 3, 4, 5], type=pa.int32())]
    source_table = pa.table(pyarrow_array,
                            schema=pa.schema([pa.field("int_col", pa.int64(), nullable=False)]))

    target_table = reader.read()
    assert source_table == target_table
def arrow_to_iceberg(arrow_schema: pa.Schema) -> Schema:
    """
    Use an arrow schema, which contains the field_id metadata, to create an equivalent iceberg Schema

    Parameters
    ----------
    arrow_schema : pyarrow.Schema
        An Arrow schema with the parquet field_id metadata

    Returns
    -------
    iceberg.api.Schema
        returns an equivalent iceberg Schema based on the arrow schema read from the file
    """
    return Schema([get_field(col) for col in arrow_schema])
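# Hedged usage sketch (illustration only, not from the source): an Arrow schema whose
# fields carry the Parquet field_id metadata. The metadata key "PARQUET:field_id" is how
# pyarrow exposes Parquet field ids; get_field is assumed to read it when building each
# NestedField.
example_arrow_schema = pa.schema([pa.field("id", pa.int64(), nullable=False,
                                           metadata={b"PARQUET:field_id": b"1"}),
                                  pa.field("data", pa.string(), nullable=True,
                                           metadata={b"PARQUET:field_id": b"2"})])
example_iceberg_schema = arrow_to_iceberg(example_arrow_schema)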
def test_unnested_complex_types(unnested_complex_type_test_parquet_file):
    expected_schema = Schema([NestedField.optional(1, "list_int_col", ListType.of_optional(3, IntegerType.get())),
                              NestedField.optional(4, "list_str_col", ListType.of_optional(6, StringType.get())),
                              NestedField.optional(7, "struct_col",
                                                   StructType.of([NestedField.optional(8, "f1", IntegerType.get()),
                                                                  NestedField.optional(9, "f2", StringType.get())]))])

    converted_schema = convert_parquet_to_iceberg(unnested_complex_type_test_parquet_file)
    compare_schema(expected_schema, converted_schema)
def rg_expected_schema():
    return Schema([NestedField.required(1, "string_col", StringType.get()),
                   NestedField.required(2, "long_col", LongType.get()),
                   NestedField.required(3, "int_col", IntegerType.get()),
                   NestedField.optional(4, "float_col", FloatType.get()),
                   NestedField.optional(5, "null_col", StringType.get()),
                   NestedField.optional(6, "missing_col", StringType.get()),
                   NestedField.optional(7, "no_stats_col", StringType.get()),
                   NestedField.optional(8, "ts_wtz_col", TimestampType.with_timezone()),
                   NestedField.optional(9, "ts_wotz_col", TimestampType.without_timezone()),
                   NestedField.optional(10, "big_decimal_type", DecimalType.of(38, 5)),
                   NestedField.optional(11, "small_decimal_type", DecimalType.of(10, 2)),
                   NestedField.optional(12, "date_type", DateType.get())])
def test_primitive_types(primitive_type_test_parquet_file):
    expected_schema = Schema([NestedField.required(1, "int_col", IntegerType.get()),
                              NestedField.optional(2, "bigint_col", LongType.get()),
                              NestedField.optional(3, "str_col", StringType.get()),
                              NestedField.optional(4, "float_col", FloatType.get()),
                              NestedField.optional(5, "dbl_col", DoubleType.get()),
                              NestedField.optional(6, "decimal_col", DecimalType.of(9, 2)),
                              NestedField.optional(7, "big_decimal_col", DecimalType.of(19, 5)),
                              NestedField.optional(8, "huge_decimal_col", DecimalType.of(38, 9)),
                              NestedField.optional(9, "date_col", DateType.get()),
                              NestedField.optional(10, "ts_col", TimestampType.without_timezone()),
                              NestedField.optional(11, "ts_wtz_col", TimestampType.with_timezone()),
                              NestedField.optional(12, "bool_col", BooleanType.get())])

    compare_schema(expected_schema, convert_parquet_to_iceberg(primitive_type_test_parquet_file))
def prune_columns(file_schema: Schema, expected_schema: Schema) -> List[str]:
    """
    Given two Iceberg schemas, return the column names for all ids in the file schema
    that are projected in the expected schema

    Parameters
    ----------
    file_schema : iceberg.api.Schema
        An Iceberg schema of the file being read
    expected_schema : iceberg.api.Schema
        An Iceberg schema of the final projection

    Returns
    -------
    list
        The column names in the file that matched ids in the expected schema
    """
    return [column.name for column in file_schema.as_struct().fields
            if column.id in get_projected_ids(expected_schema)]
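# Hedged usage sketch (illustration only, not from the source): with a file schema that
# has more columns than the projection, only names whose ids appear in the expected
# schema survive. Assumes get_projected_ids returns the set of projected field ids.
example_file_schema = Schema([NestedField.required(1, "id", IntegerType.get()),
                              NestedField.optional(2, "data", StringType.get()),
                              NestedField.optional(3, "extra", StringType.get())])
example_expected_schema = Schema([NestedField.required(1, "id", IntegerType.get()),
                                  NestedField.optional(2, "data", StringType.get())])
assert prune_columns(example_file_schema, example_expected_schema) == ["id", "data"]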
def test_projection(primitive_type_test_file, pyarrow_primitive_array, pyarrow_schema):
    expected_schema = Schema([NestedField.required(1, "int_col", IntegerType.get()),
                              NestedField.optional(2, "bigint_col", LongType.get())])

    input_file = FileSystemInputFile(get_fs(primitive_type_test_file, conf={}),
                                     primitive_type_test_file, {})
    reader = ParquetReader(input_file, expected_schema, {}, Expressions.always_true(), True)

    source_table = pa.table(pyarrow_primitive_array, schema=pyarrow_schema)
    num_cols = source_table.num_columns
    for i in range(1, num_cols - 1):
        source_table = source_table.remove_column(num_cols - i)

    assert source_table == reader.read()
def test_compound_filter(primitive_type_test_file):
    expected_schema = Schema([NestedField.required(1, "int_col", IntegerType.get()),
                              NestedField.optional(2, "bigint_col", LongType.get()),
                              NestedField.optional(4, "float_col", FloatType.get()),
                              NestedField.optional(5, "dbl_col", DoubleType.get()),
                              NestedField.optional(3, "string_col", StringType.get())])

    input_file = FileSystemInputFile(get_fs(primitive_type_test_file, conf={}),
                                     primitive_type_test_file, {})
    reader = ParquetReader(input_file, expected_schema, {},
                           Expressions.and_(Expressions.equal("string_col", "us"),
                                            Expressions.equal("int_col", 1)),
                           True)

    pyarrow_array = [pa.array([1], type=pa.int32()),
                     pa.array([1], type=pa.int64()),
                     pa.array([1.0], type=pa.float32()),
                     pa.array([1.0], type=pa.float64()),
                     pa.array(['us'], type=pa.string())]

    source_table = pa.table(pyarrow_array,
                            schema=pa.schema([pa.field("int_col", pa.int32(), nullable=False),
                                              pa.field("bigint_col", pa.int64(), nullable=True),
                                              pa.field("float_col", pa.float32(), nullable=True),
                                              pa.field("dbl_col", pa.float64(), nullable=True),
                                              pa.field("string_col", pa.string(), nullable=True)]))

    target_table = reader.read()
    assert source_table == target_table
def test_decimal_column_add(primitive_type_test_file):
    expected_schema = Schema([NestedField.required(1, "int_col", IntegerType.get()),
                              NestedField.optional(2, "bigint_col", LongType.get()),
                              NestedField.optional(4, "float_col", FloatType.get()),
                              NestedField.optional(5, "dbl_col", DoubleType.get()),
                              NestedField.optional(13, "new_dec_col", DecimalType.of(38, 9))])

    input_file = FileSystemInputFile(get_fs(primitive_type_test_file, conf={}),
                                     primitive_type_test_file, {})
    reader = ParquetReader(input_file, expected_schema, {}, Expressions.always_true(), True)

    pyarrow_array = [pa.array([1, 2, 3, 4, 5], type=pa.int32()),
                     pa.array([1, 2, 3, None, 5], type=pa.int64()),
                     pa.array([1.0, 2.0, 3.0, 4.0, 5.0], type=pa.float32()),
                     pa.array([1.0, 2.0, 3.0, 4.0, 5.0], type=pa.float64()),
                     pa.array([None, None, None, None, None], type=pa.decimal128(38, 9))]

    source_table = pa.table(pyarrow_array,
                            schema=pa.schema([pa.field("int_col", pa.int32(), nullable=False),
                                              pa.field("bigint_col", pa.int64(), nullable=True),
                                              pa.field("float_col", pa.float32(), nullable=True),
                                              pa.field("dbl_col", pa.float64(), nullable=True),
                                              pa.field("new_dec_col", pa.decimal128(38, 9), nullable=True)]))

    target_table = reader.read()
    assert source_table == target_table
def test_to_json_conversion():
    spec_schema = Schema(NestedField.required(1, "id", IntegerType.get()),
                         NestedField.required(2, "data", StringType.get()),
                         NestedField.required(3, "num", DecimalType.of(9, 2)))

    spec = PartitionSpec \
        .builder_for(spec_schema) \
        .identity("id") \
        .bucket("data", 16) \
        .add_without_field_id(2, "data1", "bucket[16]") \
        .add(2, 1010, "data2", "bucket[8]") \
        .bucket("num", 8) \
        .build()

    expected = '{"spec-id": 0, "fields": [' \
               '{"name": "id", "transform": "identity", "source-id": 1, "field-id": 1000}, ' \
               '{"name": "data_bucket", "transform": "bucket[16]", "source-id": 2, "field-id": 1001}, ' \
               '{"name": "data1", "transform": "bucket[16]", "source-id": 2, "field-id": 1002}, ' \
               '{"name": "data2", "transform": "bucket[8]", "source-id": 2, "field-id": 1010}, ' \
               '{"name": "num_bucket", "transform": "bucket[8]", "source-id": 3, "field-id": 1011}]}'

    assert expected == PartitionSpecParser.to_json(spec)
def missing_spec_list():
    schema = Schema(NestedField.required(1, "x", LongType.get()),
                    NestedField.required(2, "y", LongType.get()),
                    NestedField.required(3, "z", LongType.get()))
    spec = PartitionSpec.builder_for(schema).identity("x").with_spec_id(6).build()

    random.seed(1234)
    previous_snapshot_id = int(time.time()) - random.randint(0, 3600)
    previous_snapshot = BaseSnapshot(ops, previous_snapshot_id, None, timestamp_millis=previous_snapshot_id,
                                     manifests=[GenericManifestFile(file=Files.local_input("file:/tmp/manfiest.1.avro"),
                                                                    spec_id=spec.spec_id)])

    current_snapshot_id = int(time.time())
    current_snapshot = BaseSnapshot(ops, current_snapshot_id, previous_snapshot_id, timestamp_millis=current_snapshot_id,
                                    manifests=[GenericManifestFile(file=Files.local_input("file:/tmp/manfiest.2.avro"),
                                                                   spec_id=spec.spec_id)])

    return TableMetadata(ops, None, "s3://bucket/test/location",
                         int(time.time()), 3, schema, 6, (spec,),
                         {"property": "value"}, current_snapshot_id,
                         [previous_snapshot, current_snapshot], [])
def new_table_metadata(ops: TableOperations,
                       schema: Schema,
                       spec: PartitionSpec,
                       location: str,
                       properties: dict = None) -> "TableMetadata":
    last_column_id = AtomicInteger(0)
    fresh_schema = assign_fresh_ids(schema, last_column_id.increment_and_get)

    spec_builder = PartitionSpec.builder_for(fresh_schema)
    for field in spec.fields:
        src_name = schema.find_column_name(field.source_id)
        spec_builder.add(field.source_id,
                         fresh_schema.find_field(src_name).field_id,
                         field.name,
                         str(field.transform))
    fresh_spec = spec_builder.build()

    properties = properties if properties is not None else dict()

    return TableMetadata(ops, None, location,
                         int(time.time() * 1000), last_column_id.get(), fresh_schema,
                         TableMetadata.INITIAL_SPEC_ID, [fresh_spec], properties,
                         -1, list(), list())
def test_basic_read(primitive_type_test_file, pyarrow_primitive_array, pyarrow_schema):
    expected_schema = Schema([NestedField.required(1, "int_col", IntegerType.get()),
                              NestedField.optional(2, "bigint_col", LongType.get()),
                              NestedField.optional(3, "str_col", StringType.get()),
                              NestedField.optional(4, "float_col", FloatType.get()),
                              NestedField.optional(5, "dbl_col", DoubleType.get()),
                              NestedField.optional(6, "decimal_col", DecimalType.of(9, 2)),
                              NestedField.optional(7, "big_decimal_col", DecimalType.of(19, 5)),
                              NestedField.optional(8, "huge_decimal_col", DecimalType.of(38, 9)),
                              NestedField.optional(9, "date_col", DateType.get()),
                              NestedField.optional(10, "ts_col", TimestampType.without_timezone()),
                              NestedField.optional(11, "ts_wtz_col", TimestampType.with_timezone()),
                              NestedField.optional(12, "bool_col", BooleanType.get())])

    input_file = FileSystemInputFile(get_fs(primitive_type_test_file, conf={}),
                                     primitive_type_test_file, {})
    reader = ParquetReader(input_file, expected_schema, {}, Expressions.always_true(), True)

    source_table = pa.table(pyarrow_primitive_array, schema=pyarrow_schema)

    assert reader.read() == source_table
def test_to_json_conversion():
    spec_schema = Schema(NestedField.required(1, "i", IntegerType.get()),
                         NestedField.required(2, "l", LongType.get()),
                         NestedField.required(3, "d", DateType.get()),
                         NestedField.required(4, "t", TimeType.get()),
                         NestedField.required(5, "ts", TimestampType.without_timezone()),
                         NestedField.required(6, "dec", DecimalType.of(9, 2)),
                         NestedField.required(7, "s", StringType.get()),
                         NestedField.required(8, "u", UUIDType.get()),
                         NestedField.required(9, "f", FixedType.of_length(3)),
                         NestedField.required(10, "b", BinaryType.get()))

    specs = [PartitionSpec.builder_for(spec_schema).identity("i").build(),
             PartitionSpec.builder_for(spec_schema).identity("l").build(),
             PartitionSpec.builder_for(spec_schema).identity("d").build(),
             PartitionSpec.builder_for(spec_schema).identity("t").build(),
             PartitionSpec.builder_for(spec_schema).identity("ts").build(),
             PartitionSpec.builder_for(spec_schema).identity("dec").build(),
             PartitionSpec.builder_for(spec_schema).identity("s").build(),
             PartitionSpec.builder_for(spec_schema).identity("u").build(),
             PartitionSpec.builder_for(spec_schema).identity("f").build(),
             PartitionSpec.builder_for(spec_schema).identity("b").build(),
             PartitionSpec.builder_for(spec_schema).bucket("i", 128).build(),
             PartitionSpec.builder_for(spec_schema).bucket("l", 128).build(),
             PartitionSpec.builder_for(spec_schema).bucket("d", 128).build(),
             PartitionSpec.builder_for(spec_schema).bucket("t", 128).build(),
             PartitionSpec.builder_for(spec_schema).bucket("ts", 128).build(),
             PartitionSpec.builder_for(spec_schema).bucket("dec", 128).build(),
             PartitionSpec.builder_for(spec_schema).bucket("s", 128).build(),
             PartitionSpec.builder_for(spec_schema).year("d").build(),
             PartitionSpec.builder_for(spec_schema).month("d").build(),
             PartitionSpec.builder_for(spec_schema).day("d").build(),
             PartitionSpec.builder_for(spec_schema).year("ts").build(),
             PartitionSpec.builder_for(spec_schema).month("ts").build(),
             PartitionSpec.builder_for(spec_schema).day("ts").build(),
             PartitionSpec.builder_for(spec_schema).hour("ts").build(),
             PartitionSpec.builder_for(spec_schema).truncate("i", 10).build(),
             PartitionSpec.builder_for(spec_schema).truncate("l", 10).build(),
             PartitionSpec.builder_for(spec_schema).truncate("dec", 10).build(),
             PartitionSpec.builder_for(spec_schema).truncate("s", 10).build(),
             PartitionSpec.builder_for(spec_schema).add(6, "dec_bucket", "bucket[16]").build()]

    expected_spec_strs = ["[\n i: identity(1)\n]",
                          "[\n l: identity(2)\n]",
                          "[\n d: identity(3)\n]",
                          "[\n t: identity(4)\n]",
                          "[\n ts: identity(5)\n]",
                          "[\n dec: identity(6)\n]",
                          "[\n s: identity(7)\n]",
                          "[\n u: identity(8)\n]",
                          "[\n f: identity(9)\n]",
                          "[\n b: identity(10)\n]",
                          "[\n i_bucket: bucket[128](1)\n]",
                          "[\n l_bucket: bucket[128](2)\n]",
                          "[\n d_bucket: bucket[128](3)\n]",
                          "[\n t_bucket: bucket[128](4)\n]",
                          "[\n ts_bucket: bucket[128](5)\n]",
                          "[\n dec_bucket: bucket[128](6)\n]",
                          "[\n s_bucket: bucket[128](7)\n]",
                          "[\n d_year: year(3)\n]",
                          "[\n d_month: month(3)\n]",
                          "[\n d_day: day(3)\n]",
                          "[\n ts_year: year(5)\n]",
                          "[\n ts_month: month(5)\n]",
                          "[\n ts_day: day(5)\n]",
                          "[\n ts_hour: hour(5)\n]",
                          "[\n i_truncate: truncate[10](1)\n]",
                          "[\n l_truncate: truncate[10](2)\n]",
                          "[\n dec_truncate: truncate[10](6)\n]",
                          "[\n s_truncate: truncate[10](7)\n]",
                          "[\n dec_bucket: bucket[16](6)\n]"]

    for (spec, expected_spec_str) in zip(specs, expected_spec_strs):
        assert str(spec) == expected_spec_str
def wrap_file_schema(file_struct):
    return Schema(NestedField.required(0, "status", IntegerType.get()),
                  NestedField.required(1, "snapshot_id", LongType.get()),
                  NestedField.required(2, "data_file", file_struct))
def project_schema(part_type, columns):
    return ManifestEntry.wrap_file_schema(Schema(DataFile.get_type(part_type).fields)
                                          .select(columns)
                                          .as_struct())
def base_scan_schema():
    return Schema([NestedField.required(1, "id", IntegerType.get()),
                   NestedField.required(2, "data", StringType.get())])
def test_table_scan_honors_select(ts_table):
    scan = ts_table.new_scan().select(["id"])

    expected_schema = Schema([NestedField.required(1, "id", IntegerType.get())])

    assert scan.schema.as_struct() == expected_schema.as_struct()
import os
import random
import tempfile
import time

from iceberg.api import Files, PartitionSpec, Schema
from iceberg.api.types import BooleanType, IntegerType, LongType, NestedField, StringType
from iceberg.core import (BaseSnapshot,
                          BaseTable,
                          ConfigProperties,
                          GenericManifestFile,
                          SnapshotLogEntry,
                          TableMetadata,
                          TableMetadataParser,
                          TableOperations,
                          TableProperties)
from iceberg.exceptions import AlreadyExistsException, CommitFailedException
import pytest

SCHEMA = Schema([NestedField.optional(1, "b", BooleanType.get())])
METADATA = dict()
VERSIONS = dict()


class LocalTableOperations(TableOperations):
    def current(self):
        raise RuntimeError("Not implemented for tests")

    def refresh(self):
        raise RuntimeError("Not implemented for tests")

    def commit(self, base, metadata):
        raise RuntimeError("Not implemented for tests")

    def new_input_file(self, path):
def iceberg_full_read_projection_schema():
    return Schema([NestedField.required(0, "id", LongType.get()),
                   NestedField.optional(1, "data", StringType.get())])