def expected_metadata_sorting():
    """Build a ``TableMetadata`` whose snapshot log is filled in after construction.

    Two snapshots are created: an older one, and a newer one that receives
    the older snapshot's id as its third positional argument. The metadata
    object is constructed with an initially empty snapshot-log list, and the
    log entries are then appended to that same list object -- the newer
    snapshot's entry first, the older snapshot's entry second.

    NOTE(review): appending after construction presumably sidesteps any
    ordering check performed by ``TableMetadata`` -- confirm against its
    constructor before reordering these statements.

    Returns:
        The constructed ``TableMetadata`` instance.
    """
    long_columns = [
        NestedField.required(field_id, column_name, LongType.get())
        for field_id, column_name in ((1, "x"), (2, "y"), (3, "z"))
    ]
    schema = Schema(*long_columns)

    partition_spec = (
        PartitionSpec.builder_for(schema)
        .with_spec_id(5)
        .build()
    )

    # Re-seeds the global RNG so the offset drawn below is reproducible.
    random.seed(1234)
    # Snapshot ids come from the wall clock (seconds) and are reused as the
    # ``timestamp_millis`` value of each snapshot.
    started_at = int(time.time())
    older_id = started_at - random.randint(0, 3600)
    older_manifest = GenericManifestFile(
        file=Files.local_input("file:/tmp/manfiest.1.avro"),
        spec_id=partition_spec.spec_id,
    )
    older_snapshot = BaseSnapshot(
        ops,
        older_id,
        None,
        timestamp_millis=older_id,
        manifests=[older_manifest],
    )

    newer_id = int(time.time())
    newer_manifest = GenericManifestFile(
        file=Files.local_input("file:/tmp/manfiest.2.avro"),
        spec_id=partition_spec.spec_id,
    )
    newer_snapshot = BaseSnapshot(
        ops,
        newer_id,
        older_id,
        timestamp_millis=newer_id,
        manifests=[newer_manifest],
    )

    # The list is handed over empty on purpose; it is mutated only after the
    # metadata object exists.
    snapshot_log = []
    last_updated = int(time.time())
    metadata = TableMetadata(
        ops,
        None,
        "s3://bucket/test/location",
        last_updated,
        3,
        schema,
        5,
        [partition_spec],
        {"property": "value"},
        newer_id,
        [older_snapshot, newer_snapshot],
        snapshot_log,
    )

    # Newer entry goes in first, older entry second.
    for snapshot in (newer_snapshot, older_snapshot):
        snapshot_log.append(
            SnapshotLogEntry(snapshot.timestamp_millis, snapshot.snapshot_id)
        )

    return metadata
def missing_spec_list():
    """Build a ``TableMetadata`` carrying a single identity-partitioned spec.

    The schema has three required long columns (``x``, ``y``, ``z``). The
    partition spec is an identity partition on ``x`` with spec id 6, and it
    is passed to ``TableMetadata`` as a one-element tuple. Two snapshots
    (older and newer) are attached, and the snapshot log is an empty list.

    Returns:
        The constructed ``TableMetadata`` instance.
    """
    long_columns = [
        NestedField.required(field_id, column_name, LongType.get())
        for field_id, column_name in ((1, "x"), (2, "y"), (3, "z"))
    ]
    table_schema = Schema(*long_columns)

    partition_spec = (
        PartitionSpec.builder_for(table_schema)
        .identity("x")
        .with_spec_id(6)
        .build()
    )

    # Re-seeds the global RNG so the offset drawn below is reproducible.
    random.seed(1234)
    started_at = int(time.time())
    older_id = started_at - random.randint(0, 3600)
    older_manifest = GenericManifestFile(
        file=Files.local_input("file:/tmp/manfiest.1.avro"),
        spec_id=partition_spec.spec_id,
    )
    older_snapshot = BaseSnapshot(
        ops,
        older_id,
        None,
        timestamp_millis=older_id,
        manifests=[older_manifest],
    )

    newer_id = int(time.time())
    newer_manifest = GenericManifestFile(
        file=Files.local_input("file:/tmp/manfiest.2.avro"),
        spec_id=partition_spec.spec_id,
    )
    newer_snapshot = BaseSnapshot(
        ops,
        newer_id,
        older_id,
        timestamp_millis=newer_id,
        manifests=[newer_manifest],
    )

    last_updated = int(time.time())
    return TableMetadata(
        ops,
        None,
        "s3://bucket/test/location",
        last_updated,
        3,
        table_schema,
        6,
        (partition_spec,),
        {"property": "value"},
        newer_id,
        [older_snapshot, newer_snapshot],
        [],
    )
def snapshot_manifests():
    """Return a tuple of two ``GenericManifestFile`` objects with spec id 0.

    Each manifest wraps a ``Files.local_input`` handle for one of two fixed
    local Avro paths; the tuple keeps them in path order (manifest1, then
    manifest2).
    """
    manifest_paths = (
        "file:/tmp/manifest1.avro",
        "file:/tmp/manifest2.avro",
    )
    return tuple(
        GenericManifestFile(file=Files.local_input(path), spec_id=0)
        for path in manifest_paths
    )
def add_all(self, values):
    """Convert the non-string entries of ``values`` to record dicts and write them.

    Args:
        values: Iterable of entries. Anything that is a ``str`` is skipped;
            every other entry is converted with
            ``GenericManifestFile.to_avro_record_dict``.

    The resulting list is passed to ``writer`` together with ``self.file``
    and ``self.schema``. Nothing is returned.
    """
    records = []
    for entry in values:
        # String entries are filtered out rather than converted.
        if isinstance(entry, str):
            continue
        records.append(GenericManifestFile.to_avro_record_dict(entry))

    writer(self.file, self.schema, records)