def _is_schema_update_supported(self, schema_delta):
    if not schema_delta.old_value or not schema_delta.new_value:
        return False

    old_schema = Schema.from_column_dicts(schema_delta.old_value)
    new_schema = Schema.from_column_dicts(schema_delta.new_value)
    dt = old_schema.diff_type_counts(new_schema)
    # We do support name_updates, but we don't support any other type of schema update
    # - except by rewriting the entire table.
    dt.pop("name_updates")
    return sum(dt.values()) == 0
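# A minimal sketch (not from the source) of the rename-only rule above: a renamed
# column keeps its "id" and changes only its "name", so diff_type_counts() reports
# nothing but name_updates and the update is considered supported. Assumes
# diff_type_counts returns a mapping of change-kind -> count.
old_cols = [{"id": "a", "name": "a", "dataType": "integer", "primaryKeyIndex": 0, "size": 64}]
new_cols = [{"id": "a", "name": "a_renamed", "dataType": "integer", "primaryKeyIndex": 0, "size": 64}]
dt = Schema.from_column_dicts(old_cols).diff_type_counts(Schema.from_column_dicts(new_cols))
dt.pop("name_updates")  # the rename is the only change...
assert sum(dt.values()) == 0  # ...so this delta would pass _is_schema_update_supported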
def abcdef_schema():
    return Schema.from_column_dicts(
        [
            {
                "id": "a",
                "name": "a",
                "dataType": "integer",
                "primaryKeyIndex": 0,
                "size": 64,
            },
            {"id": "b", "name": "b", "dataType": "geometry"},
            {"id": "c", "name": "c", "dataType": "boolean"},
            {"id": "d", "name": "d", "dataType": "float"},
            {"id": "e", "name": "e", "dataType": "text"},
            {"id": "f", "name": "f", "dataType": "text"},
        ]
    )
def test_pk_encoder_int_pk():
    schema = Schema.from_column_dicts(
        [
            {
                "name": "mypk",
                "dataType": "integer",
                "size": 64,
                "id": "abc123",
                "primaryKeyIndex": 0,
            }
        ]
    )
    ds = TableV3.new_dataset_for_writing("mytable", schema, MemoryRepo())
    e = ds.feature_path_encoder
    assert isinstance(e, IntPathEncoder)
    assert e.encoding == "base64"
    assert e.branches == 64
    assert e.levels == 4

    with pytest.raises(TypeError):
        ds.encode_1pk_to_path("Dave")
    with pytest.raises(TypeError):
        ds.encode_1pk_to_path(0.1)

    assert ds.encode_1pk_to_path(0) == "mytable/.table-dataset/feature/A/A/A/A/kQA="
    assert ds.encode_1pk_to_path(1) == "mytable/.table-dataset/feature/A/A/A/A/kQE="
    assert ds.encode_1pk_to_path(-1) == "mytable/.table-dataset/feature/_/_/_/_/kf8="
    assert (
        ds.encode_1pk_to_path(1181)
        == "mytable/.table-dataset/feature/A/A/A/S/kc0EnQ=="
    )
    # trees hit wraparound with large PKs, but don't break
    assert (
        ds.encode_1pk_to_path(64**5)
        == "mytable/.table-dataset/feature/A/A/A/A/kc5AAAAA"
    )
    assert (
        ds.encode_1pk_to_path(-(64**5))
        == "mytable/.table-dataset/feature/A/A/A/A/kdLAAAAA"
    )
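# Illustrative check (an assumption consistent with the paths asserted above,
# not an API documented here): the leaf filename is the URL-safe base64 of the
# msgpack encoding of the PK tuple, e.g. msgpack([0]) == b"\x91\x00" -> "kQA=".
import base64

import msgpack  # used directly for illustration; Kart wraps this in its encoders

assert base64.urlsafe_b64encode(msgpack.packb([0])) == b"kQA="
assert base64.urlsafe_b64encode(msgpack.packb([1])) == b"kQE="
assert base64.urlsafe_b64encode(msgpack.packb([-1])) == b"kf8="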
def _get_old_and_new_schema(self, ds_path, ds_diff):
    from kart.tabular.schema import Schema

    old_schema = new_schema = None
    schema_delta = ds_diff.recursive_get(["meta", "schema.json"])
    if schema_delta and schema_delta.old_value:
        old_schema = Schema.from_column_dicts(schema_delta.old_value)
    if schema_delta and schema_delta.new_value:
        new_schema = Schema.from_column_dicts(schema_delta.new_value)
    if old_schema or new_schema:
        return old_schema, new_schema

    # No diff - old and new schemas are the same.
    ds = self.base_rs.datasets().get(ds_path) or self.target_rs.datasets().get(ds_path)
    schema = ds.schema
    return schema, schema
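# Shape of the inputs above (an illustrative sketch; names other than
# recursive_get are hypothetical): the dataset diff acts like a nested mapping,
# so recursive_get(["meta", "schema.json"]) is roughly
# ds_diff["meta"]["schema.json"] - a delta whose old_value / new_value are the
# column-dict lists that Schema.from_column_dicts consumes:
#
#   ds_diff ~ {"meta": {"schema.json": Delta(old_value=[...column dicts...],
#                                            new_value=[...column dicts...])}}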
def _apply_meta_schema_json(self, sess, dataset, src_value, dest_value):
    src_schema = Schema.from_column_dicts(src_value)
    dest_schema = Schema.from_column_dicts(dest_value)

    diff_types = src_schema.diff_types(dest_schema)
    name_updates = diff_types.pop("name_updates")
    if any(dt for dt in diff_types.values()):
        raise RuntimeError(
            f"This schema change is not supported by update - should be drop + rewrite_full: {diff_types}"
        )

    # Column renames are the only change this working copy can apply in place.
    for col_id in name_updates:
        src_name = src_schema[col_id].name
        dest_name = dest_schema[col_id].name
        sess.execute(
            f"""
            ALTER TABLE {self.table_identifier(dataset)}
            RENAME COLUMN {self.quote(src_name)} TO {self.quote(dest_name)}
            """
        )
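# For a rename of column "e" to "ee", the statement above would render roughly
# as (illustrative output, assuming table_identifier/quote emit double-quoted,
# PostgreSQL-style identifiers):
#
#   ALTER TABLE "myschema"."mytable"
#       RENAME COLUMN "e" TO "ee"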
def _apply_meta_schema_json(self, sess, dataset, src_value, dest_value):
    src_schema = Schema.from_column_dicts(src_value)
    dest_schema = Schema.from_column_dicts(dest_value)

    diff_types = src_schema.diff_types(dest_schema)
    deletes = diff_types.pop("deletes")
    name_updates = diff_types.pop("name_updates")
    type_updates = diff_types.pop("type_updates")

    if any(dt for dt in diff_types.values()):
        raise RuntimeError(
            f"This schema change is not supported by update - should be drop + rewrite_full: {diff_types}"
        )

    table = dataset.table_name

    # Drop columns that were removed from the schema.
    for col_id in deletes:
        src_name = src_schema[col_id].name
        sess.execute(
            f"""
            ALTER TABLE {self.table_identifier(table)}
            DROP COLUMN {self.quote(src_name)};
            """
        )

    # SQL Server has no RENAME COLUMN clause - renames go through sp_rename.
    for col_id in name_updates:
        src_name = src_schema[col_id].name
        dest_name = dest_schema[col_id].name
        sess.execute(
            """sp_rename :qualified_src_name, :dest_name, 'COLUMN';""",
            {
                "qualified_src_name": f"{self.db_schema}.{table}.{src_name}",
                "dest_name": dest_name,
            },
        )

    # Retype columns whose data type changed.
    for col_id in type_updates:
        col = dest_schema[col_id]
        dest_spec = KartAdapter_SqlServer.v2_column_schema_to_sql_spec(col, dataset)
        sess.execute(
            f"""ALTER TABLE {self.table_identifier(table)} ALTER COLUMN {dest_spec};"""
        )
def _apply_meta_schema_json(self, sess, dataset, src_value, dest_value):
    src_schema = Schema.from_column_dicts(src_value)
    dest_schema = Schema.from_column_dicts(dest_value)

    diff_types = src_schema.diff_types(dest_schema)
    deletes = diff_types.pop("deletes")
    name_updates = diff_types.pop("name_updates")
    type_updates = diff_types.pop("type_updates")

    if any(dt for dt in diff_types.values()):
        raise RuntimeError(
            f"This schema change is not supported by update - should be drop + rewrite_full: {diff_types}"
        )

    table = dataset.table_name

    # Drop columns that were removed from the schema.
    for col_id in deletes:
        src_name = src_schema[col_id].name
        sess.execute(
            f"""
            ALTER TABLE {self.table_identifier(table)}
            DROP COLUMN {self.quote(src_name)};
            """
        )

    for col_id in name_updates:
        src_name = src_schema[col_id].name
        dest_name = dest_schema[col_id].name
        sess.execute(
            f"""
            ALTER TABLE {self.table_identifier(table)}
            RENAME COLUMN {self.quote(src_name)} TO {self.quote(dest_name)};
            """
        )

    # MySQL uses MODIFY (rather than ALTER COLUMN) to change a column's type.
    for col_id in type_updates:
        col = dest_schema[col_id]
        dest_spec = KartAdapter_MySql.v2_column_schema_to_sql_spec(col, dataset)
        sess.execute(
            f"""ALTER TABLE {self.table_identifier(table)} MODIFY {dest_spec};"""
        )
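# Design note: the three _apply_meta_schema_json variants above differ only in
# SQL dialect. The first issues ALTER TABLE ... RENAME COLUMN directly; the
# SQL Server variant renames via sp_rename and retypes via ALTER COLUMN; the
# MySQL variant renames with RENAME COLUMN and retypes with MODIFY. Each handles
# only the change kinds it pops off diff_types - anything else raises, signalling
# that the table should be dropped and rewritten in full instead.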
def test_adapt_schema():
    schema = Schema.from_column_dicts(V2_SCHEMA_DATA)
    dataset = FakeDataset()
    dataset.schema = schema
    dataset.has_geometry = schema.has_geometry
    dataset.tree = dataset
    dataset.name = "test_dataset"

    sqlite_table_info = KartAdapter_GPKG.generate_sqlite_table_info(dataset)
    assert sqlite_table_info == [
        {
            "cid": 0,
            "name": "OBJECTID",
            "pk": 1,
            "type": "INTEGER",
            "notnull": 1,
            "dflt_value": None,
        },
        {
            "cid": 1,
            "name": "GEOMETRY",
            "pk": 0,
            "type": "GEOMETRY",
            "notnull": 0,
            "dflt_value": None,
        },
        {
            "cid": 2,
            "name": "Ward",
            "pk": 0,
            "type": "TEXT",
            "notnull": 0,
            "dflt_value": None,
        },
        {
            "cid": 3,
            "name": "Shape_Leng",
            "pk": 0,
            "type": "REAL",
            "notnull": 0,
            "dflt_value": None,
        },
        {
            "cid": 4,
            "name": "Shape_Area",
            "pk": 0,
            "type": "REAL",
            "notnull": 0,
            "dflt_value": None,
        },
    ]
def test_pk_encoder_string_pk():
    schema = Schema.from_column_dicts(
        [{"name": "mypk", "dataType": "text", "id": "abc123"}]
    )
    ds = TableV3.new_dataset_for_writing("mytable", schema, MemoryRepo())
    e = ds.feature_path_encoder
    assert isinstance(e, MsgpackHashPathEncoder)
    assert e.encoding == "base64"
    assert e.branches == 64
    assert e.levels == 4

    assert ds.encode_1pk_to_path("") == "mytable/.table-dataset/feature/I/6/M/_/kaA="
    assert (
        ds.encode_1pk_to_path("Dave")
        == "mytable/.table-dataset/feature/s/v/7/j/kaREYXZl"
    )
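# The same base64-of-msgpack sketch as for the int encoder (an assumption
# consistent with the asserted paths): msgpack(["Dave"]) is b"\x91\xa4Dave",
# which base64-encodes to the "kaREYXZl" leaf above. The hash-derived part of
# the path (s/v/7/j) comes from the encoder's digest and is not shown here.
import base64

import msgpack  # illustrative direct use; Kart wraps this in its encoders

assert base64.urlsafe_b64encode(msgpack.packb([""])) == b"kaA="
assert base64.urlsafe_b64encode(msgpack.packb(["Dave"])) == b"kaREYXZl"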