def test_schema_change_roundtrip(gen_uuid):
    """A feature encoded under an old schema is read back under a new schema.

    The new schema reuses three column ids from the old schema (with new
    names and a different order), has no column for the old "date_of_birth"
    id, and introduces two columns with fresh ids.
    """
    old_column_specs = [
        ("ID", "integer", 0),
        ("given_name", "text", None),
        ("surname", "text", None),
        ("date_of_birth", "date", None),
    ]
    old_schema = Schema(
        [
            ColumnSchema(gen_uuid(), name, data_type, pk_index)
            for name, data_type, pk_index in old_column_specs
        ]
    )
    new_schema = Schema(
        [
            ColumnSchema(old_schema[0].id, "personnel_id", "integer", 0),
            ColumnSchema(gen_uuid(), "tax_file_number", "text", None),
            ColumnSchema(old_schema[2].id, "last_name", "text", None),
            ColumnSchema(old_schema[1].id, "first_name", "text", None),
            ColumnSchema(gen_uuid(), "middle_names", "text", None),
        ]
    )

    # Swapping the schema while leaving the stored features untouched only
    # works when both schemas share the same primary key columns:
    assert old_schema.is_pk_compatible(new_schema)

    feature_tuple = (7, "Joe", "Bloggs", "1970-01-01")
    feature_dict = {
        "given_name": "Joe",
        "surname": "Bloggs",
        "date_of_birth": "1970-01-01",
        "ID": 7,
    }

    feature_path, feature_data = EMPTY_DATASET.encode_feature(
        feature_tuple, old_schema
    )
    dict_feature_path, dict_feature_data = EMPTY_DATASET.encode_feature(
        feature_dict, old_schema
    )
    # Tuple input and dict input must encode identically.
    assert (feature_path, feature_data) == (dict_feature_path, dict_feature_data)

    # Only the current schema is stored in the dataset, but every legend is.
    schema_path, schema_data = EMPTY_DATASET.encode_schema(new_schema)
    new_legend_path, new_legend_data = EMPTY_DATASET.encode_legend(
        new_schema.legend
    )
    old_legend_path, old_legend_data = EMPTY_DATASET.encode_legend(
        old_schema.legend
    )

    tree = DictTree(
        {
            schema_path: schema_data,
            new_legend_path: new_legend_data,
            old_legend_path: old_legend_data,
            feature_path: feature_data,
        }
    )
    dataset2 = Dataset2(tree / DATASET_PATH, DATASET_PATH)

    # Columns that exist only in the old schema are dropped on read.
    # Columns that exist only in the new schema come back as None.
    as_tuple = dataset2.get_feature(path=feature_path, keys=False)
    assert as_tuple == (7, None, "Bloggs", "Joe", None)

    as_dict = dataset2.get_feature(path=feature_path, keys=True)
    assert as_dict == {
        "personnel_id": 7,
        "tax_file_number": None,
        "last_name": "Bloggs",
        "first_name": "Joe",
        "middle_names": None,
    }
def test_align_schema(gen_uuid):
    """Check align_to_self on two schemas whose column ids are all unrelated.

    The aligned schema must keep the new schema's column names and order,
    while sharing ids with the old columns "ID", "first_name" and
    "last_name". The resulting diff counts are checked as well.
    """
    old_column_specs = (
        ("ID", "integer", 0),
        ("first_name", "text", None),
        ("last_name", "text", None),
        ("date_of_birth", "date", None),
    )
    new_column_specs = (
        ("personnel_id", "integer", 0),
        ("tax_file_number", "text", None),
        ("last_name", "text", None),
        ("first_name", "text", None),
        ("middle_names", "text", None),
    )

    # Every column in both schemas gets its own freshly generated id.
    old_schema = Schema(
        [
            ColumnSchema(gen_uuid(), name, data_type, pk_index)
            for name, data_type, pk_index in old_column_specs
        ]
    )
    new_schema = Schema(
        [
            ColumnSchema(gen_uuid(), name, data_type, pk_index)
            for name, data_type, pk_index in new_column_specs
        ]
    )

    aligned_schema = old_schema.align_to_self(new_schema)

    aligned_names = [column.name for column in aligned_schema]
    assert aligned_names == [
        "personnel_id",
        "tax_file_number",
        "last_name",
        "first_name",
        "middle_names",
    ]

    # Map each old column name to the aligned column that shares its id.
    aligned = {
        old_col.name: aligned_col.name
        for old_col in old_schema
        for aligned_col in aligned_schema
        if aligned_col.id == old_col.id
    }
    assert aligned == {
        "ID": "personnel_id",
        "first_name": "first_name",
        "last_name": "last_name",
    }

    expected_counts = {
        "inserts": 2,
        "deletes": 1,
        "name_updates": 1,
        "position_updates": 1,
        "type_updates": 0,
        "pk_updates": 0,
    }
    diff_counts = old_schema.diff_type_counts(aligned_schema)
    assert diff_counts == expected_counts
def test_feature_roundtrip(gen_uuid):
    """A feature survives encode -> tree -> Dataset2.get_feature unchanged.

    Both the tuple form and the dict form of the feature are encoded, must
    produce the same path and data, and are then read back in both forms.
    """
    columns = [
        ColumnSchema(gen_uuid(), "geom", "geometry", None, **GEOM_TYPE_INFO),
        ColumnSchema(gen_uuid(), "id", "integer", 1, size=64),
        ColumnSchema(gen_uuid(), "artist", "text", 0, length=200),
        ColumnSchema(gen_uuid(), "recording", "blob", None),
    ]
    schema = Schema(columns)

    schema_path, schema_data = EMPTY_DATASET.encode_schema(schema)
    legend_path, legend_data = EMPTY_DATASET.encode_legend(schema.legend)

    # A feature tuple has to list its values in schema order...
    feature_tuple = ("010100000087BF756489EF5C4C", 7, "GIS Choir", b"MP3")
    # ...whereas the key order of a feature dict does not matter.
    feature_dict = {
        "artist": "GIS Choir",
        "recording": b"MP3",
        "id": 7,
        "geom": "010100000087BF756489EF5C4C",
    }

    feature_path, feature_data = EMPTY_DATASET.encode_feature(feature_tuple, schema)
    dict_feature_path, dict_feature_data = EMPTY_DATASET.encode_feature(
        feature_dict, schema
    )
    # Tuple input and dict input must encode identically.
    assert (feature_path, feature_data) == (dict_feature_path, dict_feature_data)

    tree = DictTree(
        {
            schema_path: schema_data,
            legend_path: legend_data,
            feature_path: feature_data,
        }
    )
    dataset2 = Dataset2(tree / DATASET_PATH, DATASET_PATH)

    tuple_result = dataset2.get_feature(path=feature_path, keys=False)
    assert tuple_result is not feature_tuple
    assert tuple_result == feature_tuple

    dict_result = dataset2.get_feature(path=feature_path, keys=True)
    assert dict_result is not feature_dict
    assert dict_result == feature_dict
def test_schema_roundtrip(gen_uuid):
    """A Schema round-trips through dumps/loads and through a Dataset2 tree.

    In both cases the result must be a distinct object that compares equal
    to the original schema.
    """
    orig = Schema(
        [
            ColumnSchema(gen_uuid(), "geom", "geometry", None, **GEOM_TYPE_INFO),
            ColumnSchema(gen_uuid(), "id", "integer", 1, size=64),
            ColumnSchema(gen_uuid(), "artist", "text", 0, length=200),
            ColumnSchema(gen_uuid(), "recording", "blob", None),
        ]
    )

    # First round trip: serialise with dumps() and parse back with loads().
    serialised = orig.dumps()
    reloaded = Schema.loads(serialised)
    assert reloaded is not orig
    assert reloaded == orig

    # Second round trip: encode into a tree and read it back via Dataset2.
    path, data = EMPTY_DATASET.encode_schema(orig)
    tree = DictTree({path: data})
    dataset2 = Dataset2(tree / DATASET_PATH, DATASET_PATH)

    from_dataset = dataset2.schema
    assert from_dataset is not orig
    assert from_dataset == orig
def test_schema_diff_as_text(gen_uuid):
    """Check the +/- text rendering of a schema change.

    The new schema is first aligned to the old one, then the diff text is
    stripped of styling with click.unstyle and compared line by line.
    The expected column ids are hard-coded, so this test depends on the
    gen_uuid fixture yielding the same sequence of ids on every run.
    """

    def column(name, data_type, pk_index, **extra):
        # Each column takes the next id produced by the gen_uuid fixture.
        return ColumnSchema(gen_uuid(), name, data_type, pk_index, **extra)

    old_schema = Schema(
        [
            column("fid", "integer", 0, size=64),
            column(
                "geom",
                "geometry",
                None,
                geometryType="MULTIPOLYGON",
                geometryCRS="EPSG:2193",
            ),
            column("building_id", "integer", None, size=32),
            column("name", "text", None),
            column("use", "text", None),
            column("suburb_locality", "text", None),
            column("town_city", "text", None),
            column("territorial_authority", "text", None),
            column("last_modified", "date", None),
        ]
    )
    new_schema = Schema(
        [
            column("fid", "integer", 0, size=64),
            column("building_id", "integer", None, size=64),
            column("name", "text", None, size=40),
            column("territorial_authority", "text", None),
            column("use", "text", None),
            column("colour", "integer", None, size=32),
            column("town_city", "text", None),
            column(
                "geom",
                "geometry",
                None,
                geometryType="MULTIPOLYGON",
                geometryCRS="EPSG:2193",
            ),
            column("last_modified", "date", None),
        ]
    )

    aligned_schema = old_schema.align_to_self(new_schema)
    output = schema_diff_as_text(old_schema, aligned_schema)

    expected_lines = [
        " [",
        " {",
        ' "id": "b11ea716-6b85-f672-741f-8281aaa04bef",',
        ' "name": "fid",',
        ' "dataType": "integer",',
        ' "primaryKeyIndex": 0,',
        ' "size": 64',
        " },",
        "- {",
        '- "id": "0d167b8b-294f-c2be-4747-bc947672d3a0",',
        '- "name": "geom",',
        '- "dataType": "geometry",',
        '- "primaryKeyIndex": null,',
        '- "geometryType": "MULTIPOLYGON",',
        '- "geometryCRS": "EPSG:2193"',
        "- },",
        " {",
        ' "id": "0f28f35f-89d8-2b93-40d7-30abe42c69ea",',
        ' "name": "building_id",',
        ' "dataType": "integer",',
        ' "primaryKeyIndex": null,',
        '- "size": 32,',
        '+ "size": 64,',
        " },",
        " {",
        ' "id": "b5c69fa8-f48f-59bb-7aab-95225daf4774",',
        ' "name": "name",',
        ' "dataType": "text",',
        ' "primaryKeyIndex": null,',
        '+ "size": 40,',
        " },",
        "+ {",
        '+ "id": "d087bf39-1c76-fdd9-1315-0e81c6bd360f",',
        '+ "name": "territorial_authority",',
        '+ "dataType": "text",',
        '+ "primaryKeyIndex": null',
        "+ },",
        " {",
        ' "id": "9f1924ac-097a-fc0a-b168-a06e8db32af7",',
        ' "name": "use",',
        ' "dataType": "text",',
        ' "primaryKeyIndex": null',
        " },",
        "- {",
        '- "id": "1bcf7a4a-19e9-9752-6264-0fd1d387633b",',
        '- "name": "suburb_locality",',
        '- "dataType": "text",',
        '- "primaryKeyIndex": null',
        "- },",
        "+ {",
        '+ "id": "0f4e1e5b-9adb-edbe-6cbd-0ee0140448e6",',
        '+ "name": "colour",',
        '+ "dataType": "integer",',
        '+ "primaryKeyIndex": null,',
        '+ "size": 32',
        "+ },",
        " {",
        ' "id": "1777c850-baa2-6d52-dfcd-309f1741ff51",',
        ' "name": "town_city",',
        ' "dataType": "text",',
        ' "primaryKeyIndex": null',
        " },",
        "- {",
        '- "id": "d087bf39-1c76-fdd9-1315-0e81c6bd360f",',
        '- "name": "territorial_authority",',
        '- "dataType": "text",',
        '- "primaryKeyIndex": null',
        "- },",
        "+ {",
        '+ "id": "0d167b8b-294f-c2be-4747-bc947672d3a0",',
        '+ "name": "geom",',
        '+ "dataType": "geometry",',
        '+ "primaryKeyIndex": null,',
        '+ "geometryType": "MULTIPOLYGON",',
        '+ "geometryCRS": "EPSG:2193"',
        "+ },",
        " {",
        ' "id": "db82ba8c-c997-4bf1-87ef-b5108bdccde7",',
        ' "name": "last_modified",',
        ' "dataType": "date",',
        ' "primaryKeyIndex": null',
        " },",
        " ]",
    ]

    actual_lines = click.unstyle(output).splitlines()
    assert actual_lines == expected_lines
def _pki(pk_index):
    """Return an arbitrary integer ColumnSchema with the given pk_index.

    The column id is taken from gen_uuid(), and the first eight characters
    of that id double as the column name. Only pk_index is meaningful to
    callers; everything else is filler.

    Note that gen_uuid is not a parameter of this helper, so it has to be
    available from the surrounding scope.
    """
    # Named column_id rather than id so the builtin id() is not shadowed.
    column_id = gen_uuid()
    return ColumnSchema(column_id, column_id[:8], "integer", pk_index)