Exemple #1
0
    def _is_schema_update_supported(self, schema_delta):
        """Return True if this schema change can be applied without a full table rewrite."""
        old_value = schema_delta.old_value
        new_value = schema_delta.new_value
        if not (old_value and new_value):
            return False

        diff_counts = Schema.from_column_dicts(old_value).diff_type_counts(
            Schema.from_column_dicts(new_value)
        )
        # Column renames are applied in place; every other kind of schema change
        # means rewriting the entire table, which this update path doesn't do.
        diff_counts.pop("name_updates")
        return not any(diff_counts.values())
Exemple #2
0
def abcdef_schema():
    """Return a six-column test schema: integer PK "a" plus columns "b" through "f"."""
    pk_column = {
        "id": "a",
        "name": "a",
        "dataType": "integer",
        "primaryKeyIndex": 0,
        "size": 64,
    }
    # The remaining columns all share the same shape - id == name, plus a dataType.
    other_types = {
        "b": "geometry",
        "c": "boolean",
        "d": "float",
        "e": "text",
        "f": "text",
    }
    columns = [pk_column] + [
        {"id": name, "name": name, "dataType": data_type}
        for name, data_type in other_types.items()
    ]
    return Schema.from_column_dicts(columns)
Exemple #3
0
def test_pk_encoder_int_pk():
    """An integer PK should get an IntPathEncoder (base64, 64 branches, 4 levels)."""
    schema = Schema.from_column_dicts([{
        "name": "mypk",
        "dataType": "integer",
        "size": 64,
        "id": "abc123",
        "primaryKeyIndex": 0,
    }])
    ds = TableV3.new_dataset_for_writing("mytable", schema, MemoryRepo())
    e = ds.feature_path_encoder
    assert isinstance(e, IntPathEncoder)
    assert e.encoding == "base64"
    assert e.branches == 64
    assert e.levels == 4

    # Non-integer PK values are rejected outright by this encoder.
    with pytest.raises(TypeError):
        ds.encode_1pk_to_path("Dave")
    with pytest.raises(TypeError):
        ds.encode_1pk_to_path(0.1)

    # Small PKs (positive and negative) map to stable, fully-specified paths.
    assert ds.encode_1pk_to_path(
        0) == "mytable/.table-dataset/feature/A/A/A/A/kQA="
    assert ds.encode_1pk_to_path(
        1) == "mytable/.table-dataset/feature/A/A/A/A/kQE="
    assert ds.encode_1pk_to_path(
        -1) == "mytable/.table-dataset/feature/_/_/_/_/kf8="
    assert (ds.encode_1pk_to_path(1181) ==
            "mytable/.table-dataset/feature/A/A/A/S/kc0EnQ==")
    # trees hit wraparound with large PKs, but don't break
    assert (ds.encode_1pk_to_path(
        64**5) == "mytable/.table-dataset/feature/A/A/A/A/kc5AAAAA")
    assert (ds.encode_1pk_to_path(-(64**5)) ==
            "mytable/.table-dataset/feature/A/A/A/A/kdLAAAAA")
Exemple #4
0
    def _get_old_and_new_schema(self, ds_path, ds_diff):
        """Return (old_schema, new_schema) for the dataset at ds_path.

        When the diff doesn't touch schema.json, both values are the dataset's
        current (unchanged) schema.
        """
        from kart.tabular.schema import Schema

        delta = ds_diff.recursive_get(["meta", "schema.json"])
        old_schema = (
            Schema.from_column_dicts(delta.old_value)
            if delta and delta.old_value
            else None
        )
        new_schema = (
            Schema.from_column_dicts(delta.new_value)
            if delta and delta.new_value
            else None
        )
        if old_schema or new_schema:
            return old_schema, new_schema

        # No schema diff - look the dataset up in either commit; its schema is the same.
        dataset = (
            self.base_rs.datasets().get(ds_path)
            or self.target_rs.datasets().get(ds_path)
        )
        unchanged = dataset.schema
        return unchanged, unchanged
Exemple #5
0
 def sqlserver_to_v2_schema(cls, ms_table_info, ms_crs_info, id_salt):
     """Generate a V2 schema from the given SQL server metadata."""
     columns = []
     for col in ms_table_info:
         columns.append(cls._sqlserver_to_column_schema(col, ms_crs_info, id_salt))
     return Schema(columns)
Exemple #6
0
    def _apply_meta_schema_json(self, sess, dataset, src_value, dest_value):
        """Apply a schema.json delta to the working copy table.

        Only column renames are handled here - any other kind of change should
        have been routed to a drop + full rewrite instead.
        """
        before = Schema.from_column_dicts(src_value)
        after = Schema.from_column_dicts(dest_value)

        diff_types = before.diff_types(after)
        renames = diff_types.pop("name_updates")
        if any(diff_types.values()):
            raise RuntimeError(
                f"This schema change not supported by update - should be drop + rewrite_full: {diff_types}"
            )

        for col_id in renames:
            old_name = before[col_id].name
            new_name = after[col_id].name
            sess.execute(f"""
                ALTER TABLE {self.table_identifier(dataset)}
                RENAME COLUMN {self.quote(old_name)} TO {self.quote(new_name)}
                """)
Exemple #7
0
    def _apply_meta_schema_json(self, sess, dataset, src_value, dest_value):
        """Apply a schema.json delta to the SQL Server working copy table.

        Handles column deletes, renames and type changes in place; any other
        kind of schema change must instead be applied as a drop + full rewrite.

        sess - DB session on the working copy.
        dataset - the dataset whose table is being altered.
        src_value, dest_value - old / new schema.json contents (lists of column dicts).

        Raises RuntimeError if the delta contains unsupported change types.
        """
        src_schema = Schema.from_column_dicts(src_value)
        dest_schema = Schema.from_column_dicts(dest_value)

        diff_types = src_schema.diff_types(dest_schema)

        deletes = diff_types.pop("deletes")
        name_updates = diff_types.pop("name_updates")
        type_updates = diff_types.pop("type_updates")

        if any(dt for dt in diff_types.values()):
            raise RuntimeError(
                f"This schema change not supported by update - should be drop + re-write_full: {diff_types}"
            )

        table = dataset.table_name
        for col_id in deletes:
            src_name = src_schema[col_id].name
            sess.execute(
                f"""
                ALTER TABLE {self.table_identifier(table)}
                DROP COLUMN {self.quote(src_name)};
                """
            )

        for col_id in name_updates:
            src_name = src_schema[col_id].name
            dest_name = dest_schema[col_id].name
            # SQL Server has no ALTER TABLE ... RENAME COLUMN - renames go through
            # sp_rename with a schema-qualified source name.
            # (Fixed misspelled bind-parameter name "qualifified" -> "qualified".)
            sess.execute(
                """sp_rename :qualified_src_name, :dest_name, 'COLUMN';""",
                {
                    "qualified_src_name": f"{self.db_schema}.{table}.{src_name}",
                    "dest_name": dest_name,
                },
            )

        for col_id in type_updates:
            col = dest_schema[col_id]
            dest_spec = KartAdapter_SqlServer.v2_column_schema_to_sql_spec(col, dataset)
            sess.execute(
                f"""ALTER TABLE {self.table_identifier(table)} ALTER COLUMN {dest_spec};"""
            )
Exemple #8
0
    def _apply_meta_schema_json(self, sess, dataset, src_value, dest_value):
        """Apply a schema.json delta to the MySQL working copy table.

        Handles column deletes, renames and type changes in place; any other
        kind of schema change must instead be applied as a drop + full rewrite.
        Raises RuntimeError if the delta contains unsupported change types.
        """
        src_schema = Schema.from_column_dicts(src_value)
        dest_schema = Schema.from_column_dicts(dest_value)

        diff_types = src_schema.diff_types(dest_schema)

        # These three change types are the ones we can apply with ALTER TABLE.
        deletes = diff_types.pop("deletes")
        name_updates = diff_types.pop("name_updates")
        type_updates = diff_types.pop("type_updates")

        if any(dt for dt in diff_types.values()):
            raise RuntimeError(
                f"This schema change not supported by update - should be drop + re-write_full: {diff_types}"
            )

        table = dataset.table_name
        for col_id in deletes:
            src_name = src_schema[col_id].name
            sess.execute(
                f"""
                ALTER TABLE {self.table_identifier(table)}
                DROP COLUMN {self.quote(src_name)};"""
            )

        for col_id in name_updates:
            src_name = src_schema[col_id].name
            dest_name = dest_schema[col_id].name
            sess.execute(
                f"""
                ALTER TABLE {self.table_identifier(table)}
                RENAME COLUMN {self.quote(src_name)} TO {self.quote(dest_name)};
                """
            )

        for col_id in type_updates:
            col = dest_schema[col_id]
            # MODIFY replaces the whole column spec, so the full spec is regenerated.
            dest_spec = KartAdapter_MySql.v2_column_schema_to_sql_spec(col, dataset)
            sess.execute(
                f"""ALTER TABLE {self.table_identifier(table)} MODIFY {dest_spec};"""
            )
Exemple #9
0
    def _gpkg_to_v2_schema(cls, gpkg_meta_items, id_salt):
        """Generate a v2 Schema from the given gpkg meta items."""
        sqlite_table_info = gpkg_meta_items.get("sqlite_table_info")
        if not sqlite_table_info:
            return None

        # Column order follows the SQLite column id ("cid").
        ordered_cols = sorted(sqlite_table_info, key=lambda col_info: col_info["cid"])
        return Schema(
            [
                cls._gpkg_to_column_schema(col, gpkg_meta_items, id_salt)
                for col in ordered_cols
            ]
        )
Exemple #10
0
def test_adapt_schema():
    """A V2 schema should adapt to the expected GPKG sqlite_table_info rows."""
    schema = Schema.from_column_dicts(V2_SCHEMA_DATA)
    # Build a minimal stand-in dataset carrying just what the adapter reads.
    dataset = FakeDataset()
    dataset.schema = schema
    dataset.has_geometry = schema.has_geometry
    dataset.tree = dataset
    dataset.name = "test_dataset"

    sqlite_table_info = KartAdapter_GPKG.generate_sqlite_table_info(dataset)
    # One row per column, in cid order, with GPKG-appropriate SQLite types.
    assert sqlite_table_info == [
        {
            "cid": 0,
            "name": "OBJECTID",
            "pk": 1,
            "type": "INTEGER",
            "notnull": 1,
            "dflt_value": None,
        },
        {
            "cid": 1,
            "name": "GEOMETRY",
            "pk": 0,
            "type": "GEOMETRY",
            "notnull": 0,
            "dflt_value": None,
        },
        {
            "cid": 2,
            "name": "Ward",
            "pk": 0,
            "type": "TEXT",
            "notnull": 0,
            "dflt_value": None,
        },
        {
            "cid": 3,
            "name": "Shape_Leng",
            "pk": 0,
            "type": "REAL",
            "notnull": 0,
            "dflt_value": None,
        },
        {
            "cid": 4,
            "name": "Shape_Area",
            "pk": 0,
            "type": "REAL",
            "notnull": 0,
            "dflt_value": None,
        },
    ]
Exemple #11
0
def test_schema_roundtrip(gen_uuid):
    """A Schema survives both text (dumps/loads) and tree (encode/decode) round-trips."""
    orig = Schema([
        ColumnSchema(gen_uuid(), "geom", "geometry", None, **GEOM_TYPE_INFO),
        ColumnSchema(gen_uuid(), "id", "integer", 1, size=64),
        ColumnSchema(gen_uuid(), "artist", "text", 0, length=200),
        ColumnSchema(gen_uuid(), "recording", "blob", None),
    ])

    # Round-trip 1: serialise to text and parse it back.
    reparsed = Schema.loads(orig.dumps())
    assert reparsed is not orig
    assert reparsed == orig

    # Round-trip 2: encode into a dataset tree and read it back out.
    empty_dataset = TableV3.new_dataset_for_writing(
        DATASET_PATH, None, MemoryRepo()
    )
    path, data = empty_dataset.encode_schema(orig)
    tree = MemoryTree({path: data})

    tableV3 = TableV3(tree / DATASET_PATH, DATASET_PATH, MemoryRepo())
    decoded = tableV3.schema
    assert decoded is not orig
    assert decoded == orig
Exemple #12
0
def test_pk_encoder_string_pk():
    """A non-integer PK should get a MsgpackHashPathEncoder (base64, 64 branches, 4 levels)."""
    pk_column = {"name": "mypk", "dataType": "text", "id": "abc123"}
    schema = Schema.from_column_dicts([pk_column])
    ds = TableV3.new_dataset_for_writing("mytable", schema, MemoryRepo())

    encoder = ds.feature_path_encoder
    assert isinstance(encoder, MsgpackHashPathEncoder)
    assert encoder.encoding == "base64"
    assert (encoder.branches, encoder.levels) == (64, 4)

    # Hash-based encoding: even the empty string maps to a stable path.
    assert ds.encode_1pk_to_path("") == "mytable/.table-dataset/feature/I/6/M/_/kaA="
    assert (
        ds.encode_1pk_to_path("Dave")
        == "mytable/.table-dataset/feature/s/v/7/j/kaREYXZl"
    )
Exemple #13
0
def test_feature_roundtrip(gen_uuid):
    """A feature written into a dataset tree should decode back unchanged, in row order."""
    schema = Schema([
        ColumnSchema(gen_uuid(), "geom", "geometry", None, **GEOM_TYPE_INFO),
        ColumnSchema(gen_uuid(), "id", "integer", 1, size=64),
        ColumnSchema(gen_uuid(), "artist", "text", 0, length=200),
        ColumnSchema(gen_uuid(), "recording", "blob", None),
    ])
    empty_dataset = TableV3.new_dataset_for_writing(DATASET_PATH, schema,
                                                    MemoryRepo())
    schema_path, schema_data = empty_dataset.encode_schema(schema)
    legend_path, legend_data = empty_dataset.encode_legend(schema.legend)

    # encode_feature also accepts a feature tuple, but mostly we use dicts everywhere.
    feature_tuple = ("010100000087BF756489EF5C4C", 7, "GIS Choir", b"MP3")
    # When encoding dicts, we use the keys - so the correct initialisation order is not necessary.
    feature_dict = {
        "artist": "GIS Choir",
        "recording": b"MP3",
        "id": 7,
        "geom": "010100000087BF756489EF5C4C",
    }

    feature_path, feature_data = empty_dataset.encode_feature(
        feature_tuple, schema)
    feature_path2, feature_data2 = empty_dataset.encode_feature(
        feature_dict, schema)
    # Either encode method should give the same result.
    assert (feature_path, feature_data) == (feature_path2, feature_data2)

    # Assemble a minimal dataset tree: schema + legend + the one feature blob.
    tree = MemoryTree({
        schema_path: schema_data,
        legend_path: legend_data,
        feature_path: feature_data
    })

    tableV3 = TableV3(tree / DATASET_PATH, DATASET_PATH, MemoryRepo())
    roundtripped_feature = tableV3.get_feature(path=feature_path)
    assert roundtripped_feature is not feature_dict
    assert roundtripped_feature == feature_dict
    # We guarantee that the dict iterates in row-order.
    assert tuple(roundtripped_feature.values()) == feature_tuple
Exemple #14
0
def test_schema_change_roundtrip(gen_uuid):
    """Features written under an old schema should decode correctly under a new one.

    Columns are matched by id: renamed columns keep their values, dropped
    columns disappear, and newly added columns read as None.
    """
    old_schema = Schema([
        ColumnSchema(gen_uuid(), "ID", "integer", 0),
        ColumnSchema(gen_uuid(), "given_name", "text", None),
        ColumnSchema(gen_uuid(), "surname", "text", None),
        ColumnSchema(gen_uuid(), "date_of_birth", "date", None),
    ])
    # Reused column ids mark renames; fresh ids mark added columns.
    new_schema = Schema([
        ColumnSchema(old_schema[0].id, "personnel_id", "integer", 0),
        ColumnSchema(gen_uuid(), "tax_file_number", "text", None),
        ColumnSchema(old_schema[2].id, "last_name", "text", None),
        ColumnSchema(old_schema[1].id, "first_name", "text", None),
        ColumnSchema(gen_uuid(), "middle_names", "text", None),
    ])
    # Updating the schema without updating features is only possible
    # if the old and new schemas have the same primary key columns:
    assert old_schema.is_pk_compatible(new_schema)

    feature_tuple = (7, "Joe", "Bloggs", "1970-01-01")
    feature_dict = {
        "given_name": "Joe",
        "surname": "Bloggs",
        "date_of_birth": "1970-01-01",
        "ID": 7,
    }

    empty_dataset = TableV3.new_dataset_for_writing(DATASET_PATH, old_schema,
                                                    MemoryRepo())
    feature_path, feature_data = empty_dataset.encode_feature(
        feature_tuple, old_schema)
    feature_path2, feature_data2 = empty_dataset.encode_feature(
        feature_dict, old_schema)
    # Either encode method should give the same result.
    assert (feature_path, feature_data) == (feature_path2, feature_data2)

    # The dataset should store only the current schema, but all legends.
    schema_path, schema_data = empty_dataset.encode_schema(new_schema)
    new_legend_path, new_legend_data = empty_dataset.encode_legend(
        new_schema.legend)
    old_legend_path, old_legend_data = empty_dataset.encode_legend(
        old_schema.legend)
    tree = MemoryTree({
        schema_path: schema_data,
        new_legend_path: new_legend_data,
        old_legend_path: old_legend_data,
        feature_path: feature_data,
    })

    tableV3 = TableV3(tree / DATASET_PATH, DATASET_PATH, MemoryRepo())
    # Old columns that are not present in the new schema are gone.
    # New columns that are not present in the old schema have 'None's.
    roundtripped = tableV3.get_feature(path=feature_path)
    assert roundtripped == {
        "personnel_id": 7,
        "tax_file_number": None,
        "last_name": "Bloggs",
        "first_name": "Joe",
        "middle_names": None,
    }
    # We guarantee that the dict iterates in row-order.
    assert tuple(roundtripped.values()) == (7, None, "Bloggs", "Joe", None)
Exemple #15
0
 def postgis_to_v2_schema(cls, pg_table_info, geom_cols_info, id_salt):
     """Generate a V2 schema from the given postgis metadata tables."""
     columns = [
         cls._postgis_to_column_schema(col, geom_cols_info, id_salt)
         for col in pg_table_info
     ]
     return Schema(columns)