Code example #1
def test_clone_with_spatial_filter(git_with_spatial_filter_support,
                                   data_archive, cli_runner, tmp_path):
    geom = SPATIAL_FILTER_GEOMETRY["polygons"]
    crs = SPATIAL_FILTER_CRS["polygons"]

    file_path = (tmp_path / "spatialfilter.txt").resolve()
    file_path.write_text(f"{crs}\n\n{geom}\n", encoding="utf-8")

    with data_archive("polygons-with-feature-envelopes") as repo1_path:
        repo1_url = f"file://{repo1_path.resolve()}"
        # Clone repo using spatial filter
        repo2_path = tmp_path / "repo2"
        r = cli_runner.invoke([
            "clone",
            repo1_url,
            repo2_path,
            f"--spatial-filter=@{file_path}",
            "--spatial-filter-after-clone",
        ])
        assert r.exit_code == 0, r.stderr

        # The resulting repo has the spatial filter configured locally.
        repo2 = KartRepo(repo2_path)
        assert repo2.config["kart.spatialfilter.geometry"].startswith(
            "POLYGON ((174.879 -37.8277,")
        assert repo2.config["kart.spatialfilter.crs"] == crs

        with repo2.working_copy.session() as sess:
            assert H.row_count(sess, H.POLYGONS.LAYER) == 44

        # However, the entire polygons layer was cloned due to --spatial-filter-after-clone.
        # The spatial filter is only applied locally, so all features are still present.
        assert local_features(
            repo2.datasets()[H.POLYGONS.LAYER]) == H.POLYGONS.ROWCOUNT

        # Try it again without --spatial-filter-after-clone.
        repo3_path = tmp_path / "repo3"
        r = cli_runner.invoke(
            ["clone", repo1_url, repo3_path, f"--spatial-filter=@{file_path}"])
        assert r.exit_code == 0, r.stderr

        repo3 = KartRepo(repo3_path)
        assert repo3.config["kart.spatialfilter.geometry"].startswith(
            "POLYGON ((174.879 -37.8277,")
        assert repo3.config["kart.spatialfilter.crs"] == crs
        ds = repo3.datasets()[H.POLYGONS.LAYER]

        local_feature_count = local_features(ds)
        assert local_feature_count != H.POLYGONS.ROWCOUNT
        assert local_feature_count == 46

        with repo3.working_copy.session() as sess:
            assert H.row_count(sess, H.POLYGONS.LAYER) == 44
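
The helpers H.row_count and local_features used throughout these snippets are not shown here. local_features(ds) counts how many feature blobs of a dataset are actually present in the local object store (as opposed to merely promised, as happens in a spatially filtered partial clone). H.row_count is presumably a tiny convenience along the following lines; a minimal sketch, assuming the SQLAlchemy-style session seen in the rtree example further below:

def row_count(sess, table):
    # Hypothetical sketch of H.row_count: count the rows of a working-copy table
    # using the same sess.execute(...).scalar() pattern as the rtree check below.
    return sess.execute(f"SELECT COUNT(*) FROM {table};").scalar()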
Code example #2
def test_reclone_with_larger_spatial_filter(git_with_spatial_filter_support,
                                            data_archive, cli_runner,
                                            tmp_path):
    geom = SPATIAL_FILTER_GEOMETRY["polygons"]
    crs = SPATIAL_FILTER_CRS["polygons"]

    file_path = (tmp_path / "spatialfilter.txt").resolve()
    file_path.write_text(f"{crs}\n\n{geom}\n", encoding="utf-8")

    EMPTY_SPATIAL_FILTER = "EPSG:4326;POLYGON((0 0,0 1,1 1,1 0,0 0))"

    with data_archive("polygons-with-feature-envelopes") as repo1_path:
        repo1_url = f"file://{repo1_path.resolve()}"
        # Clone repo using spatial filter
        repo2_path = tmp_path / "repo2"
        # TODO: Invert some of this test when --spatial-filter-during-clone is inverted.
        r = cli_runner.invoke([
            "clone",
            repo1_url,
            repo2_path,
            f"--spatial-filter={EMPTY_SPATIAL_FILTER}",
            "--spatial-filter-during-clone",
        ])
        assert r.exit_code == 0, r.stderr

        repo2 = KartRepo(repo2_path)
        with repo2.working_copy.session() as sess:
            assert H.row_count(sess, H.POLYGONS.LAYER) == 0
        assert local_features(repo2.datasets()[H.POLYGONS.LAYER]) == 0

        r = cli_runner.invoke(
            ["-C", repo2_path, "checkout", f"--spatial-filter=@{file_path}"])
        assert r.exit_code == 0, r.stderr

        with repo2.working_copy.session() as sess:
            assert H.row_count(sess, H.POLYGONS.LAYER) == 44
        assert local_features(repo2.datasets()[H.POLYGONS.LAYER]) == 46

        r = cli_runner.invoke(
            ["-C", repo2_path, "checkout", "--spatial-filter=none"])

        with repo2.working_copy.session() as sess:
            assert H.row_count(sess, H.POLYGONS.LAYER) == H.POLYGONS.ROWCOUNT
        assert local_features(
            repo2.datasets()[H.POLYGONS.LAYER]) == H.POLYGONS.ROWCOUNT

        r = cli_runner.invoke(
            ["-C", repo2_path, "checkout", f"--spatial-filter=@{file_path}"])
        with repo2.working_copy.session() as sess:
            assert H.row_count(sess, H.POLYGONS.LAYER) == 44
        assert local_features(
            repo2.datasets()[H.POLYGONS.LAYER]) == H.POLYGONS.ROWCOUNT
Code example #3
def test_checkout_workingcopy(archive, table, commit_sha, data_archive,
                              tmp_path, cli_runner):
    """ Checkout a working copy to edit """
    with data_archive(archive) as repo_path:
        H.clear_working_copy()

        repo = KartRepo(repo_path)
        dataset = repo.datasets()[table]
        geom_cols = dataset.schema.geometry_columns

        r = cli_runner.invoke(["checkout"])
        wc_path = Path(repo.config["kart.workingcopy.location"])
        assert r.exit_code == 0, r
        assert r.stdout.splitlines() == [
            f"Creating working copy at {wc_path} ..."
        ]
        assert wc_path.exists()
        wc = repo.working_copy

        assert repo.head.name == "refs/heads/main"
        assert repo.head.shorthand == "main"
        assert wc.get_db_tree() == repo.head_tree.hex

        if geom_cols:
            with wc.session() as sess:
                spatial_index_count = sess.execute(
                    f"""SELECT COUNT(*) FROM "rtree_{table}_{geom_cols[0].name}";"""
                ).scalar()
                assert spatial_index_count == dataset.feature_count

        table_spec = KartAdapter_GPKG.v2_schema_to_sql_spec(dataset.schema)
        expected_col_spec = f"{KartAdapter_GPKG.quote(dataset.primary_key)} INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL"
        assert expected_col_spec in table_spec
Code example #4
def _test_postgis_import(
        repo_path,
        cli_runner,
        chdir,
        *,
        table_name,
        pk_name="id",
        pk_size=64,
        import_args=(),
):
    r = cli_runner.invoke(["init", repo_path])
    assert r.exit_code == 0, r
    with chdir(repo_path):
        r = cli_runner.invoke([
            "import",
            os.environ["KART_POSTGRES_URL"],
            table_name,
            *import_args,
        ])
        assert r.exit_code == 0, r
    # now check metadata
    repo = KartRepo(repo_path)
    dataset = repo.datasets()[table_name]

    meta_items = dict(dataset.meta_items())
    meta_item_keys = set(meta_items.keys())
    assert "schema.json" in meta_item_keys
    crs_keys = meta_item_keys - {"title", "description", "schema.json"}
    assert len(crs_keys) == 1
    crs_key = next(iter(crs_keys))
    assert crs_key.startswith("crs/EPSG:") and crs_key.endswith(".wkt")
Code example #5
def test_postgis_import_with_sampled_geometry_dimension(
    postgis_db,
    data_archive,
    tmp_path,
    cli_runner,
    request,
    chdir,
):
    with postgis_db.connect() as conn:
        conn.execute("""DROP TABLE IF EXISTS points_xyz CASCADE;""")
        conn.execute(
            """CREATE TABLE points_xyz (fid BIGINT PRIMARY KEY, shape GEOMETRY);"""
        )
        conn.execute(
            """INSERT INTO points_xyz (fid, shape) VALUES (1, ST_GeomFromText('POINT(1 2 3)', 4326));"""
        )

        _test_postgis_import(
            tmp_path / "repo",
            cli_runner,
            chdir,
            table_name="points_xyz",
            pk_name="fid",
            pk_size=64,
            import_args=["--primary-key=fid"],
        )

        repo = KartRepo(tmp_path / "repo")
        dataset = repo.datasets()["points_xyz"]
        [geom_col] = dataset.schema.geometry_columns
        assert geom_col.extra_type_info["geometryType"] == "GEOMETRY Z"

        conn.execute("""DROP TABLE IF EXISTS points_xyz CASCADE;""")
Code example #6
def test_fast_import(data_archive, tmp_path, cli_runner, chdir):
    table = H.POINTS.LAYER
    with data_archive("gpkg-points") as data:
        # list tables
        repo_path = tmp_path / "repo"
        repo_path.mkdir()

        with chdir(repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r

            repo = KartRepo(repo_path)

            source = TableImportSource.open(data /
                                            "nz-pa-points-topo-150k.gpkg",
                                            table=table)

            fast_import.fast_import_tables(repo, [source], from_commit=None)

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/main"
            assert repo.head.shorthand == "main"

            dataset = repo.datasets()[table]
            assert dataset.VERSION == 3

            # has a single commit
            assert len([c for c in repo.walk(repo.head.target)]) == 1
            assert list(dataset.meta_items())

            # has the right number of features
            feature_count = sum(1 for f in dataset.features())
            assert feature_count == source.feature_count
Code example #7
def test_spatially_filtered_merge(data_archive, cli_runner):
    # Make sure spatially filtered merges work (that is, make sure writing merged indexes with missing features works).
    # See https://github.com/koordinates/kart/issues/550
    with data_archive("points-with-feature-envelopes") as repo1_path:
        repo1_url = f"file://{repo1_path.resolve()}"

        with data_archive("points-spatial-filtered") as repo2_path:
            repo2 = KartRepo(repo2_path)
            repo2.config["remote.origin.url"] = repo1_url

            ds = repo2.datasets()[H.POINTS.LAYER]

            local_feature_count = local_features(ds)
            assert local_feature_count != H.POINTS.ROWCOUNT
            assert local_feature_count == 817

            r = cli_runner.invoke(["-C", repo2_path, "create-workingcopy"])
            assert r.exit_code == 0, r.stderr

            r = cli_runner.invoke(["-C", repo2_path, "checkout", "-b", "left"])
            assert r.exit_code == 0, r.stderr

            with repo2.working_copy.session() as sess:
                assert H.row_count(sess, H.POINTS.LAYER) == 302
                sess.execute(
                    f"DELETE FROM {H.POINTS.LAYER} WHERE fid % 3 != 0;")

            r = cli_runner.invoke(
                ["-C", repo2_path, "commit", "-m", "left-commit"])
            assert r.exit_code == 0, r.stderr

            r = cli_runner.invoke(
                ["-C", repo2_path, "checkout", "-b", "right", "HEAD^"])
            assert r.exit_code == 0, r.stderr

            with repo2.working_copy.session() as sess:
                assert H.row_count(sess, H.POINTS.LAYER) == 302
                sess.execute(
                    f"DELETE FROM {H.POINTS.LAYER} WHERE fid % 3 != 1;")

            r = cli_runner.invoke(
                ["-C", repo2_path, "commit", "-m", "right-commit"])
            assert r.exit_code == 0, r.stderr

            r = cli_runner.invoke(
                ["-C", repo2_path, "merge", "left", "-m", "merged"])
            assert r.exit_code == 0, r.stderr

            # Make sure we can do a full-read of the new commit without any problems -
            # See https://github.com/koordinates/kart/issues/552 which explains why running create-workingcopy
            # can fail after a commit in a spatially filtered repo (unless we are careful and use promisor packfiles),
            # whereas running diff will generally succeed regardless.
            r = cli_runner.invoke(
                ["-C", repo2_path, "create-workingcopy", "--delete-existing"])
            assert r.exit_code == 0, r.stderr
Code example #8
def test_spatially_filtered_commit(data_archive, cli_runner):
    # We use the points layer for this test since it uses consecutive integer PKs.
    # This means that promised features and locally-stored features are likely to both be stored in the
    # same tree, which highlights a potential issue: https://github.com/koordinates/kart/issues/552
    with data_archive("points-with-feature-envelopes") as repo1_path:
        repo1_url = f"file://{repo1_path.resolve()}"

        with data_archive("points-spatial-filtered") as repo2_path:
            repo2 = KartRepo(repo2_path)
            repo2.config["remote.origin.url"] = repo1_url

            ds = repo2.datasets()[H.POINTS.LAYER]

            local_feature_count = local_features(ds)
            assert local_feature_count != H.POINTS.ROWCOUNT
            assert local_feature_count == 817

            r = cli_runner.invoke(["-C", repo2_path, "create-workingcopy"])
            assert r.exit_code == 0, r.stderr

            with repo2.working_copy.session() as sess:
                assert H.row_count(sess, H.POINTS.LAYER) == 302
                sess.execute(f"DELETE FROM {H.POINTS.LAYER}")

            r = cli_runner.invoke(
                ["-C", repo2_path, "commit", "-m", "delete-matching"])
            assert r.exit_code == 0, r.stderr

            ds = repo2.datasets()[H.POINTS.LAYER]
            assert ds.feature_count == H.POINTS.ROWCOUNT - 302
            assert local_features(ds) == 817 - 302

            # Make sure we can do a full-read of the new commit without any problems -
            # See https://github.com/koordinates/kart/issues/552 which explains why running create-workingcopy
            # can fail after a commit in a spatially filtered repo (unless we are careful and use promisor packfiles),
            # whereas running diff will generally succeed regardless.
            r = cli_runner.invoke(
                ["-C", repo2_path, "create-workingcopy", "--delete-existing"])
            assert r.exit_code == 0, r.stderr
Code example #9
def test_pk_encoder_legacy_hashed(data_archive_readonly):
    archive_path = Path("upgrade") / "v2.kart" / "points.tgz"
    with data_archive_readonly(archive_path) as repo_path:
        repo = KartRepo(repo_path)
        ds = repo.datasets()["nz_pa_points_topo_150k"]
        e = ds.feature_path_encoder
        assert isinstance(e, MsgpackHashPathEncoder)
        assert e.encoding == "hex"
        assert e.branches == 256
        assert e.levels == 2
        assert (ds.encode_1pk_to_path(1181) ==
                "nz_pa_points_topo_150k/.sno-dataset/feature/7b/36/kc0EnQ==")
        assert (ds.encode_1pk_to_path("Dave") ==
                "nz_pa_points_topo_150k/.sno-dataset/feature/b2/fe/kaREYXZl")
Code example #10
def test_import_no_pk_performance(data_archive_readonly, benchmark):
    with data_archive_readonly("points") as repo_path:
        repo = KartRepo(repo_path)
        dataset = repo.datasets()["nz_pa_points_topo_150k"]
        dataset.meta_overrides = {}

        features = list(dataset.features())
        assert len(features) == 2143
        old_features = features[0:1000]
        new_features = features[1000:2143]

        pkis = PkGeneratingTableImportSource(dataset, repo)
        pkis.prev_dest_schema = dataset.schema
        pkis.primary_key = dataset.primary_key

        def _match_features_benchmark():
            # Exhaust generator:
            for _ in pkis._match_similar_features_and_remove(
                    old_features, new_features):
                pass

        benchmark(_match_features_benchmark)
Code example #11
def test_feature_find_decode_performance(
    profile,
    archive,
    source_gpkg,
    table,
    data_archive,
    data_imported,
    benchmark,
    request,
):
    """ Check single-feature decoding performance """
    param_ids = H.parameter_ids(request)
    benchmark.group = (
        f"test_feature_find_decode_performance - {profile} - {param_ids[-1]}")

    repo_path = data_imported(archive, source_gpkg, table)
    repo = KartRepo(repo_path)
    dataset = repo.datasets()["mytable"]
    inner_tree = dataset.inner_tree

    with data_archive(archive) as data:
        with Db_GPKG.create_engine(data / source_gpkg).connect() as conn:
            num_rows = conn.execute(
                f"SELECT COUNT(*) FROM {table};").fetchone()[0]
            pk_field = Db_GPKG.pk_name(conn, table=table)
            pk = conn.execute(
                f"SELECT {pk_field} FROM {table} ORDER BY {pk_field} LIMIT 1 OFFSET {min(97,num_rows-1)};"
            ).fetchone()[0]

    if profile == "get_feature_by_pk":
        benchmark(dataset.get_feature, pk)

    elif profile == "get_feature_from_data":
        feature_path = dataset.encode_1pk_to_path(pk, relative=True)
        feature_data = memoryview(inner_tree / feature_path)

        benchmark(dataset.get_feature, path=feature_path, data=feature_data)
    else:
        raise NotImplementedError(f"Unknown profile: {profile}")
Code example #12
def test_spatially_filtered_partial_clone(data_archive, cli_runner):
    crs = SPATIAL_FILTER_CRS["polygons"]

    with data_archive("polygons-with-feature-envelopes") as repo1_path:
        repo1_url = f"file://{repo1_path.resolve()}"

        with data_archive("polygons-spatial-filtered") as repo2_path:
            repo2 = KartRepo(repo2_path)
            repo2.config["remote.origin.url"] = repo1_url

            assert repo2.config["kart.spatialfilter.geometry"].startswith(
                "POLYGON ((174.879 -37.8277,")
            assert repo2.config["kart.spatialfilter.crs"] == crs
            ds = repo2.datasets()[H.POLYGONS.LAYER]

            local_feature_count = local_features(ds)
            assert local_feature_count != H.POLYGONS.ROWCOUNT
            assert local_feature_count == 52

            r = cli_runner.invoke(["-C", repo2_path, "create-workingcopy"])
            assert r.exit_code == 0, r.stderr

            with repo2.working_copy.session() as sess:
                assert H.row_count(sess, H.POLYGONS.LAYER) == 44

            def _get_key_error(ds, pk):
                try:
                    ds.get_feature(pk)
                    return None
                except KeyError as e:
                    return e

            assert _get_key_error(ds, 1424927) is None
            assert _get_key_error(ds,
                                  9999999).subcode == LibgitSubcode.ENOSUCHPATH
            assert _get_key_error(
                ds, 1443053).subcode == LibgitSubcode.EOBJECTPROMISED
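
The _get_key_error pattern above distinguishes a feature that simply does not exist (ENOSUCHPATH) from one that exists upstream but was never fetched (EOBJECTPROMISED). The is_local_feature helper used in a later snippet could plausibly be built on the same pattern; a hedged sketch, not necessarily the project's real implementation:

def is_local_feature(ds, pk):
    # A locally present feature reads normally; a promised-but-unfetched one
    # raises KeyError with subcode EOBJECTPROMISED (see _get_key_error above).
    try:
        ds.get_feature(pk)
        return True
    except KeyError as e:
        if getattr(e, "subcode", None) == LibgitSubcode.EOBJECTPROMISED:
            return False
        raise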
Code example #13
def _import_check(repo_path, table, source_gpkg):
    repo = KartRepo(repo_path)
    dataset = repo.datasets()[table]
    assert dataset.VERSION == 3

    with Db_GPKG.create_engine(source_gpkg).connect() as conn:
        num_rows = conn.execute(f"SELECT COUNT(*) FROM {table};").fetchone()[0]

    o = subprocess.check_output(["git", "ls-tree", "-r", "-t", "HEAD", table])
    print("\n".join(l.decode("utf8") for l in o.splitlines()[:20]))

    if dataset.VERSION != 3:
        raise NotImplementedError(dataset.VERSION)

    re_paths = r"^\d{6} blob [0-9a-f]{40}\t%s/.table-dataset/feature/.*$" % table
    git_paths = [
        m for m in re.findall(re_paths, o.decode("utf-8"), re.MULTILINE)
    ]
    assert len(git_paths) == num_rows

    num_features = dataset.feature_count
    assert num_features == num_rows

    return dataset
Code example #14
def test_spatially_filtered_fetch_promised(data_archive, cli_runner, insert,
                                           monkeypatch,
                                           git_supports_spatial_filter):

    # Keep track of how many features we fetch lazily after the partial clone.
    orig_fetch_func = FetchPromisedBlobsProcess.fetch
    fetch_count = 0

    def _fetch(*args, **kwargs):
        nonlocal fetch_count
        fetch_count += 1
        return orig_fetch_func(*args, **kwargs)

    monkeypatch.setattr(FetchPromisedBlobsProcess, "fetch", _fetch)

    with data_archive("polygons-with-feature-envelopes") as repo1_path:
        repo1_url = f"file://{repo1_path.resolve()}"

        with data_archive("polygons-spatial-filtered") as repo2_path:
            repo2 = KartRepo(repo2_path)
            repo2.config["remote.origin.url"] = repo1_url

            if not git_supports_spatial_filter:
                # Git doesn't understand the "spatial" filter.
                # But we can do this test without it:
                print(
                    "Git doesn't support spatial filters, using blob:none instead"
                )
                repo2.config["remote.origin.partialclonefilter"] = "blob:none"

            orig_config_dict = {c.name: c.value for c in repo2.config}

            ds = repo2.datasets()[H.POLYGONS.LAYER]

            local_feature_count = local_features(ds)
            assert local_feature_count != H.POLYGONS.ROWCOUNT
            assert local_feature_count == 52

            r = cli_runner.invoke(["-C", repo2_path, "create-workingcopy"])
            assert r.exit_code == 0, r.stderr

            with repo2.working_copy.session() as sess:
                assert H.row_count(sess, H.POLYGONS.LAYER) == 44
                # Insert features that exist in the dataset but don't match the spatial filter,
                # so they are neither loaded locally nor written to the working copy.
                for pk in H.POLYGONS.SAMPLE_PKS:
                    if not is_local_feature(ds, pk):
                        insert(sess, with_pk=pk, commit=False)

            r = cli_runner.invoke(["-C", repo2_path, "status"])
            assert r.exit_code == 0, r.stderr
            assert "6 primary key conflicts" in r.stdout
            # All 6 features that are conflicts / were "updated" in the WC have been loaded:
            assert fetch_count == 6
            assert local_features(ds) == 58

            with repo2.working_copy.session() as sess:
                sess.execute(f"DROP TABLE {H.POLYGONS.LAYER};")

            r = cli_runner.invoke(["-C", repo2_path, "status"])
            assert r.exit_code == 0, r.stderr
            assert f"{H.POLYGONS.ROWCOUNT} deletes" in r.stdout
            assert local_features(ds) == 58

            r = cli_runner.invoke(["-C", repo2_path, "diff"])
            assert r.exit_code == 0, r.stderr
            # All of the deleted features have now been loaded to show in the diff output:
            assert local_features(ds) == H.POLYGONS.ROWCOUNT
            assert fetch_count == H.POLYGONS.ROWCOUNT - 52

            final_config_dict = {c.name: c.value for c in repo2.config}
            # Making these fetches shouldn't change any repo config:
            assert final_config_dict == orig_config_dict
Code example #15
def test_import_from_non_gpkg(
    archive,
    source_gpkg,
    table,
    data_archive,
    tmp_path,
    cli_runner,
    chdir,
    request,
    source_format,
    source_ogr_driver,
):
    """
    Import something else into a Kart repository.
    """
    param_ids = H.parameter_ids(request)

    with data_archive(archive) as data:
        with Db_GPKG.create_engine(data / source_gpkg).connect() as conn:
            if param_ids[-1] == "empty":
                print(f"emptying table {table}...")
                conn.execute(f"DELETE FROM {table};")

            num_rows = conn.execute(
                f"SELECT COUNT(*) FROM {table};").fetchone()[0]

        if param_ids[-1] == "empty":
            assert num_rows == 0

        # First, import the original GPKG to one repo
        gpkg_repo_path = tmp_path / "gpkg"
        gpkg_repo_path.mkdir()
        with chdir(gpkg_repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r
            r = cli_runner.invoke(["import", data / source_gpkg, table])
            assert r.exit_code == 0, r

        gpkg_repo = KartRepo(gpkg_repo_path)
        gpkg_dataset = gpkg_repo.datasets()[table]

        # convert to a new format using OGR
        source_filename = tmp_path / f"data.{source_format.lower()}"
        gdal.VectorTranslate(
            str(source_filename),
            gdal.OpenEx(str(data / source_gpkg)),
            format=source_ogr_driver,
            layers=[table],
        )
        repo_path = tmp_path / "non-gpkg"
        repo_path.mkdir()
        with chdir(repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r

            repo = KartRepo(repo_path)
            assert repo.is_empty

            # Import from SHP/TAB/something into Kart
            r = cli_runner.invoke([
                "import",
                str(source_filename),
                f"data:{table}",
            ])
            assert r.exit_code == 0, r

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/main"
            assert repo.head.shorthand == "main"

            # has a single commit
            assert len([c for c in repo.walk(repo.head.target)]) == 1

            dataset = _import_check(repo_path, table, f"{data / source_gpkg}")

            # Compare the meta items to the GPKG-imported ones
            repo = KartRepo(repo_path)
            dataset = repo.datasets()[table]

            _compare_ogr_and_gpkg_meta_items(dataset, gpkg_dataset)

            if num_rows > 0:
                # compare the first feature in the repo against the source DB
                got_feature = next(dataset.features())
                pk = got_feature[dataset.primary_key]

                src_ds = ogr.Open(str(source_filename))
                src_layer = src_ds.GetLayer(0)
                assert src_layer.GetFeatureCount() == num_rows

                f = src_layer.GetFeature(pk)
                expected_feature = {
                    f.GetFieldDefnRef(i).GetName(): f.GetField(i)
                    for i in range(f.GetFieldCount())
                }
                if "date_adjus" in expected_feature:
                    expected_feature["date_adjus"] = expected_feature[
                        "date_adjus"].replace("/", "-")
                expected_feature["FID"] = f.GetFID()
                if src_layer.GetGeomType() != ogr.wkbNone:
                    g = f.GetGeometryRef()
                    if g:
                        g.AssignSpatialReference(src_layer.GetSpatialRef())
                        if table == H.POLYGONS.LAYER:
                            g = ogr.ForceToMultiPolygon(g)
                    expected_feature["geom"] = ogr_to_gpkg_geom(g)

                assert normalise_feature(got_feature) == expected_feature
Code example #16
def test_shp_import_meta(
    data_archive,
    tmp_path,
    cli_runner,
    request,
):
    with data_archive("gpkg-polygons") as data:
        # convert to SHP using OGR
        source_filename = tmp_path / "nz_waca_adjustments.shp"
        gdal.VectorTranslate(
            str(source_filename),
            gdal.OpenEx(str(data / "nz-waca-adjustments.gpkg")),
            format="ESRI Shapefile",
            layers=["nz_waca_adjustments"],
        )

        # now import the SHP
        repo_path = tmp_path / "repo"
        r = cli_runner.invoke(
            ["init", "--import", source_filename,
             str(repo_path)])
        assert r.exit_code == 0, r

        # now check metadata
        path = "nz_waca_adjustments"
        repo = KartRepo(repo_path)
        dataset = repo.datasets()[path]

        meta_items = dict(dataset.meta_items())
        assert set(meta_items) == {
            "schema.json",
            "crs/EPSG:4167.wkt",
        }
        schema = without_ids(dataset.get_meta_item("schema.json"))
        assert schema == [
            {
                "name": "FID",
                "dataType": "integer",
                "primaryKeyIndex": 0,
                "size": 64
            },
            {
                "name": "geom",
                "dataType": "geometry",
                "geometryType": "MULTIPOLYGON",
                "geometryCRS": "EPSG:4167",
            },
            {
                "name": "date_adjus",
                "dataType": "date"
            },
            {
                "name": "survey_ref",
                "dataType": "text",
                "length": 50
            },
            {
                "name": "adjusted_n",
                "dataType": "integer",
                "size": 32,
            },
        ]
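
A plausible sketch of the without_ids helper used above (an assumption, not necessarily the project's actual code): Kart's schema.json columns each carry a generated "id" field, which is stripped so the schema can be compared against a hand-written expected value.

def without_ids(schema_columns):
    # Drop the per-column "id" so only the meaningful keys are compared.
    return [{k: v for k, v in col.items() if k != "id"} for col in schema_columns]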
Code example #17
def test_commit_edits(
    archive,
    table,
    commit_sha,
    data_archive,
    cli_runner,
    new_sqlserver_db_schema,
    edit_points,
    edit_polygons,
    edit_table,
):
    """ Checkout a working copy and make some edits """
    with data_archive(archive) as repo_path:
        repo = KartRepo(repo_path)
        H.clear_working_copy()

        with new_sqlserver_db_schema() as (sqlserver_url, sqlserver_schema):
            r = cli_runner.invoke(["create-workingcopy", sqlserver_url])
            assert r.exit_code == 0, r.stderr

            r = cli_runner.invoke(["status"])
            assert r.exit_code == 0, r.stderr
            assert r.stdout.splitlines() == [
                "On branch main",
                "",
                "Nothing to commit, working copy clean",
            ]

            wc = repo.working_copy
            assert wc.status() & WorkingCopyStatus.INITIALISED
            assert wc.status() & WorkingCopyStatus.HAS_DATA

            with wc.session() as sess:
                if archive == "points":
                    edit_points(sess, repo.datasets()[H.POINTS.LAYER], wc)
                elif archive == "polygons":
                    edit_polygons(sess, repo.datasets()[H.POLYGONS.LAYER], wc)
                elif archive == "table":
                    edit_table(sess, repo.datasets()[H.TABLE.LAYER], wc)

            r = cli_runner.invoke(["status"])
            assert r.exit_code == 0, r.stderr
            assert r.stdout.splitlines() == [
                "On branch main",
                "",
                "Changes in working copy:",
                '  (use "kart commit" to commit)',
                '  (use "kart restore" to discard changes)',
                "",
                f"  {table}:",
                "    feature:",
                "      1 inserts",
                "      2 updates",
                "      5 deletes",
            ]
            orig_head = repo.head.peel(pygit2.Commit).hex

            r = cli_runner.invoke(["commit", "-m", "test_commit"])
            assert r.exit_code == 0, r.stderr

            r = cli_runner.invoke(["status"])
            assert r.exit_code == 0, r.stderr
            assert r.stdout.splitlines() == [
                "On branch main",
                "",
                "Nothing to commit, working copy clean",
            ]

            new_head = repo.head.peel(pygit2.Commit).hex
            assert new_head != orig_head

            r = cli_runner.invoke(["checkout", "HEAD^"])

            assert repo.head.peel(pygit2.Commit).hex == orig_head
Code example #18
def test_import_various_field_types(tmp_path, postgres_table_with_types,
                                    cli_runner):
    # Using postgres here because it has the best type preservation

    r = cli_runner.invoke(["init", str(tmp_path / "repo1")])
    assert r.exit_code == 0, r.stderr
    r = cli_runner.invoke([
        "-C",
        str(tmp_path / "repo1"),
        "import",
        os.environ["KART_POSTGRES_URL"],
        "typoes",
    ], )

    assert r.exit_code == 0, r.stderr
    repo = KartRepo(tmp_path / "repo1")
    dataset = repo.datasets()["typoes"]

    cols = _dataset_col_types(dataset)

    assert cols == {
        "bigant_pk": {
            "dataType": "integer",
            "primaryKeyIndex": 0,
            "size": 64
        },
        "bigant": {
            "dataType": "integer",
            "size": 64
        },
        "reel": {
            "dataType": "float",
            "size": 32
        },
        "dubble": {
            "dataType": "float",
            "size": 64
        },
        "smallant": {
            "dataType": "integer",
            "size": 16
        },
        "regularant": {
            "dataType": "integer",
            "size": 32
        },
        "tumeric": {
            "dataType": "numeric"
        },
        "tumeric20_0": {
            "dataType": "numeric",
            "precision": 20
        },
        "tumeric4_0": {
            "dataType": "numeric",
            "precision": 4
        },
        "tumeric5_5": {
            "dataType": "numeric",
            "precision": 5,
            "scale": 5
        },
        "tumeric99_0": {
            "dataType": "numeric",
            "precision": 99
        },
        "techs": {
            "dataType": "text"
        },
        "techs10": {
            "dataType": "text",
            "length": 100
        },
    }

    # Now generate a DBF file, and try again from there.
    ogr_conn_str = postgres_url_to_ogr_conn_str(
        os.environ["KART_POSTGRES_URL"])
    gdal.VectorTranslate(
        str(tmp_path / "typoes.dbf"),
        ogr_conn_str,
        format="ESRI Shapefile",
        layers=["typoes"],
    )

    r = cli_runner.invoke(["init", str(tmp_path / "repo2")])
    assert r.exit_code == 0, r.stderr
    r = cli_runner.invoke([
        "-C",
        str(tmp_path / "repo2"),
        "import",
        str(tmp_path / "typoes.dbf"),
        "typoes",
    ], )

    assert r.exit_code == 0, r.stderr
    repo = KartRepo(tmp_path / "repo2")
    dataset = repo.datasets()["typoes"]

    cols = _dataset_col_types(dataset)
    assert cols == {
        "FID": {
            "dataType": "integer",
            "primaryKeyIndex": 0,
            "size": 64
        },
        "bigant": {
            "dataType": "integer",
            "size": 64
        },
        "regularant": {
            "dataType": "integer",
            "size": 32
        },
        "smallant": {
            "dataType": "integer",
            "size": 32
        },
        "dubble": {
            "dataType": "float",
            "size": 64
        },
        "techs": {
            "dataType": "text",
            "length": 80
        },
        "techs10": {
            "dataType": "text",
            "length": 100
        },
        "tumeric20_": {
            "dataType": "numeric",
            "precision": 20,
            "scale": 0
        },
        "tumeric4_0": {
            "dataType": "numeric",
            "precision": 4,
            "scale": 0
        },
        "tumeric5_5": {
            "dataType": "numeric",
            "precision": 5,
            "scale": 5
        },
        "tumeric99_": {
            "dataType": "numeric",
            "precision": 99,
            "scale": 0
        },
        # These two type conversions are regrettable, but unavoidable as we are using OGR.
        "reel": {
            "dataType": "float",
            "size": 64
        },
        "tumeric": {
            "dataType": "float",
            "size": 64
        },
    }
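
_dataset_col_types is likewise not shown; judging by how it is used above, it indexes each schema.json column by name and keeps only the type-related keys. A hypothetical sketch:

def _dataset_col_types(dataset):
    # Index schema.json columns by name, dropping "id" and "name" themselves
    # so the result can be compared as a plain dict of type info.
    return {
        col["name"]: {k: v for k, v in col.items() if k not in ("id", "name")}
        for col in dataset.get_meta_item("schema.json")
    }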
Code example #19
def test_postgis_import_from_view_no_pk(
    postgis_db,
    postgis_layer,
    data_archive,
    tmp_path,
    cli_runner,
    request,
    chdir,
):
    repo_path = tmp_path / "repo"
    with postgis_layer("gpkg-points", "nz-pa-points-topo-150k.gpkg",
                       "nz_pa_points_topo_150k"):
        with postgis_db.connect() as conn:
            conn.execute("""
                CREATE OR REPLACE VIEW nz_pa_points_view AS (
                    SELECT geom, t50_fid, name_ascii, macronated, name
                    FROM nz_pa_points_topo_150k
                    WHERE fid %% 3 != 0
                );
                """)
        _test_postgis_import(
            repo_path,
            cli_runner,
            chdir,
            table_name="nz_pa_points_view",
            pk_name="auto_pk",
        )

        repo = KartRepo(repo_path)
        dataset = repo.datasets()["nz_pa_points_view"]
        initial_pks = [f["auto_pk"] for f in dataset.features()]
        assert len(initial_pks) == 1429
        assert max(initial_pks) == 1429
        assert sorted(initial_pks) == list(range(1, 1429 + 1))

        with postgis_db.connect() as conn:
            conn.execute("DROP VIEW IF EXISTS nz_pa_points_view;")
            conn.execute("""
                CREATE OR REPLACE VIEW nz_pa_points_view AS (
                    SELECT geom, t50_fid, name_ascii, macronated, name
                    FROM nz_pa_points_topo_150k
                    WHERE fid %% 3 != 1
                );
                """)

        r = cli_runner.invoke([
            "--repo",
            str(repo_path.resolve()),
            "import",
            os.environ["KART_POSTGRES_URL"],
            "nz_pa_points_view",
            "--replace-existing",
        ])
        assert r.exit_code == 0, r.stderr
        repo = KartRepo(repo_path)
        dataset = repo.datasets()["nz_pa_points_view"]
        new_pks = [f["auto_pk"] for f in dataset.features()]

        assert len(new_pks) == 1428
        assert max(new_pks) == 2143
        assert len(set(initial_pks) & set(new_pks)) == 714
        # There are 2143 features in total: 1429 are in the first group and 1428 in the second.
        # 1429 + 1428 = 2857, which is 714 more than 2143, so 714 features are in both groups
        # and should be imported with the same PK both times.

        with postgis_db.connect() as conn:
            # This is similar enough to be detected as an edit - only one field is different.
            conn.execute(
                "UPDATE nz_pa_points_topo_150k SET name_ascii='foo' WHERE fid=3;"
            )
            # This is similar enough to be detected as an edit - only one field is different.
            conn.execute(
                "UPDATE nz_pa_points_topo_150k SET name='qux' WHERE fid=6;")
            # This will not be detected as an edit - two fields are different,
            # so it looks like one feature is deleted and a different one is inserted.
            conn.execute(
                "UPDATE nz_pa_points_topo_150k SET name_ascii='bar', name='baz' WHERE fid=9;"
            )
            conn.execute("DROP VIEW IF EXISTS nz_pa_points_view;")
            conn.execute("""
                CREATE OR REPLACE VIEW nz_pa_points_view AS (
                    SELECT geom, t50_fid, name_ascii, macronated, name
                    FROM nz_pa_points_topo_150k
                    WHERE fid %% 3 != 2
                );
                """)

        r = cli_runner.invoke([
            "--repo",
            str(repo_path.resolve()),
            "import",
            os.environ["KART_POSTGRES_URL"],
            "nz_pa_points_view",
            "--replace-existing",
        ])
        assert r.exit_code == 0, r.stderr
        r = cli_runner.invoke(["--repo", str(repo_path.resolve()), "show"])
        assert r.exit_code == 0, r.stderr

        output = r.stdout.splitlines()
        # Changing which features are included in the view again causes a huge number of inserts and deletes:
        assert len(output) == 10031

        # But we are still able to recognise the edits we made as edits.
        # (For happy mathematical reasons, these diffs end up at the end of the output.)
        assert output[-22:] == [
            # Edit: name_ascii changed to foo
            "--- nz_pa_points_view:feature:1430",
            "+++ nz_pa_points_view:feature:1430",
            "-                               name_ascii = Tauwhare Pa",
            "+                               name_ascii = foo",
            # Edit: name changed to qux
            "--- nz_pa_points_view:feature:1431",
            "+++ nz_pa_points_view:feature:1431",
            "-                                     name = ␀",
            "+                                     name = qux",
            # Not considered an edit - both name_ascii and name changed
            # So, left as a delete + insert, and assigned a new PK
            "--- nz_pa_points_view:feature:1432",
            "-                                  auto_pk = 1432",
            "-                                     geom = POINT(...)",
            "-                                  t50_fid = 2426279",
            "-                               name_ascii = ␀",
            "-                               macronated = N",
            "-                                     name = ␀",
            "+++ nz_pa_points_view:feature:2144",
            "+                                  auto_pk = 2144",
            "+                                     geom = POINT(...)",
            "+                                  t50_fid = 2426279",
            "+                               name_ascii = bar",
            "+                               macronated = N",
            "+                                     name = baz",
        ]
Code example #20
def test_clone_with_reference_spatial_filter(data_archive, cli_runner,
                                             tmp_path):
    # TODO - this currently tests that the spatial filter is correctly applied locally after
    # the entire repo is cloned. Applying a reference spatial filter remotely to do a
    # partial clone is not yet supported.

    geom = SPATIAL_FILTER_GEOMETRY["polygons"]
    crs = SPATIAL_FILTER_CRS["polygons"]

    file_path = tmp_path / "spatialfilter.txt"
    file_path.write_text(f"{crs}\n\n{geom}\n", encoding="utf-8")

    with data_archive("polygons") as repo1_path:
        r = cli_runner.invoke([
            "commit-files",
            "-m",
            "Add spatial filter",
            f"spatialfilter.txt=@{file_path}",
        ])
        assert r.exit_code == 0, r.stderr
        r = cli_runner.invoke(["git", "hash-object", file_path])
        assert r.exit_code == 0, r.stderr
        blob_sha = r.stdout.strip()
        r = cli_runner.invoke(
            ["git", "update-ref", "refs/filters/octagon", blob_sha])
        assert r.exit_code == 0, r.stderr

        # Spatial filter is now stored with ref "octagon".
        # Test spatial-filter resolve:
        r = cli_runner.invoke(["spatial-filter", "resolve", "octagon"])
        assert r.exit_code == 0, r.stderr
        assert r.stdout.startswith(f"{crs}\n\nPOLYGON((174.879 -37.8277,")

        r = cli_runner.invoke(
            ["spatial-filter", "resolve", "octagon", "--envelope"])
        assert r.exit_code == 0, r.stderr
        assert r.stdout == "174.879,-37.9783,175.3878,-37.4987\n"

        r = cli_runner.invoke(
            ["spatial-filter", "resolve", "octagon", "-o", "json"])
        assert r.exit_code == 0, r.stderr
        jdict = json.loads(r.stdout)
        assert jdict["reference"] == "refs/filters/octagon"
        assert jdict["objectId"] == blob_sha
        assert jdict["geometry"].startswith(
            "01030000000100000009000000E3A59BC420DC65401973D7")
        assert jdict["crs"] == crs

        r = cli_runner.invoke([
            "spatial-filter", "resolve", "octagon", "-o", "json", "--envelope"
        ])
        assert r.exit_code == 0, r.stderr
        envelope = json.loads(r.stdout)
        assert envelope == [174.879, -37.9783, 175.3878, -37.4987]

        # This is disabled by default as it is still not fully supported.
        os.environ["X_KART_SPATIAL_FILTER_REFERENCE"] = "1"
        try:
            # Clone repo using spatial filter reference
            repo2_path = tmp_path / "repo2"
            r = cli_runner.invoke([
                "clone",
                repo1_path,
                repo2_path,
                "--spatial-filter=octagon",
                "--spatial-filter-after-clone",
            ])
            assert r.exit_code == 0, r.stderr

            # The resulting repo has the spatial filter configured locally.
            repo2 = KartRepo(repo2_path)
            assert (repo2.config["kart.spatialfilter.reference"] ==
                    "refs/filters/octagon")
            assert repo2.config["kart.spatialfilter.objectid"] == blob_sha

            # However, the entire polygons layer was cloned.
            # TODO: Only clone the features that match the spatial filter.
            assert (local_features(
                repo2.datasets()[H.POLYGONS.LAYER]) == H.POLYGONS.ROWCOUNT)

            with repo2.working_copy.session() as sess:
                assert H.row_count(sess, H.POLYGONS.LAYER) == 44

            # Clone repo using spatial filter object ID
            repo3_path = tmp_path / "repo3"
            r = cli_runner.invoke([
                "clone",
                repo1_path,
                repo3_path,
                f"--spatial-filter={blob_sha}",
                "--spatial-filter-after-clone",
            ])
            assert r.exit_code == 0, r.stderr
            repo3 = KartRepo(repo3_path)
            assert repo3.config["kart.spatialfilter.geometry"].startswith(
                "POLYGON ((174.879 -37.8277,")
            assert repo3.config["kart.spatialfilter.crs"] == crs

            with repo3.working_copy.session() as sess:
                assert H.row_count(sess, H.POLYGONS.LAYER) == 44

            # Missing spatial filter:
            repo4_path = tmp_path / "repo4"
            r = cli_runner.invoke([
                "clone",
                repo1_path,
                repo4_path,
                "--spatial-filter=dodecahedron",
                "--spatial-filter-after-clone",
            ])
            assert r.exit_code == NO_SPATIAL_FILTER, r.stderr

        finally:
            del os.environ["X_KART_SPATIAL_FILTER_REFERENCE"]
Code example #21
File: test_commit.py  Project: koordinates/kart
def test_commit(
    archive,
    layer,
    partial,
    data_working_copy,
    cli_runner,
    request,
    edit_points,
    edit_polygons,
    edit_table,
):
    """ commit outstanding changes from the working copy """

    with data_working_copy(archive) as (repo_dir, wc_path):
        # empty
        r = cli_runner.invoke(["commit", "-m", "test-commit-empty"])
        assert r.exit_code == NO_CHANGES, r
        assert r.stderr.splitlines() == ["Error: No changes to commit"]

        # empty
        r = cli_runner.invoke(["commit", "-m", "test-commit-empty", "--allow-empty"])
        assert r.exit_code == 0, r

        # make some changes
        repo = KartRepo(repo_dir)
        with repo.working_copy.session() as sess:
            try:
                edit_func = locals()[f"edit_{archive}"]
                pk_del = edit_func(sess)
            except KeyError:
                raise NotImplementedError(f"No edit_{archive}")

        print(f"deleted fid={pk_del}")

        repo = KartRepo(repo_dir)
        dataset = repo.datasets()[layer]

        wc = repo.working_copy
        original_change_count = wc.tracking_changes_count(dataset)

        if partial:
            r = cli_runner.invoke(
                ["commit", "-m", "test-commit-1", "-o", "json", f"{layer}:{pk_del}"]
            )
        else:
            r = cli_runner.invoke(["commit", "-m", "test-commit-1", "-o", "json"])

        assert r.exit_code == 0, r
        commit_id = json.loads(r.stdout)["kart.commit/v1"]["commit"]
        print("commit:", commit_id)

        assert str(repo.head.target) == commit_id
        commit = repo.head_commit
        assert commit.message == "test-commit-1"
        assert time.time() - commit.commit_time < 10

        tree = repo.head_tree
        assert dataset.encode_1pk_to_path(pk_del) not in tree

        wc.assert_db_tree_match(tree)
        change_count = wc.tracking_changes_count(dataset)

        if partial:
            # All but one change should still be in the tracking table
            assert change_count == original_change_count - 1

            # Changes should still be visible in the working copy:
            r = cli_runner.invoke(["diff", "--exit-code"])
            assert r.exit_code == 1, r
            assert r.stdout != ""

        else:
            assert (
                change_count == 0
            ), f"Changes still listed in {wc.TRACKING_TABLE} after full commit"

            r = cli_runner.invoke(["diff", "--exit-code"])
            assert r.exit_code == 0, r
            assert r.stdout == ""