def test_clone_with_spatial_filter(
    git_with_spatial_filter_support, data_archive, cli_runner, tmp_path
):
    geom = SPATIAL_FILTER_GEOMETRY["polygons"]
    crs = SPATIAL_FILTER_CRS["polygons"]

    file_path = (tmp_path / "spatialfilter.txt").resolve()
    file_path.write_text(f"{crs}\n\n{geom}\n", encoding="utf-8")

    with data_archive("polygons-with-feature-envelopes") as repo1_path:
        repo1_url = f"file://{repo1_path.resolve()}"
        # Clone repo using spatial filter
        repo2_path = tmp_path / "repo2"
        r = cli_runner.invoke(
            [
                "clone",
                repo1_url,
                repo2_path,
                f"--spatial-filter=@{file_path}",
                "--spatial-filter-after-clone",
            ]
        )
        assert r.exit_code == 0, r.stderr

        # The resulting repo has the spatial filter configured locally.
        repo2 = KartRepo(repo2_path)
        assert repo2.config["kart.spatialfilter.geometry"].startswith(
            "POLYGON ((174.879 -37.8277,"
        )
        assert repo2.config["kart.spatialfilter.crs"] == crs

        with repo2.working_copy.session() as sess:
            assert H.row_count(sess, H.POLYGONS.LAYER) == 44

        # However, the entire polygons layer was cloned due to --spatial-filter-after-clone.
        # The spatial filter is only applied locally... all features are still present.
        assert local_features(repo2.datasets()[H.POLYGONS.LAYER]) == H.POLYGONS.ROWCOUNT

        # Try it again without --spatial-filter-after-clone.
        repo3_path = tmp_path / "repo3"
        r = cli_runner.invoke(
            ["clone", repo1_url, repo3_path, f"--spatial-filter=@{file_path}"]
        )
        assert r.exit_code == 0, r.stderr

        repo3 = KartRepo(repo3_path)
        assert repo3.config["kart.spatialfilter.geometry"].startswith(
            "POLYGON ((174.879 -37.8277,"
        )
        assert repo3.config["kart.spatialfilter.crs"] == crs
        ds = repo3.datasets()[H.POLYGONS.LAYER]

        local_feature_count = local_features(ds)
        assert local_feature_count != H.POLYGONS.ROWCOUNT
        assert local_feature_count == 46

        with repo3.working_copy.session() as sess:
            assert H.row_count(sess, H.POLYGONS.LAYER) == 44


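# `local_features` is a helper defined elsewhere in this test module; it counts how many
# feature blobs of a dataset are actually present in the local object database, as opposed
# to merely "promised" by the partial-clone remote. The sketch below only illustrates that
# idea - the `feature_blobs()` iterator and the KeyError-on-promised-blob behaviour are
# assumptions for illustration, not the real helper used in the assertions above.
def _local_features_sketch(dataset):
    present = 0
    for blob in dataset.feature_blobs():
        try:
            memoryview(blob)  # reading a promised-but-unfetched blob raises KeyError
            present += 1
        except KeyError:
            continue
    return present

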
def test_reclone_with_larger_spatial_filter(
    git_with_spatial_filter_support, data_archive, cli_runner, tmp_path
):
    geom = SPATIAL_FILTER_GEOMETRY["polygons"]
    crs = SPATIAL_FILTER_CRS["polygons"]

    file_path = (tmp_path / "spatialfilter.txt").resolve()
    file_path.write_text(f"{crs}\n\n{geom}\n", encoding="utf-8")

    EMPTY_SPATIAL_FILTER = "EPSG:4326;POLYGON((0 0,0 1,1 1,1 0,0 0))"

    with data_archive("polygons-with-feature-envelopes") as repo1_path:
        repo1_url = f"file://{repo1_path.resolve()}"
        # Clone repo using spatial filter
        repo2_path = tmp_path / "repo2"

        # TODO: Invert some of this test when --spatial-filter-during-clone is inverted.
        r = cli_runner.invoke(
            [
                "clone",
                repo1_url,
                repo2_path,
                f"--spatial-filter={EMPTY_SPATIAL_FILTER}",
                "--spatial-filter-during-clone",
            ]
        )
        assert r.exit_code == 0, r.stderr

        repo2 = KartRepo(repo2_path)
        with repo2.working_copy.session() as sess:
            assert H.row_count(sess, H.POLYGONS.LAYER) == 0
        assert local_features(repo2.datasets()[H.POLYGONS.LAYER]) == 0

        r = cli_runner.invoke(
            ["-C", repo2_path, "checkout", f"--spatial-filter=@{file_path}"]
        )
        assert r.exit_code == 0, r.stderr

        with repo2.working_copy.session() as sess:
            assert H.row_count(sess, H.POLYGONS.LAYER) == 44
        assert local_features(repo2.datasets()[H.POLYGONS.LAYER]) == 46

        r = cli_runner.invoke(["-C", repo2_path, "checkout", "--spatial-filter=none"])
        with repo2.working_copy.session() as sess:
            assert H.row_count(sess, H.POLYGONS.LAYER) == H.POLYGONS.ROWCOUNT
        assert local_features(repo2.datasets()[H.POLYGONS.LAYER]) == H.POLYGONS.ROWCOUNT

        r = cli_runner.invoke(
            ["-C", repo2_path, "checkout", f"--spatial-filter=@{file_path}"]
        )
        with repo2.working_copy.session() as sess:
            assert H.row_count(sess, H.POLYGONS.LAYER) == 44
        assert local_features(repo2.datasets()[H.POLYGONS.LAYER]) == H.POLYGONS.ROWCOUNT


def test_checkout_workingcopy(
    archive, table, commit_sha, data_archive, tmp_path, cli_runner
):
    """
    Checkout a working copy to edit
    """
    with data_archive(archive) as repo_path:
        H.clear_working_copy()

        repo = KartRepo(repo_path)
        dataset = repo.datasets()[table]
        geom_cols = dataset.schema.geometry_columns

        r = cli_runner.invoke(["checkout"])
        wc_path = Path(repo.config["kart.workingcopy.location"])
        assert r.exit_code == 0, r
        assert r.stdout.splitlines() == [f"Creating working copy at {wc_path} ..."]
        assert wc_path.exists()
        wc = repo.working_copy

        assert repo.head.name == "refs/heads/main"
        assert repo.head.shorthand == "main"
        assert wc.get_db_tree() == repo.head_tree.hex

        if geom_cols:
            with wc.session() as sess:
                spatial_index_count = sess.execute(
                    f"""SELECT COUNT(*) FROM "rtree_{table}_{geom_cols[0].name}";"""
                ).scalar()
                assert spatial_index_count == dataset.feature_count

        table_spec = KartAdapter_GPKG.v2_schema_to_sql_spec(dataset.schema)
        expected_col_spec = f"{KartAdapter_GPKG.quote(dataset.primary_key)} INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL"
        assert expected_col_spec in table_spec


def _test_postgis_import(
    repo_path,
    cli_runner,
    chdir,
    *,
    table_name,
    pk_name="id",
    pk_size=64,
    import_args=(),
):
    r = cli_runner.invoke(["init", repo_path])
    assert r.exit_code == 0, r
    with chdir(repo_path):
        r = cli_runner.invoke(
            [
                "import",
                os.environ["KART_POSTGRES_URL"],
                table_name,
                *import_args,
            ]
        )
        assert r.exit_code == 0, r

    # now check metadata
    repo = KartRepo(repo_path)
    dataset = repo.datasets()[table_name]

    meta_items = dict(dataset.meta_items())
    meta_item_keys = set(meta_items.keys())
    assert "schema.json" in meta_item_keys
    crs_keys = meta_item_keys - {"title", "description", "schema.json"}
    assert len(crs_keys) == 1
    crs_key = next(iter(crs_keys))
    assert crs_key.startswith("crs/EPSG:") and crs_key.endswith(".wkt")


def test_postgis_import_with_sampled_geometry_dimension(
    postgis_db,
    data_archive,
    tmp_path,
    cli_runner,
    request,
    chdir,
):
    with postgis_db.connect() as conn:
        conn.execute("""DROP TABLE IF EXISTS points_xyz CASCADE;""")
        conn.execute(
            """CREATE TABLE points_xyz (fid BIGINT PRIMARY KEY, shape GEOMETRY);"""
        )
        conn.execute(
            """INSERT INTO points_xyz (fid, shape) VALUES (1, ST_GeomFromText('POINT(1 2 3)', 4326));"""
        )

        _test_postgis_import(
            tmp_path / "repo",
            cli_runner,
            chdir,
            table_name="points_xyz",
            pk_name="fid",
            pk_size=64,
            import_args=["--primary-key=fid"],
        )

        repo = KartRepo(tmp_path / "repo")
        dataset = repo.datasets()["points_xyz"]
        [geom_col] = dataset.schema.geometry_columns
        assert geom_col.extra_type_info["geometryType"] == "GEOMETRY Z"

        conn.execute("""DROP TABLE IF EXISTS points_xyz CASCADE;""")


def test_fast_import(data_archive, tmp_path, cli_runner, chdir):
    table = H.POINTS.LAYER
    with data_archive("gpkg-points") as data:
        # list tables
        repo_path = tmp_path / "repo"
        repo_path.mkdir()

        with chdir(repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r

            repo = KartRepo(repo_path)

            source = TableImportSource.open(
                data / "nz-pa-points-topo-150k.gpkg", table=table
            )
            fast_import.fast_import_tables(repo, [source], from_commit=None)

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/main"
            assert repo.head.shorthand == "main"

            dataset = repo.datasets()[table]
            assert dataset.VERSION == 3

            # has a single commit
            assert len([c for c in repo.walk(repo.head.target)]) == 1
            assert list(dataset.meta_items())

            # has the right number of features
            feature_count = sum(1 for f in dataset.features())
            assert feature_count == source.feature_count


def test_spatially_filtered_merge(data_archive, cli_runner):
    # Make sure spatially filtered merges work (that is, make sure writing merged indexes
    # with missing features works).
    # See https://github.com/koordinates/kart/issues/550
    with data_archive("points-with-feature-envelopes") as repo1_path:
        repo1_url = f"file://{repo1_path.resolve()}"

        with data_archive("points-spatial-filtered") as repo2_path:
            repo2 = KartRepo(repo2_path)
            repo2.config["remote.origin.url"] = repo1_url

            ds = repo2.datasets()[H.POINTS.LAYER]

            local_feature_count = local_features(ds)
            assert local_feature_count != H.POINTS.ROWCOUNT
            assert local_feature_count == 817

            r = cli_runner.invoke(["-C", repo2_path, "create-workingcopy"])
            assert r.exit_code == 0, r.stderr

            r = cli_runner.invoke(["-C", repo2_path, "checkout", "-b", "left"])
            assert r.exit_code == 0, r.stderr

            with repo2.working_copy.session() as sess:
                assert H.row_count(sess, H.POINTS.LAYER) == 302
                sess.execute(f"DELETE FROM {H.POINTS.LAYER} WHERE fid % 3 != 0;")

            r = cli_runner.invoke(["-C", repo2_path, "commit", "-m", "left-commit"])
            assert r.exit_code == 0, r.stderr

            r = cli_runner.invoke(["-C", repo2_path, "checkout", "-b", "right", "HEAD^"])
            assert r.exit_code == 0, r.stderr

            with repo2.working_copy.session() as sess:
                assert H.row_count(sess, H.POINTS.LAYER) == 302
                sess.execute(f"DELETE FROM {H.POINTS.LAYER} WHERE fid % 3 != 1;")

            r = cli_runner.invoke(["-C", repo2_path, "commit", "-m", "right-commit"])
            assert r.exit_code == 0, r.stderr

            r = cli_runner.invoke(["-C", repo2_path, "merge", "left", "-m", "merged"])
            assert r.exit_code == 0, r.stderr

            # Make sure we can do a full read of the new commit without any problems -
            # see https://github.com/koordinates/kart/issues/552, which explains why running
            # create-workingcopy can fail after a commit in a spatially-filtered repo (unless
            # we are careful and use promisor packfiles), whereas running diff will generally
            # succeed regardless.
            r = cli_runner.invoke(
                ["-C", repo2_path, "create-workingcopy", "--delete-existing"]
            )
            assert r.exit_code == 0, r.stderr


def test_spatially_filtered_commit(data_archive, cli_runner):
    # We use the points layer for this test since it uses consecutive integer PKs.
    # This means that promised features and local features are likely to both be stored in the
    # same tree, which highlights a potential issue: https://github.com/koordinates/kart/issues/552
    with data_archive("points-with-feature-envelopes") as repo1_path:
        repo1_url = f"file://{repo1_path.resolve()}"

        with data_archive("points-spatial-filtered") as repo2_path:
            repo2 = KartRepo(repo2_path)
            repo2.config["remote.origin.url"] = repo1_url

            ds = repo2.datasets()[H.POINTS.LAYER]

            local_feature_count = local_features(ds)
            assert local_feature_count != H.POINTS.ROWCOUNT
            assert local_feature_count == 817

            r = cli_runner.invoke(["-C", repo2_path, "create-workingcopy"])
            assert r.exit_code == 0, r.stderr

            with repo2.working_copy.session() as sess:
                assert H.row_count(sess, H.POINTS.LAYER) == 302
                sess.execute(f"DELETE FROM {H.POINTS.LAYER}")

            r = cli_runner.invoke(["-C", repo2_path, "commit", "-m", "delete-matching"])
            assert r.exit_code == 0, r.stderr

            ds = repo2.datasets()[H.POINTS.LAYER]
            assert ds.feature_count == H.POINTS.ROWCOUNT - 302
            assert local_features(ds) == 817 - 302

            # Make sure we can do a full read of the new commit without any problems -
            # see https://github.com/koordinates/kart/issues/552, which explains why running
            # create-workingcopy can fail after a commit in a spatially-filtered repo (unless
            # we are careful and use promisor packfiles), whereas running diff will generally
            # succeed regardless.
            r = cli_runner.invoke(
                ["-C", repo2_path, "create-workingcopy", "--delete-existing"]
            )
            assert r.exit_code == 0, r.stderr


def test_pk_encoder_legacy_hashed(data_archive_readonly):
    archive_path = Path("upgrade") / "v2.kart" / "points.tgz"
    with data_archive_readonly(archive_path) as repo_path:
        repo = KartRepo(repo_path)
        ds = repo.datasets()["nz_pa_points_topo_150k"]
        e = ds.feature_path_encoder
        assert isinstance(e, MsgpackHashPathEncoder)
        assert e.encoding == "hex"
        assert e.branches == 256
        assert e.levels == 2
        assert (
            ds.encode_1pk_to_path(1181)
            == "nz_pa_points_topo_150k/.sno-dataset/feature/7b/36/kc0EnQ=="
        )
        assert (
            ds.encode_1pk_to_path("Dave")
            == "nz_pa_points_topo_150k/.sno-dataset/feature/b2/fe/kaREYXZl"
        )


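# The layout asserted above can be read as: the filename is the base64 of the msgpack-encoded
# PK tuple (msgpack([1181]) == b"\x91\xcd\x04\x9d" == base64 "kc0EnQ=="), and the two directory
# levels come from a hash of that encoding, rendered as hex with 256 branches (2 hex characters)
# per level. A minimal sketch of that scheme follows; the choice of sha256 here is an assumption
# for illustration only - it is not necessarily the hash the legacy encoder actually uses.
import base64
import hashlib

import msgpack


def _hashed_feature_path_sketch(pk, branches=256, levels=2):
    packed = msgpack.packb([pk])
    digest = hashlib.sha256(packed).hexdigest()  # hash choice is an assumption
    chars_per_level = len(f"{branches - 1:x}")  # 256 branches -> 2 hex chars per level
    dirs = [
        digest[i * chars_per_level : (i + 1) * chars_per_level] for i in range(levels)
    ]
    filename = base64.standard_b64encode(packed).decode("ascii")
    return "/".join(dirs + [filename])

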
def test_import_no_pk_performance(data_archive_readonly, benchmark):
    with data_archive_readonly("points") as repo_path:
        repo = KartRepo(repo_path)
        dataset = repo.datasets()["nz_pa_points_topo_150k"]
        dataset.meta_overrides = {}
        features = list(dataset.features())
        assert len(features) == 2143
        old_features = features[0:1000]
        new_features = features[1000:2143]

        pkis = PkGeneratingTableImportSource(dataset, repo)
        pkis.prev_dest_schema = dataset.schema
        pkis.primary_key = dataset.primary_key

        def _match_features_benchmark():
            # Exhaust generator:
            for _ in pkis._match_similar_features_and_remove(old_features, new_features):
                pass

        benchmark(_match_features_benchmark)


def test_feature_find_decode_performance(
    profile,
    archive,
    source_gpkg,
    table,
    data_archive,
    data_imported,
    benchmark,
    request,
):
    """
    Check single-feature decoding performance
    """
    param_ids = H.parameter_ids(request)
    benchmark.group = (
        f"test_feature_find_decode_performance - {profile} - {param_ids[-1]}"
    )

    repo_path = data_imported(archive, source_gpkg, table)
    repo = KartRepo(repo_path)
    dataset = repo.datasets()["mytable"]
    inner_tree = dataset.inner_tree

    with data_archive(archive) as data:
        with Db_GPKG.create_engine(data / source_gpkg).connect() as conn:
            num_rows = conn.execute(f"SELECT COUNT(*) FROM {table};").fetchone()[0]
            pk_field = Db_GPKG.pk_name(conn, table=table)
            pk = conn.execute(
                f"SELECT {pk_field} FROM {table} ORDER BY {pk_field} LIMIT 1 OFFSET {min(97,num_rows-1)};"
            ).fetchone()[0]

    if profile == "get_feature_by_pk":
        benchmark(dataset.get_feature, pk)
    elif profile == "get_feature_from_data":
        feature_path = dataset.encode_1pk_to_path(pk, relative=True)
        feature_data = memoryview(inner_tree / feature_path)
        benchmark(dataset.get_feature, path=feature_path, data=feature_data)
    else:
        raise NotImplementedError(f"Unknown profile: {profile}")


def test_spatially_filtered_partial_clone(data_archive, cli_runner):
    crs = SPATIAL_FILTER_CRS["polygons"]

    with data_archive("polygons-with-feature-envelopes") as repo1_path:
        repo1_url = f"file://{repo1_path.resolve()}"

        with data_archive("polygons-spatial-filtered") as repo2_path:
            repo2 = KartRepo(repo2_path)
            repo2.config["remote.origin.url"] = repo1_url

            assert repo2.config["kart.spatialfilter.geometry"].startswith(
                "POLYGON ((174.879 -37.8277,"
            )
            assert repo2.config["kart.spatialfilter.crs"] == crs
            ds = repo2.datasets()[H.POLYGONS.LAYER]

            local_feature_count = local_features(ds)
            assert local_feature_count != H.POLYGONS.ROWCOUNT
            assert local_feature_count == 52

            r = cli_runner.invoke(["-C", repo2_path, "create-workingcopy"])
            assert r.exit_code == 0, r.stderr

            with repo2.working_copy.session() as sess:
                assert H.row_count(sess, H.POLYGONS.LAYER) == 44

            def _get_key_error(ds, pk):
                try:
                    ds.get_feature(pk)
                    return None
                except KeyError as e:
                    return e

            assert _get_key_error(ds, 1424927) is None
            assert _get_key_error(ds, 9999999).subcode == LibgitSubcode.ENOSUCHPATH
            assert _get_key_error(ds, 1443053).subcode == LibgitSubcode.EOBJECTPROMISED


def _import_check(repo_path, table, source_gpkg):
    repo = KartRepo(repo_path)
    dataset = repo.datasets()[table]
    assert dataset.VERSION == 3

    with Db_GPKG.create_engine(source_gpkg).connect() as conn:
        num_rows = conn.execute(f"SELECT COUNT(*) FROM {table};").fetchone()[0]

    o = subprocess.check_output(["git", "ls-tree", "-r", "-t", "HEAD", table])
    print("\n".join(l.decode("utf8") for l in o.splitlines()[:20]))

    if dataset.VERSION != 3:
        raise NotImplementedError(dataset.VERSION)

    re_paths = r"^\d{6} blob [0-9a-f]{40}\t%s/.table-dataset/feature/.*$" % table
    git_paths = [m for m in re.findall(re_paths, o.decode("utf-8"), re.MULTILINE)]
    assert len(git_paths) == num_rows

    num_features = dataset.feature_count
    assert num_features == num_rows

    return dataset


def test_spatially_filtered_fetch_promised(
    data_archive, cli_runner, insert, monkeypatch, git_supports_spatial_filter
):
    # Keep track of how many features we fetch lazily after the partial clone.
    orig_fetch_func = FetchPromisedBlobsProcess.fetch
    fetch_count = 0

    def _fetch(*args, **kwargs):
        nonlocal fetch_count
        fetch_count += 1
        return orig_fetch_func(*args, **kwargs)

    monkeypatch.setattr(FetchPromisedBlobsProcess, "fetch", _fetch)

    with data_archive("polygons-with-feature-envelopes") as repo1_path:
        repo1_url = f"file://{repo1_path.resolve()}"

        with data_archive("polygons-spatial-filtered") as repo2_path:
            repo2 = KartRepo(repo2_path)
            repo2.config["remote.origin.url"] = repo1_url

            if not git_supports_spatial_filter:
                # Git doesn't understand the "spatial" filter.
                # But we can do this test without it:
                print("Git doesn't support spatial filters, using blob:none instead")
                repo2.config["remote.origin.partialclonefilter"] = "blob:none"

            orig_config_dict = {c.name: c.value for c in repo2.config}

            ds = repo2.datasets()[H.POLYGONS.LAYER]

            local_feature_count = local_features(ds)
            assert local_feature_count != H.POLYGONS.ROWCOUNT
            assert local_feature_count == 52

            r = cli_runner.invoke(["-C", repo2_path, "create-workingcopy"])
            assert r.exit_code == 0, r.stderr

            with repo2.working_copy.session() as sess:
                assert H.row_count(sess, H.POLYGONS.LAYER) == 44

                # Inserting features that are in the dataset, but don't match the spatial filter,
                # so they are not loaded locally nor written to the working copy.
                for pk in H.POLYGONS.SAMPLE_PKS:
                    if not is_local_feature(ds, pk):
                        insert(sess, with_pk=pk, commit=False)

            r = cli_runner.invoke(["-C", repo2_path, "status"])
            assert r.exit_code == 0, r.stderr
            assert "6 primary key conflicts" in r.stdout

            # All of the 6 features that are conflicts / were "updated" in the WC have been loaded:
            assert fetch_count == 6
            assert local_features(ds) == 58

            with repo2.working_copy.session() as sess:
                sess.execute(f"DROP TABLE {H.POLYGONS.LAYER};")

            r = cli_runner.invoke(["-C", repo2_path, "status"])
            assert r.exit_code == 0, r.stderr
            assert f"{H.POLYGONS.ROWCOUNT} deletes" in r.stdout
            assert local_features(ds) == 58

            r = cli_runner.invoke(["-C", repo2_path, "diff"])
            assert r.exit_code == 0, r.stderr

            # All of the deleted features have now been loaded to show in the diff output:
            assert local_features(ds) == H.POLYGONS.ROWCOUNT
            assert fetch_count == H.POLYGONS.ROWCOUNT - 52

            final_config_dict = {c.name: c.value for c in repo2.config}
            # Making these fetches shouldn't change any repo config:
            assert final_config_dict == orig_config_dict


def test_import_from_non_gpkg(
    archive,
    source_gpkg,
    table,
    data_archive,
    tmp_path,
    cli_runner,
    chdir,
    request,
    source_format,
    source_ogr_driver,
):
    """
    Import something else into a Kart repository.
    """
    param_ids = H.parameter_ids(request)

    with data_archive(archive) as data:
        with Db_GPKG.create_engine(data / source_gpkg).connect() as conn:
            if param_ids[-1] == "empty":
                print(f"emptying table {table}...")
                conn.execute(f"DELETE FROM {table};")

            num_rows = conn.execute(f"SELECT COUNT(*) FROM {table};").fetchone()[0]

        if param_ids[-1] == "empty":
            assert num_rows == 0

        # First, import the original GPKG to one repo
        gpkg_repo_path = tmp_path / "gpkg"
        gpkg_repo_path.mkdir()
        with chdir(gpkg_repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r
            r = cli_runner.invoke(["import", data / source_gpkg, table])
            assert r.exit_code == 0, r

        gpkg_repo = KartRepo(gpkg_repo_path)
        gpkg_dataset = gpkg_repo.datasets()[table]

        # convert to a new format using OGR
        source_filename = tmp_path / f"data.{source_format.lower()}"
        gdal.VectorTranslate(
            str(source_filename),
            gdal.OpenEx(str(data / source_gpkg)),
            format=source_ogr_driver,
            layers=[table],
        )
        repo_path = tmp_path / "non-gpkg"
        repo_path.mkdir()
        with chdir(repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r

            repo = KartRepo(repo_path)
            assert repo.is_empty

            # Import from SHP/TAB/something into Kart
            r = cli_runner.invoke(
                [
                    "import",
                    str(source_filename),
                    f"data:{table}",
                ]
            )
            assert r.exit_code == 0, r

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/main"
            assert repo.head.shorthand == "main"

            # has a single commit
            assert len([c for c in repo.walk(repo.head.target)]) == 1

            dataset = _import_check(repo_path, table, f"{data / source_gpkg}")

            # Compare the meta items to the GPKG-imported ones
            repo = KartRepo(repo_path)
            dataset = repo.datasets()[table]

            _compare_ogr_and_gpkg_meta_items(dataset, gpkg_dataset)

            if num_rows > 0:
                # compare the first feature in the repo against the source DB
                got_feature = next(dataset.features())
                pk = got_feature[dataset.primary_key]

                src_ds = ogr.Open(str(source_filename))
                src_layer = src_ds.GetLayer(0)
                assert src_layer.GetFeatureCount() == num_rows

                f = src_layer.GetFeature(pk)
                expected_feature = {
                    f.GetFieldDefnRef(i).GetName(): f.GetField(i)
                    for i in range(f.GetFieldCount())
                }
                if "date_adjus" in expected_feature:
                    expected_feature["date_adjus"] = expected_feature[
                        "date_adjus"
                    ].replace("/", "-")
                expected_feature["FID"] = f.GetFID()
                if src_layer.GetGeomType() != ogr.wkbNone:
                    g = f.GetGeometryRef()
                    if g:
                        g.AssignSpatialReference(src_layer.GetSpatialRef())
                        if table == H.POLYGONS.LAYER:
                            g = ogr.ForceToMultiPolygon(g)
                    expected_feature["geom"] = ogr_to_gpkg_geom(g)

                assert normalise_feature(got_feature) == expected_feature


def test_shp_import_meta(
    data_archive,
    tmp_path,
    cli_runner,
    request,
):
    with data_archive("gpkg-polygons") as data:
        # convert to SHP using OGR
        source_filename = tmp_path / "nz_waca_adjustments.shp"
        gdal.VectorTranslate(
            str(source_filename),
            gdal.OpenEx(str(data / "nz-waca-adjustments.gpkg")),
            format="ESRI Shapefile",
            layers=["nz_waca_adjustments"],
        )

        # now import the SHP
        repo_path = tmp_path / "repo"
        r = cli_runner.invoke(["init", "--import", source_filename, str(repo_path)])
        assert r.exit_code == 0, r

        # now check metadata
        path = "nz_waca_adjustments"
        repo = KartRepo(repo_path)
        dataset = repo.datasets()[path]

        meta_items = dict(dataset.meta_items())
        assert set(meta_items) == {
            "schema.json",
            "crs/EPSG:4167.wkt",
        }
        schema = without_ids(dataset.get_meta_item("schema.json"))
        assert schema == [
            {
                "name": "FID",
                "dataType": "integer",
                "primaryKeyIndex": 0,
                "size": 64,
            },
            {
                "name": "geom",
                "dataType": "geometry",
                "geometryType": "MULTIPOLYGON",
                "geometryCRS": "EPSG:4167",
            },
            {"name": "date_adjus", "dataType": "date"},
            {"name": "survey_ref", "dataType": "text", "length": 50},
            {
                "name": "adjusted_n",
                "dataType": "integer",
                "size": 32,
            },
        ]


def test_commit_edits(
    archive,
    table,
    commit_sha,
    data_archive,
    cli_runner,
    new_sqlserver_db_schema,
    edit_points,
    edit_polygons,
    edit_table,
):
    """
    Checkout a working copy and make some edits
    """
    with data_archive(archive) as repo_path:
        repo = KartRepo(repo_path)
        H.clear_working_copy()

        with new_sqlserver_db_schema() as (sqlserver_url, sqlserver_schema):
            r = cli_runner.invoke(["create-workingcopy", sqlserver_url])
            assert r.exit_code == 0, r.stderr

            r = cli_runner.invoke(["status"])
            assert r.exit_code == 0, r.stderr
            assert r.stdout.splitlines() == [
                "On branch main",
                "",
                "Nothing to commit, working copy clean",
            ]

            wc = repo.working_copy
            assert wc.status() & WorkingCopyStatus.INITIALISED
            assert wc.status() & WorkingCopyStatus.HAS_DATA

            with wc.session() as sess:
                if archive == "points":
                    edit_points(sess, repo.datasets()[H.POINTS.LAYER], wc)
                elif archive == "polygons":
                    edit_polygons(sess, repo.datasets()[H.POLYGONS.LAYER], wc)
                elif archive == "table":
                    edit_table(sess, repo.datasets()[H.TABLE.LAYER], wc)

            r = cli_runner.invoke(["status"])
            assert r.exit_code == 0, r.stderr
            assert r.stdout.splitlines() == [
                "On branch main",
                "",
                "Changes in working copy:",
                ' (use "kart commit" to commit)',
                ' (use "kart restore" to discard changes)',
                "",
                f" {table}:",
                " feature:",
                " 1 inserts",
                " 2 updates",
                " 5 deletes",
            ]
            orig_head = repo.head.peel(pygit2.Commit).hex

            r = cli_runner.invoke(["commit", "-m", "test_commit"])
            assert r.exit_code == 0, r.stderr

            r = cli_runner.invoke(["status"])
            assert r.exit_code == 0, r.stderr
            assert r.stdout.splitlines() == [
                "On branch main",
                "",
                "Nothing to commit, working copy clean",
            ]

            new_head = repo.head.peel(pygit2.Commit).hex
            assert new_head != orig_head

            r = cli_runner.invoke(["checkout", "HEAD^"])
            assert repo.head.peel(pygit2.Commit).hex == orig_head


def test_import_various_field_types(tmp_path, postgres_table_with_types, cli_runner):
    # Using postgres here because it has the best type preservation
    r = cli_runner.invoke(["init", str(tmp_path / "repo1")])
    assert r.exit_code == 0, r.stderr

    r = cli_runner.invoke(
        [
            "-C",
            str(tmp_path / "repo1"),
            "import",
            os.environ["KART_POSTGRES_URL"],
            "typoes",
        ],
    )
    assert r.exit_code == 0, r.stderr

    repo = KartRepo(tmp_path / "repo1")
    dataset = repo.datasets()["typoes"]

    cols = _dataset_col_types(dataset)

    assert cols == {
        "bigant_pk": {"dataType": "integer", "primaryKeyIndex": 0, "size": 64},
        "bigant": {"dataType": "integer", "size": 64},
        "reel": {"dataType": "float", "size": 32},
        "dubble": {"dataType": "float", "size": 64},
        "smallant": {"dataType": "integer", "size": 16},
        "regularant": {"dataType": "integer", "size": 32},
        "tumeric": {"dataType": "numeric"},
        "tumeric20_0": {"dataType": "numeric", "precision": 20},
        "tumeric4_0": {"dataType": "numeric", "precision": 4},
        "tumeric5_5": {"dataType": "numeric", "precision": 5, "scale": 5},
        "tumeric99_0": {"dataType": "numeric", "precision": 99},
        "techs": {"dataType": "text"},
        "techs10": {"dataType": "text", "length": 100},
    }

    # Now generate a DBF file, and try again from there.
    ogr_conn_str = postgres_url_to_ogr_conn_str(os.environ["KART_POSTGRES_URL"])
    gdal.VectorTranslate(
        str(tmp_path / "typoes.dbf"),
        ogr_conn_str,
        format="ESRI Shapefile",
        layers=["typoes"],
    )

    r = cli_runner.invoke(["init", str(tmp_path / "repo2")])
    assert r.exit_code == 0, r.stderr

    r = cli_runner.invoke(
        [
            "-C",
            str(tmp_path / "repo2"),
            "import",
            str(tmp_path / "typoes.dbf"),
            "typoes",
        ],
    )
    assert r.exit_code == 0, r.stderr

    repo = KartRepo(tmp_path / "repo2")
    dataset = repo.datasets()["typoes"]

    cols = _dataset_col_types(dataset)

    assert cols == {
        "FID": {"dataType": "integer", "primaryKeyIndex": 0, "size": 64},
        "bigant": {"dataType": "integer", "size": 64},
        "regularant": {"dataType": "integer", "size": 32},
        "smallant": {"dataType": "integer", "size": 32},
        "dubble": {"dataType": "float", "size": 64},
        "techs": {"dataType": "text", "length": 80},
        "techs10": {"dataType": "text", "length": 100},
        "tumeric20_": {"dataType": "numeric", "precision": 20, "scale": 0},
        "tumeric4_0": {"dataType": "numeric", "precision": 4, "scale": 0},
        "tumeric5_5": {"dataType": "numeric", "precision": 5, "scale": 5},
        "tumeric99_": {"dataType": "numeric", "precision": 99, "scale": 0},
        # These two type conversions are regrettable, but unavoidable as we are using OGR.
        "reel": {"dataType": "float", "size": 64},
        "tumeric": {"dataType": "float", "size": 64},
    }


def test_postgis_import_from_view_no_pk(
    postgis_db,
    postgis_layer,
    data_archive,
    tmp_path,
    cli_runner,
    request,
    chdir,
):
    repo_path = tmp_path / "repo"

    with postgis_layer(
        "gpkg-points", "nz-pa-points-topo-150k.gpkg", "nz_pa_points_topo_150k"
    ):
        with postgis_db.connect() as conn:
            conn.execute(
                """
                CREATE OR REPLACE VIEW nz_pa_points_view AS (
                    SELECT geom, t50_fid, name_ascii, macronated, name
                    FROM nz_pa_points_topo_150k
                    WHERE fid %% 3 != 0
                );
                """
            )

        _test_postgis_import(
            repo_path,
            cli_runner,
            chdir,
            table_name="nz_pa_points_view",
            pk_name="auto_pk",
        )

        repo = KartRepo(repo_path)
        dataset = repo.datasets()["nz_pa_points_view"]
        initial_pks = [f["auto_pk"] for f in dataset.features()]
        assert len(initial_pks) == 1429
        assert max(initial_pks) == 1429
        assert sorted(initial_pks) == list(range(1, 1429 + 1))

        with postgis_db.connect() as conn:
            conn.execute("DROP VIEW IF EXISTS nz_pa_points_view;")
            conn.execute(
                """
                CREATE OR REPLACE VIEW nz_pa_points_view AS (
                    SELECT geom, t50_fid, name_ascii, macronated, name
                    FROM nz_pa_points_topo_150k
                    WHERE fid %% 3 != 1
                );
                """
            )

        r = cli_runner.invoke(
            [
                "--repo",
                str(repo_path.resolve()),
                "import",
                os.environ["KART_POSTGRES_URL"],
                "nz_pa_points_view",
                "--replace-existing",
            ]
        )
        assert r.exit_code == 0, r.stderr

        repo = KartRepo(repo_path)
        dataset = repo.datasets()["nz_pa_points_view"]
        new_pks = [f["auto_pk"] for f in dataset.features()]
        assert len(new_pks) == 1428
        assert max(new_pks) == 2143
        assert len(set(initial_pks) & set(new_pks)) == 714
        # 2143 features total - but 1429 are in the first group and 1428 are in the second group.
        # That means 714 features are in both, and should be imported with the same PK both times:
        # 1429 + 1428 is 2857, which is 714 more than the actual total of 2143.

        with postgis_db.connect() as conn:
            # This is similar enough to be detected as an edit - only one field is different.
            conn.execute(
                "UPDATE nz_pa_points_topo_150k SET name_ascii='foo' WHERE fid=3;"
            )
            # This is similar enough to be detected as an edit - only one field is different.
            conn.execute("UPDATE nz_pa_points_topo_150k SET name='qux' WHERE fid=6;")
            # This will not be detected as an edit - two fields are different,
            # so it looks like one feature is deleted and a different one is inserted.
            conn.execute(
                "UPDATE nz_pa_points_topo_150k SET name_ascii='bar', name='baz' WHERE fid=9;"
            )
            conn.execute("DROP VIEW IF EXISTS nz_pa_points_view;")
            conn.execute(
                """
                CREATE OR REPLACE VIEW nz_pa_points_view AS (
                    SELECT geom, t50_fid, name_ascii, macronated, name
                    FROM nz_pa_points_topo_150k
                    WHERE fid %% 3 != 2
                );
                """
            )

        r = cli_runner.invoke(
            [
                "--repo",
                str(repo_path.resolve()),
                "import",
                os.environ["KART_POSTGRES_URL"],
                "nz_pa_points_view",
                "--replace-existing",
            ]
        )
        assert r.exit_code == 0, r.stderr

        r = cli_runner.invoke(["--repo", str(repo_path.resolve()), "show"])
        assert r.exit_code == 0, r.stderr
        output = r.stdout.splitlines()
        # Huge amount of adds and deletes caused by changing which features are included in the view again:
        assert len(output) == 10031
        # But we are still able to recognise the edits we made as edits.
        # (For happy mathematical reasons, these diffs end up at the end of the output.)
        assert output[-22:] == [
            # Edit: name_ascii changed to foo
            "--- nz_pa_points_view:feature:1430",
            "+++ nz_pa_points_view:feature:1430",
            "- name_ascii = Tauwhare Pa",
            "+ name_ascii = foo",
            # Edit: name changed to qux
            "--- nz_pa_points_view:feature:1431",
            "+++ nz_pa_points_view:feature:1431",
            "- name = ␀",
            "+ name = qux",
            # Not considered an edit - both name_ascii and name changed.
            # So, left as a delete + insert, and assigned a new PK.
            "--- nz_pa_points_view:feature:1432",
            "- auto_pk = 1432",
            "- geom = POINT(...)",
            "- t50_fid = 2426279",
            "- name_ascii = ␀",
            "- macronated = N",
            "- name = ␀",
            "+++ nz_pa_points_view:feature:2144",
            "+ auto_pk = 2144",
            "+ geom = POINT(...)",
            "+ t50_fid = 2426279",
            "+ name_ascii = bar",
            "+ macronated = N",
            "+ name = baz",
        ]


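def test_view_pk_overlap_arithmetic_sketch():
    # Illustrative-only check (not part of the original suite) of the overlap arithmetic
    # used in test_postgis_import_from_view_no_pk above: two views of 1429 and 1428
    # features drawn from 2143 total must share 1429 + 1428 - 2143 = 714 features, which
    # is why exactly 714 PKs survive the re-import unchanged.
    assert 1429 + 1428 - 2143 == 714

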
def test_clone_with_reference_spatial_filter(data_archive, cli_runner, tmp_path):
    # TODO - this currently tests that the spatial filter is correctly applied locally after
    # the entire repo is cloned. Applying a reference spatial filter remotely to do a
    # partial clone is not yet supported.

    geom = SPATIAL_FILTER_GEOMETRY["polygons"]
    crs = SPATIAL_FILTER_CRS["polygons"]

    file_path = tmp_path / "spatialfilter.txt"
    file_path.write_text(f"{crs}\n\n{geom}\n", encoding="utf-8")

    with data_archive("polygons") as repo1_path:
        r = cli_runner.invoke(
            [
                "commit-files",
                "-m",
                "Add spatial filter",
                f"spatialfilter.txt=@{file_path}",
            ]
        )
        assert r.exit_code == 0, r.stderr
        r = cli_runner.invoke(["git", "hash-object", file_path])
        assert r.exit_code == 0, r.stderr
        blob_sha = r.stdout.strip()
        r = cli_runner.invoke(["git", "update-ref", "refs/filters/octagon", blob_sha])
        assert r.exit_code == 0, r.stderr

        # Spatial filter is now stored with ref "octagon".
        # Test spatial-filter resolve:
        r = cli_runner.invoke(["spatial-filter", "resolve", "octagon"])
        assert r.exit_code == 0, r.stderr
        assert r.stdout.startswith(f"{crs}\n\nPOLYGON((174.879 -37.8277,")

        r = cli_runner.invoke(["spatial-filter", "resolve", "octagon", "--envelope"])
        assert r.exit_code == 0, r.stderr
        assert r.stdout == "174.879,-37.9783,175.3878,-37.4987\n"

        r = cli_runner.invoke(["spatial-filter", "resolve", "octagon", "-o", "json"])
        assert r.exit_code == 0, r.stderr
        jdict = json.loads(r.stdout)
        assert jdict["reference"] == "refs/filters/octagon"
        assert jdict["objectId"] == blob_sha
        assert jdict["geometry"].startswith(
            "01030000000100000009000000E3A59BC420DC65401973D7"
        )
        assert jdict["crs"] == crs

        r = cli_runner.invoke(
            ["spatial-filter", "resolve", "octagon", "-o", "json", "--envelope"]
        )
        assert r.exit_code == 0, r.stderr
        envelope = json.loads(r.stdout)
        assert envelope == [174.879, -37.9783, 175.3878, -37.4987]

        # This is disabled by default as it is still not fully supported.
        os.environ["X_KART_SPATIAL_FILTER_REFERENCE"] = "1"
        try:
            # Clone repo using spatial filter reference
            repo2_path = tmp_path / "repo2"
            r = cli_runner.invoke(
                [
                    "clone",
                    repo1_path,
                    repo2_path,
                    "--spatial-filter=octagon",
                    "--spatial-filter-after-clone",
                ]
            )
            assert r.exit_code == 0, r.stderr

            # The resulting repo has the spatial filter configured locally.
            repo2 = KartRepo(repo2_path)
            assert (
                repo2.config["kart.spatialfilter.reference"] == "refs/filters/octagon"
            )
            assert repo2.config["kart.spatialfilter.objectid"] == blob_sha

            # However, the entire polygons layer was cloned.
            # TODO: Only clone the features that match the spatial filter.
            assert (
                local_features(repo2.datasets()[H.POLYGONS.LAYER])
                == H.POLYGONS.ROWCOUNT
            )
            with repo2.working_copy.session() as sess:
                assert H.row_count(sess, H.POLYGONS.LAYER) == 44

            # Clone repo using spatial filter object ID
            repo3_path = tmp_path / "repo3"
            r = cli_runner.invoke(
                [
                    "clone",
                    repo1_path,
                    repo3_path,
                    f"--spatial-filter={blob_sha}",
                    "--spatial-filter-after-clone",
                ]
            )
            assert r.exit_code == 0, r.stderr

            repo3 = KartRepo(repo3_path)
            assert repo3.config["kart.spatialfilter.geometry"].startswith(
                "POLYGON ((174.879 -37.8277,"
            )
            assert repo3.config["kart.spatialfilter.crs"] == crs
            with repo3.working_copy.session() as sess:
                assert H.row_count(sess, H.POLYGONS.LAYER) == 44

            # Missing spatial filter:
            repo4_path = tmp_path / "repo4"
            r = cli_runner.invoke(
                [
                    "clone",
                    repo1_path,
                    repo4_path,
                    "--spatial-filter=dodecahedron",
                    "--spatial-filter-after-clone",
                ]
            )
            assert r.exit_code == NO_SPATIAL_FILTER, r.stderr
        finally:
            del os.environ["X_KART_SPATIAL_FILTER_REFERENCE"]


def test_commit(
    archive,
    layer,
    partial,
    data_working_copy,
    cli_runner,
    request,
    edit_points,
    edit_polygons,
    edit_table,
):
    """
    commit outstanding changes from the working copy
    """
    with data_working_copy(archive) as (repo_dir, wc_path):
        # empty
        r = cli_runner.invoke(["commit", "-m", "test-commit-empty"])
        assert r.exit_code == NO_CHANGES, r
        assert r.stderr.splitlines() == ["Error: No changes to commit"]

        # empty
        r = cli_runner.invoke(["commit", "-m", "test-commit-empty", "--allow-empty"])
        assert r.exit_code == 0, r

        # make some changes
        repo = KartRepo(repo_dir)
        with repo.working_copy.session() as sess:
            try:
                edit_func = locals()[f"edit_{archive}"]
                pk_del = edit_func(sess)
            except KeyError:
                raise NotImplementedError(f"No edit_{archive}")

        print(f"deleted fid={pk_del}")

        repo = KartRepo(repo_dir)
        dataset = repo.datasets()[layer]

        wc = repo.working_copy
        original_change_count = wc.tracking_changes_count(dataset)

        if partial:
            r = cli_runner.invoke(
                ["commit", "-m", "test-commit-1", "-o", "json", f"{layer}:{pk_del}"]
            )
        else:
            r = cli_runner.invoke(["commit", "-m", "test-commit-1", "-o", "json"])

        assert r.exit_code == 0, r
        commit_id = json.loads(r.stdout)["kart.commit/v1"]["commit"]
        print("commit:", commit_id)

        assert str(repo.head.target) == commit_id

        commit = repo.head_commit
        assert commit.message == "test-commit-1"
        assert time.time() - commit.commit_time < 10

        tree = repo.head_tree
        assert dataset.encode_1pk_to_path(pk_del) not in tree

        wc.assert_db_tree_match(tree)
        change_count = wc.tracking_changes_count(dataset)

        if partial:
            # All but one change should still be in the tracking table
            assert change_count == original_change_count - 1

            # Changes should still be visible in the working copy:
            r = cli_runner.invoke(["diff", "--exit-code"])
            assert r.exit_code == 1, r
            assert r.stdout != ""
        else:
            assert (
                change_count == 0
            ), f"Changes still listed in {wc.TRACKING_TABLE} after full commit"

            r = cli_runner.invoke(["diff", "--exit-code"])
            assert r.exit_code == 0, r
            assert r.stdout == ""