def test_import_multiple(data_archive, chdir, cli_runner, tmp_path):
    repo_path = tmp_path / "repo"
    repo_path.mkdir()

    with chdir(repo_path):
        r = cli_runner.invoke(["init"])
        assert r.exit_code == 0, r

    repo = KartRepo(repo_path)
    assert repo.is_empty

    LAYERS = (
        ("gpkg-points", "nz-pa-points-topo-150k.gpkg", H.POINTS.LAYER),
        ("gpkg-polygons", "nz-waca-adjustments.gpkg", H.POLYGONS.LAYER),
    )

    datasets = []
    for i, (archive, source_gpkg, table) in enumerate(LAYERS):
        with data_archive(archive) as data:
            with chdir(repo_path):
                r = cli_runner.invoke(["import", f"GPKG:{data / source_gpkg}", table])
                assert r.exit_code == 0, r

                datasets.append(
                    _import_check(
                        repo_path,
                        table,
                        f"{data / source_gpkg}",
                    )
                )

                assert len([c for c in repo.walk(repo.head.target)]) == i + 1

                if i + 1 == len(LAYERS):
                    # importing the same layer into the same path again should fail
                    r = cli_runner.invoke(
                        ["import", f"GPKG:{data / source_gpkg}", table]
                    )
                    assert r.exit_code == INVALID_OPERATION

    # has two commits
    assert len([c for c in repo.walk(repo.head.target)]) == len(LAYERS)

    tree = repo.head_tree

    for i, ds in enumerate(datasets):
        assert ds.path == LAYERS[i][2]

        feature = next(ds.features())
        f_path = ds.encode_1pk_to_path(feature[ds.primary_key])
        assert tree / f_path
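
# --- Sketch: `_import_check` is defined elsewhere in this module. The body
# below is a minimal, assumed reconstruction based on how its return value is
# used above (.path, .features(), .primary_key); it is not the real helper.
def _import_check_sketch(repo_path, table, source_gpkg):
    repo = KartRepo(repo_path)
    dataset = repo.datasets()[table]
    assert dataset.path == table

    # assumed check: feature count in the repo matches the source GPKG
    with Db_GPKG.create_engine(source_gpkg).connect() as conn:
        num_rows = conn.execute(f"SELECT COUNT(*) FROM {table};").fetchone()[0]
    assert sum(1 for _ in dataset.features()) == num_rows

    return dataset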
def test_fast_import(data_archive, tmp_path, cli_runner, chdir):
    table = H.POINTS.LAYER
    with data_archive("gpkg-points") as data:
        # list tables
        repo_path = tmp_path / "repo"
        repo_path.mkdir()

        with chdir(repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r

            repo = KartRepo(repo_path)
            source = TableImportSource.open(
                data / "nz-pa-points-topo-150k.gpkg", table=table
            )
            fast_import.fast_import_tables(repo, [source], from_commit=None)

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/main"
            assert repo.head.shorthand == "main"

            dataset = repo.datasets()[table]
            assert dataset.VERSION == 3

            # has a single commit
            assert len([c for c in repo.walk(repo.head.target)]) == 1
            assert list(dataset.meta_items())

            # has the right number of features
            feature_count = sum(1 for f in dataset.features())
            assert feature_count == source.feature_count
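
# --- Sketch: the fast-import path exercised above, outside a test, using
# only the calls visible in this module. `some_repo_path`, `some_gpkg_path`
# and `some_table` are placeholders, not names from this codebase.
def _fast_import_usage_sketch(some_repo_path, some_gpkg_path, some_table):
    repo = KartRepo(some_repo_path)
    source = TableImportSource.open(some_gpkg_path, table=some_table)
    # from_commit=None: import into an empty repository, creating the
    # initial commit on the default branch
    fast_import.fast_import_tables(repo, [source], from_commit=None)
    return repo.datasets()[some_table]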
def upgrade(ctx, source, dest, in_place):
    """
    Upgrade a repository from an earlier version of Kart to be compatible with the latest version.
    The current repository structure of Kart is known as Datasets V2, which is used from Kart 0.5 onwards.

    Usage:
    kart upgrade SOURCE DEST
    """
    source = Path(source)
    dest = Path(dest)

    if in_place:
        dest = source

    if not in_place and dest.exists() and any(dest.iterdir()):
        raise InvalidOperation(f'"{dest}" isn\'t empty', param_hint="DEST")

    try:
        source_repo = KartRepo(source)
    except NotFound:
        raise click.BadParameter(
            f"'{source}': not an existing Kart repository", param_hint="SOURCE"
        )

    source_version = source_repo.table_dataset_version
    if source_version == DEFAULT_NEW_REPO_VERSION:
        raise InvalidOperation(
            f"Cannot upgrade: source repository is already at latest known version (Datasets V{source_version})"
        )

    if source_version > DEFAULT_NEW_REPO_VERSION:
        # Repo is too advanced for this version of Kart to understand - we can't upgrade it.
        # This prints a good error message explaining the whole situation.
        source_repo.ensure_supported_version()

    source_dataset_class = dataset_class_for_legacy_version(source_version, in_place)

    if not source_dataset_class:
        raise InvalidOperation(
            f"Unrecognised source repository version: {source_version}"
        )

    # action!
    if in_place:
        dest_repo = ForceLatestVersionRepo(dest)
    else:
        click.secho(f"Initialising {dest} ...", bold=True)
        dest.mkdir()
        dest_repo = KartRepo.init_repository(
            dest, wc_location=None, bare=source_repo.is_bare
        )

    # walk _all_ references
    source_walker = source_repo.walk(
        None, pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE
    )
    for ref in source_repo.listall_reference_objects():
        source_walker.push(ref.resolve().target)

    commit_map = {}

    click.secho("\nWriting new commits ...", bold=True)
    i = -1
    for i, source_commit in enumerate(source_walker):
        dest_parents = []
        for parent_id in source_commit.parent_ids:
            try:
                dest_parents.append(commit_map[parent_id.hex])
            except KeyError:
                raise ValueError(
                    f"Commit {i} ({source_commit.id}): Haven't seen parent ({parent_id})"
                )

        _upgrade_commit(
            ctx,
            i,
            source_repo,
            source_commit,
            source_dataset_class,
            dest_parents,
            dest_repo,
            commit_map,
        )

    click.echo(f"{i+1} commits processed.")

    click.secho("\nUpdating references ...", bold=True)
    for ref in source_repo.listall_reference_objects():
        if ref.type == pygit2.GIT_REF_OID:
            # real references
            target = commit_map[ref.target.hex]
            dest_repo.references.create(ref.name, target, True)  # overwrite
            click.echo(f"  {ref.name} ({ref.target.hex[:8]} → {target[:8]})")

    for ref in source_repo.listall_reference_objects():
        if ref.type == pygit2.GIT_REF_SYMBOLIC:
            dest_repo.references.create(ref.name, ref.target)
            click.echo(f"  {ref.name} → {ref.target}")

    if i >= 0:
        if source_repo.head_is_detached:
            dest_repo.set_head(
                pygit2.Oid(hex=commit_map[source_repo.head.target.hex])
            )
        else:
            dest_repo.set_head(source_repo.head.name)

    click.secho("\nCompacting repository ...", bold=True)
    if in_place:
        # old reflogs will refer to old objects, which prevents them from getting gc'd.
        # so we clear out the old reflogs here.
        # this *does* mean you can't go back, hence the 'irreversible' in the --in-place help.
        dest_repo.invoke_git("reflog", "expire", "--expire-unreachable=now", "--all")

    dest_repo.gc("--prune=now")

    if source_repo.workingcopy_location:
        click.secho("\nCreating working copy ...", bold=True)
        subctx = click.Context(ctx.command, parent=ctx)
        subctx.ensure_object(context.Context)
        subctx.obj.user_repo_path = str(dest)
        subctx.invoke(checkout.create_workingcopy)

    if in_place:
        dest_repo.config[KartConfigKeys.KART_REPOSTRUCTURE_VERSION] = str(
            DEFAULT_NEW_REPO_VERSION
        )

    click.secho("\nUpgrade complete", fg="green", bold=True)
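
# --- Sketch: `_upgrade_commit` is defined elsewhere in this module. This
# hypothetical outline only documents the contract the rewrite loop above
# relies on, in particular that commit_map gains an entry for every commit
# processed so descendants can resolve their rewritten parents.
def _upgrade_commit_sketch(
    ctx,
    i,
    source_repo,
    source_commit,
    source_dataset_class,
    dest_parents,
    dest_repo,
    commit_map,
):
    # 1. read each dataset in source_commit via source_dataset_class
    # 2. re-encode its meta items and features in the latest repo structure
    # 3. write a commit to dest_repo with dest_parents as its parents,
    #    preserving the author, committer and message of source_commit
    # 4. record the mapping for descendants:
    #    commit_map[source_commit.hex] = <new commit id hex>
    raise NotImplementedError("illustrative outline only")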
def test_import_from_non_gpkg(
    archive,
    source_gpkg,
    table,
    data_archive,
    tmp_path,
    cli_runner,
    chdir,
    request,
    source_format,
    source_ogr_driver,
):
    """
    Import something else into a Kart repository.
    """
    param_ids = H.parameter_ids(request)

    with data_archive(archive) as data:
        with Db_GPKG.create_engine(data / source_gpkg).connect() as conn:
            if param_ids[-1] == "empty":
                print(f"emptying table {table}...")
                conn.execute(f"DELETE FROM {table};")

            num_rows = conn.execute(f"SELECT COUNT(*) FROM {table};").fetchone()[0]

        if param_ids[-1] == "empty":
            assert num_rows == 0

        # First, import the original GPKG to one repo
        gpkg_repo_path = tmp_path / "gpkg"
        gpkg_repo_path.mkdir()
        with chdir(gpkg_repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r
            r = cli_runner.invoke(["import", data / source_gpkg, table])
            assert r.exit_code == 0, r

        gpkg_repo = KartRepo(gpkg_repo_path)
        gpkg_dataset = gpkg_repo.datasets()[table]

        # convert to a new format using OGR
        source_filename = tmp_path / f"data.{source_format.lower()}"
        gdal.VectorTranslate(
            str(source_filename),
            gdal.OpenEx(str(data / source_gpkg)),
            format=source_ogr_driver,
            layers=[table],
        )
        repo_path = tmp_path / "non-gpkg"
        repo_path.mkdir()

        with chdir(repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r

            repo = KartRepo(repo_path)
            assert repo.is_empty

            # Import from SHP/TAB/something into Kart
            r = cli_runner.invoke(
                [
                    "import",
                    str(source_filename),
                    f"data:{table}",
                ]
            )
            assert r.exit_code == 0, r

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/main"
            assert repo.head.shorthand == "main"

            # has a single commit
            assert len([c for c in repo.walk(repo.head.target)]) == 1

            dataset = _import_check(repo_path, table, f"{data / source_gpkg}")

            # Compare the meta items to the GPKG-imported ones
            repo = KartRepo(repo_path)
            dataset = repo.datasets()[table]

            _compare_ogr_and_gpkg_meta_items(dataset, gpkg_dataset)

            if num_rows > 0:
                # compare the first feature in the repo against the source DB
                got_feature = next(dataset.features())
                pk = got_feature[dataset.primary_key]

                src_ds = ogr.Open(str(source_filename))
                src_layer = src_ds.GetLayer(0)
                assert src_layer.GetFeatureCount() == num_rows

                f = src_layer.GetFeature(pk)
                expected_feature = {
                    f.GetFieldDefnRef(i).GetName(): f.GetField(i)
                    for i in range(f.GetFieldCount())
                }
                if "date_adjus" in expected_feature:
                    expected_feature["date_adjus"] = expected_feature[
                        "date_adjus"
                    ].replace("/", "-")
                expected_feature["FID"] = f.GetFID()
                if src_layer.GetGeomType() != ogr.wkbNone:
                    g = f.GetGeometryRef()
                    if g:
                        g.AssignSpatialReference(src_layer.GetSpatialRef())
                        if table == H.POLYGONS.LAYER:
                            g = ogr.ForceToMultiPolygon(g)
                    expected_feature["geom"] = ogr_to_gpkg_geom(g)

                assert normalise_feature(got_feature) == expected_feature
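
# --- Sketch: `_compare_ogr_and_gpkg_meta_items` is defined elsewhere in this
# module. A minimal assumed version, presuming meta_items() yields
# (name, value) pairs (as its use with list() in test_fast_import suggests):
def _compare_meta_items_sketch(ogr_dataset, gpkg_dataset):
    ogr_meta = dict(ogr_dataset.meta_items())
    gpkg_meta = dict(gpkg_dataset.meta_items())
    # both import paths should at least produce a schema for the dataset
    assert "schema.json" in ogr_meta
    assert "schema.json" in gpkg_meta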
def test_import(
    profile,
    archive,
    source_gpkg,
    table,
    data_archive,
    tmp_path,
    cli_runner,
    chdir,
    benchmark,
    request,
    monkeypatch,
):
    """
    Import the GeoPackage (eg. `kx-foo-layer.gpkg`) into a Kart repository.
    """
    param_ids = H.parameter_ids(request)

    # wrap the original functions with benchmarking
    orig_import_func = fast_import.fast_import_tables
    orig_checkout_func = init._add_datasets_to_working_copy

    def _benchmark_import(*args, **kwargs):
        # one round/iteration isn't very statistical, but gives a crude idea
        return benchmark.pedantic(
            orig_import_func, args=args, kwargs=kwargs, rounds=1, iterations=1
        )

    def _benchmark_checkout(*args, **kwargs):
        return benchmark.pedantic(
            orig_checkout_func, args=args, kwargs=kwargs, rounds=1, iterations=1
        )

    if profile == "fast_import":
        monkeypatch.setattr(init, "fast_import_tables", _benchmark_import)
    else:
        monkeypatch.setattr(init, "_add_datasets_to_working_copy", _benchmark_checkout)

    with data_archive(archive) as data:
        # list tables
        repo_path = tmp_path / "repo"
        repo_path.mkdir()

        with Db_GPKG.create_engine(data / source_gpkg).connect() as conn:
            if param_ids[-1] == "empty":
                print(f"emptying table {table}...")
                conn.execute(f"DELETE FROM {table};")

            num_rows = conn.execute(f"SELECT COUNT(*) FROM {table};").fetchone()[0]

        benchmark.group = f"test_import - {param_ids[-1]} (N={num_rows})"

        if param_ids[-1] == "empty":
            assert num_rows == 0

        with chdir(repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r

            repo = KartRepo(repo_path)
            assert repo.is_empty

            r = cli_runner.invoke(["import", str(data / source_gpkg), table])
            assert r.exit_code == 0, r

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/main"
            assert repo.head.shorthand == "main"

            # has a single commit
            assert len(list(repo.walk(repo.head.target))) == 1

            dataset = _import_check(repo_path, table, f"{data / source_gpkg}")

            with Db_GPKG.create_engine(data / source_gpkg).connect() as conn:
                pk_field = Db_GPKG.pk_name(conn, table=table)

                if num_rows > 0:
                    # compare the first feature in the repo against the source DB
                    feature = next(dataset.features())

                    row = normalise_feature(
                        conn.execute(
                            f"SELECT * FROM {table} WHERE {pk_field}=?;",
                            [feature[pk_field]],
                        ).fetchone()
                    )
                    feature = normalise_feature(feature)
                    print("First Feature:", feature, row)
                    assert feature == row

                    # compare a source DB feature against the repo feature
                    row = normalise_feature(
                        conn.execute(
                            f"SELECT * FROM {table} ORDER BY {pk_field} LIMIT 1 OFFSET {min(97, num_rows - 1)};"
                        ).fetchone()
                    )

                    for feature in dataset.features():
                        if feature[pk_field] == row[pk_field]:
                            feature = normalise_feature(feature)
                            assert feature == row
                            break
                    else:
                        pytest.fail(
                            f"Couldn't find repo feature {pk_field}={row[pk_field]}"
                        )
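
# --- Sketch: `normalise_feature` is used by several tests here but defined
# elsewhere in the suite. A minimal assumed version; the real helper likely
# also normalises geometry encodings so repo and DB values compare equal.
def _normalise_feature_sketch(row):
    # dict() accepts both mappings and iterables of key/value pairs, so this
    # covers repo feature dicts and SQLAlchemy result rows alike
    return dict(row)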
def test_init_import(
    archive,
    gpkg,
    table,
    data_archive,
    tmp_path,
    cli_runner,
    chdir,
):
    """
    Import the GeoPackage (eg. `kx-foo-layer.gpkg`) into a Kart repository.
    """
    with data_archive(archive) as data:
        # list tables
        repo_path = tmp_path / "repo"
        repo_path.mkdir()

        r = cli_runner.invoke(
            [
                "init",
                "--import",
                f"gpkg:{data / gpkg}",
                str(repo_path),
            ]
        )
        assert r.exit_code == 0, r
        assert (repo_path / ".kart" / "HEAD").exists()

        repo = KartRepo(repo_path)
        assert not repo.is_bare
        assert not repo.is_empty
        assert repo.head.name == "refs/heads/main"
        assert repo.head.shorthand == "main"

        # has a single commit
        assert len([c for c in repo.walk(repo.head.target)]) == 1

        # working copy exists
        wc = repo_path / f"{repo_path.stem}.gpkg"
        assert wc.exists() and wc.is_file()
        print("workingcopy at", wc)

        assert repo.config["kart.repostructure.version"] == "3"
        assert repo.config["kart.workingcopy.location"] == f"{wc.name}"

        with repo.working_copy.session() as sess:
            assert H.row_count(sess, table) > 0

            wc_tree_id = sess.scalar(
                """SELECT value FROM "gpkg_kart_state" WHERE table_name='*' AND key='tree';"""
            )
            assert wc_tree_id == repo.head_tree.hex

            xml_metadata = sess.scalar(
                f"""
                SELECT m.metadata
                FROM gpkg_metadata m
                JOIN gpkg_metadata_reference r ON m.id = r.md_file_id
                WHERE r.table_name = '{table}'
                """
            )
            if table in ("nz_pa_points_topo_150k", "nz_waca_adjustments"):
                assert xml_metadata.startswith(
                    '<gmd:MD_Metadata xmlns:gco="http://www.isotc211.org/2005/gco"'
                )
            else:
                assert not xml_metadata

            srs_definition = sess.scalar(
                f"""
                SELECT srs.definition
                FROM gpkg_spatial_ref_sys srs
                JOIN gpkg_geometry_columns geom ON srs.srs_id = geom.srs_id
                WHERE geom.table_name = '{table}'
                """
            )
            if srs_definition:
                srs_definition = re.sub(r",\s*", ", ", srs_definition)

            if table == "nz_pa_points_topo_150k":
                assert srs_definition.startswith('GEOGCS["WGS 84", DATUM["WGS_1984"')
            elif table == "nz_waca_adjustments":
                assert srs_definition.startswith(
                    'GEOGCS["NZGD2000", DATUM["New_Zealand_Geodetic_Datum_2000"'
                )

            H.verify_gpkg_extent(sess, table)

        with chdir(repo_path):
            # check that we can view the commit we created
            r = cli_runner.invoke(["show", "-o", "json"])
            assert r.exit_code == 0, r
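
# --- Sketch: the one-shot `init --import` above is broadly equivalent to the
# two-step flow the other tests in this module use (fixture names reused for
# illustration; this is not an additional test):
def _init_then_import_sketch(tmp_path, chdir, cli_runner, data, gpkg, table):
    repo_path = tmp_path / "repo2"
    repo_path.mkdir()
    with chdir(repo_path):
        r = cli_runner.invoke(["init"])
        assert r.exit_code == 0, r
        r = cli_runner.invoke(["import", f"gpkg:{data / gpkg}", table])
        assert r.exit_code == 0, r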