Example #1
def test_import_multiple(data_archive, chdir, cli_runner, tmp_path):
    repo_path = tmp_path / "repo"
    repo_path.mkdir()

    with chdir(repo_path):
        r = cli_runner.invoke(["init"])
        assert r.exit_code == 0, r

    repo = KartRepo(repo_path)
    assert repo.is_empty

    LAYERS = (
        ("gpkg-points", "nz-pa-points-topo-150k.gpkg", H.POINTS.LAYER),
        ("gpkg-polygons", "nz-waca-adjustments.gpkg", H.POLYGONS.LAYER),
    )

    datasets = []
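    # Each layer is imported as its own commit; collect the dataset handles for the checks below.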
    for i, (archive, source_gpkg, table) in enumerate(LAYERS):
        with data_archive(archive) as data:
            with chdir(repo_path):
                r = cli_runner.invoke(
                    ["import", f"GPKG:{data / source_gpkg}", table])
                assert r.exit_code == 0, r

                datasets.append(
                    _import_check(
                        repo_path,
                        table,
                        f"{data / source_gpkg}",
                    ))

                assert len(list(repo.walk(repo.head.target))) == i + 1

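                # Re-importing the same table into an existing dataset path must be rejected.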
                if i + 1 == len(LAYERS):
                    r = cli_runner.invoke(
                        ["import", f"GPKG:{data / source_gpkg}", table])
                    assert r.exit_code == INVALID_OPERATION

    # one commit per imported layer
    assert len(list(repo.walk(repo.head.target))) == len(LAYERS)

    tree = repo.head_tree

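    # Check that each imported dataset's first feature is addressable in the final tree.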
    for i, ds in enumerate(datasets):
        assert ds.path == LAYERS[i][2]

        feature = next(ds.features())
        f_path = ds.encode_1pk_to_path(feature[ds.primary_key])
        assert tree / f_path
Example #2
def test_fast_import(data_archive, tmp_path, cli_runner, chdir):
    table = H.POINTS.LAYER
    with data_archive("gpkg-points") as data:
        # set up an empty repository to import into
        repo_path = tmp_path / "repo"
        repo_path.mkdir()

        with chdir(repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r

            repo = KartRepo(repo_path)

            source = TableImportSource.open(data /
                                            "nz-pa-points-topo-150k.gpkg",
                                            table=table)

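            # Import the source table directly via the fast-import machinery, bypassing the CLI.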
            fast_import.fast_import_tables(repo, [source], from_commit=None)

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/main"
            assert repo.head.shorthand == "main"

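            # The imported dataset should be at the current table dataset version.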
            dataset = repo.datasets()[table]
            assert dataset.VERSION == 3

            # has a single commit
            assert len(list(repo.walk(repo.head.target))) == 1
            assert list(dataset.meta_items())

            # has the right number of features
            feature_count = sum(1 for f in dataset.features())
            assert feature_count == source.feature_count
Example #3
def upgrade(ctx, source, dest, in_place):
    """
    Upgrade a repository created by an earlier version of Kart to be compatible with the latest version.
    The current repository structure of Kart is known as Datasets V2, which is used from Kart 0.5 onwards.

    Usage:
    kart upgrade SOURCE DEST
    """
    source = Path(source)
    dest = Path(dest)

    if in_place:
        dest = source

    if not in_place and dest.exists() and any(dest.iterdir()):
        raise InvalidOperation(f'"{dest}" isn\'t empty', param_hint="DEST")

    try:
        source_repo = KartRepo(source)
    except NotFound:
        raise click.BadParameter(
            f"'{source}': not an existing Kart repository",
            param_hint="SOURCE")

    source_version = source_repo.table_dataset_version
    if source_version == DEFAULT_NEW_REPO_VERSION:
        raise InvalidOperation(
            f"Cannot upgrade: source repository is already at latest known version (Datasets V{source_version})"
        )

    if source_version > DEFAULT_NEW_REPO_VERSION:
        # The repo is too new for this version of Kart to understand, so we can't upgrade it.
        # This prints a good error message explaining the whole situation.
        source_repo.ensure_supported_version()

    source_dataset_class = dataset_class_for_legacy_version(
        source_version, in_place)

    if not source_dataset_class:
        raise InvalidOperation(
            f"Unrecognised source repository version: {source_version}")

    # action!
    if in_place:
        dest_repo = ForceLatestVersionRepo(dest)
    else:
        click.secho(f"Initialising {dest} ...", bold=True)
        dest.mkdir()
        dest_repo = KartRepo.init_repository(dest,
                                             wc_location=None,
                                             bare=source_repo.is_bare)

    # walk _all_ references
    source_walker = source_repo.walk(
        None, pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE)
    for ref in source_repo.listall_reference_objects():
        source_walker.push(ref.resolve().target)

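    # Maps source commit IDs (hex) to the IDs of the rewritten commits in dest_repo.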
    commit_map = {}

    click.secho("\nWriting new commits ...", bold=True)
    i = -1
    for i, source_commit in enumerate(source_walker):
        dest_parents = []
        for parent_id in source_commit.parent_ids:
            try:
                dest_parents.append(commit_map[parent_id.hex])
            except KeyError:
                raise ValueError(
                    f"Commit {i} ({source_commit.id}): Haven't seen parent ({parent_id})"
                )

        _upgrade_commit(
            ctx,
            i,
            source_repo,
            source_commit,
            source_dataset_class,
            dest_parents,
            dest_repo,
            commit_map,
        )

    click.echo(f"{i+1} commits processed.")

    click.secho("\nUpdating references ...", bold=True)
    for ref in source_repo.listall_reference_objects():
        if ref.type == pygit2.GIT_REF_OID:
            # real references
            target = commit_map[ref.target.hex]
            dest_repo.references.create(ref.name, target, True)  # overwrite
            click.echo(f"  {ref.name} ({ref.target.hex[:8]} → {target[:8]})")

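    # Symbolic references (e.g. HEAD) point at other references by name, so copy them as-is.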
    for ref in source_repo.listall_reference_objects():
        if ref.type == pygit2.GIT_REF_SYMBOLIC:
            dest_repo.references.create(ref.name, ref.target)
            click.echo(f"  {ref.name} → {ref.target}")

    if i >= 0:
        if source_repo.head_is_detached:
            dest_repo.set_head(
                pygit2.Oid(hex=commit_map[source_repo.head.target.hex]))
        else:
            dest_repo.set_head(source_repo.head.name)

        click.secho("\nCompacting repository ...", bold=True)
        if in_place:
            # old reflogs will refer to old objects, which prevents them from getting gc'd.
            # so we clear out the old reflogs here.
            # this *does* mean you can't go back, hence the 'irreversible' in the --in-place help.
            dest_repo.invoke_git("reflog", "expire",
                                 "--expire-unreachable=now", "--all")

        dest_repo.gc("--prune=now")

    if source_repo.workingcopy_location:
        click.secho("\nCreating working copy ...", bold=True)
        subctx = click.Context(ctx.command, parent=ctx)
        subctx.ensure_object(context.Context)
        subctx.obj.user_repo_path = str(dest)
        subctx.invoke(checkout.create_workingcopy)

    if in_place:
        dest_repo.config[KartConfigKeys.KART_REPOSTRUCTURE_VERSION] = str(
            DEFAULT_NEW_REPO_VERSION)

    click.secho("\nUpgrade complete", fg="green", bold=True)
Example #4
def test_import_from_non_gpkg(
    archive,
    source_gpkg,
    table,
    data_archive,
    tmp_path,
    cli_runner,
    chdir,
    request,
    source_format,
    source_ogr_driver,
):
    """
    Import a non-GPKG source (e.g. SHP/TAB) into a Kart repository.
    """
    param_ids = H.parameter_ids(request)

    with data_archive(archive) as data:
        with Db_GPKG.create_engine(data / source_gpkg).connect() as conn:
            if param_ids[-1] == "empty":
                print(f"emptying table {table}...")
                conn.execute(f"DELETE FROM {table};")

            num_rows = conn.execute(
                f"SELECT COUNT(*) FROM {table};").fetchone()[0]

        if param_ids[-1] == "empty":
            assert num_rows == 0

        # First, import the original GPKG to one repo
        gpkg_repo_path = tmp_path / "gpkg"
        gpkg_repo_path.mkdir()
        with chdir(gpkg_repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r
            r = cli_runner.invoke(["import", data / source_gpkg, table])
            assert r.exit_code == 0, r

        gpkg_repo = KartRepo(gpkg_repo_path)
        gpkg_dataset = gpkg_repo.datasets()[table]

        # convert to a new format using OGR
        source_filename = tmp_path / f"data.{source_format.lower()}"
        gdal.VectorTranslate(
            str(source_filename),
            gdal.OpenEx(str(data / source_gpkg)),
            format=source_ogr_driver,
            layers=[table],
        )
        repo_path = tmp_path / "non-gpkg"
        repo_path.mkdir()
        with chdir(repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r

            repo = KartRepo(repo_path)
            assert repo.is_empty

            # Import from SHP/TAB/something into Kart
            r = cli_runner.invoke([
                "import",
                str(source_filename),
                f"data:{table}",
            ])
            assert r.exit_code == 0, r

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/main"
            assert repo.head.shorthand == "main"

            # has a single commit
            assert len(list(repo.walk(repo.head.target))) == 1

            dataset = _import_check(repo_path, table, f"{data / source_gpkg}")

            # Compare the meta items to the GPKG-imported ones
            repo = KartRepo(repo_path)
            dataset = repo.datasets()[table]

            _compare_ogr_and_gpkg_meta_items(dataset, gpkg_dataset)

            if num_rows > 0:
                # compare the first feature in the repo against the source DB
                got_feature = next(dataset.features())
                pk = got_feature[dataset.primary_key]

                src_ds = ogr.Open(str(source_filename))
                src_layer = src_ds.GetLayer(0)
                assert src_layer.GetFeatureCount() == num_rows

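                # Fetch the matching feature from the OGR source and build the expected field dict.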
                f = src_layer.GetFeature(pk)
                expected_feature = {
                    f.GetFieldDefnRef(i).GetName(): f.GetField(i)
                    for i in range(f.GetFieldCount())
                }
                if "date_adjus" in expected_feature:
                    expected_feature["date_adjus"] = expected_feature[
                        "date_adjus"].replace("/", "-")
                expected_feature["FID"] = f.GetFID()
                if src_layer.GetGeomType() != ogr.wkbNone:
                    g = f.GetGeometryRef()
                    if g:
                        g.AssignSpatialReference(src_layer.GetSpatialRef())
                        if table == H.POLYGONS.LAYER:
                            g = ogr.ForceToMultiPolygon(g)
                    expected_feature["geom"] = ogr_to_gpkg_geom(g)

                assert normalise_feature(got_feature) == expected_feature
Example #5
def test_import(
    profile,
    archive,
    source_gpkg,
    table,
    data_archive,
    tmp_path,
    cli_runner,
    chdir,
    benchmark,
    request,
    monkeypatch,
):
    """ Import the GeoPackage (eg. `kx-foo-layer.gpkg`) into a Kart repository. """
    param_ids = H.parameter_ids(request)

    # wrap the original functions with benchmarking
    orig_import_func = fast_import.fast_import_tables
    orig_checkout_func = init._add_datasets_to_working_copy

    def _benchmark_import(*args, **kwargs):
        # one round/iteration isn't statistically robust, but it gives a crude idea
        return benchmark.pedantic(orig_import_func,
                                  args=args,
                                  kwargs=kwargs,
                                  rounds=1,
                                  iterations=1)

    def _benchmark_checkout(*args, **kwargs):
        return benchmark.pedantic(orig_checkout_func,
                                  args=args,
                                  kwargs=kwargs,
                                  rounds=1,
                                  iterations=1)

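    # Patch in the benchmarking wrapper for whichever phase this profile measures.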
    if profile == "fast_import":
        monkeypatch.setattr(init, "fast_import_tables", _benchmark_import)
    else:
        monkeypatch.setattr(init, "_add_datasets_to_working_copy",
                            _benchmark_checkout)

    with data_archive(archive) as data:
        # set up an empty repository to import into
        repo_path = tmp_path / "repo"
        repo_path.mkdir()

        with Db_GPKG.create_engine(data / source_gpkg).connect() as conn:
            if param_ids[-1] == "empty":
                print(f"emptying table {table}...")
                conn.execute(f"DELETE FROM {table};")

            num_rows = conn.execute(
                f"SELECT COUNT(*) FROM {table};").fetchone()[0]
        benchmark.group = f"test_import - {param_ids[-1]} (N={num_rows})"

        if param_ids[-1] == "empty":
            assert num_rows == 0

        with chdir(repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r

            repo = KartRepo(repo_path)
            assert repo.is_empty

            r = cli_runner.invoke(["import", str(data / source_gpkg), table])
            assert r.exit_code == 0, r

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/main"
            assert repo.head.shorthand == "main"

            # has a single commit
            assert len(list(repo.walk(repo.head.target))) == 1

            dataset = _import_check(repo_path, table, f"{data / source_gpkg}")

            with Db_GPKG.create_engine(data / source_gpkg).connect() as conn:
                pk_field = Db_GPKG.pk_name(conn, table=table)

                if num_rows > 0:
                    # compare the first feature in the repo against the source DB
                    feature = next(dataset.features())

                    row = normalise_feature(
                        conn.execute(
                            f"SELECT * FROM {table} WHERE {pk_field}=?;",
                            [feature[pk_field]],
                        ).fetchone())
                    feature = normalise_feature(feature)
                    print("First Feature:", feature, row)
                    assert feature == row

                    # compare a source DB feature against the repo feature
                    row = normalise_feature(
                        conn.execute(
                            f"SELECT * FROM {table} ORDER BY {pk_field} LIMIT 1 OFFSET {min(97,num_rows-1)};"
                        ).fetchone())

                    for feature in dataset.features():
                        if feature[pk_field] == row[pk_field]:
                            feature = normalise_feature(feature)
                            assert feature == row
                            break
                    else:
                        pytest.fail(
                            f"Couldn't find repo feature {pk_field}={row[pk_field]}"
                        )
Example #6
def test_init_import(
    archive,
    gpkg,
    table,
    data_archive,
    tmp_path,
    cli_runner,
    chdir,
):
    """ Import the GeoPackage (eg. `kx-foo-layer.gpkg`) into a Kart repository. """
    with data_archive(archive) as data:
        # create the repository directory to import into
        repo_path = tmp_path / "repo"
        repo_path.mkdir()

        r = cli_runner.invoke([
            "init",
            "--import",
            f"gpkg:{data / gpkg}",
            str(repo_path),
        ])
        assert r.exit_code == 0, r
        assert (repo_path / ".kart" / "HEAD").exists()

        repo = KartRepo(repo_path)
        assert not repo.is_bare
        assert not repo.is_empty

        assert repo.head.name == "refs/heads/main"
        assert repo.head.shorthand == "main"

        # has a single commit
        assert len(list(repo.walk(repo.head.target))) == 1

        # working copy exists
        wc = repo_path / f"{repo_path.stem}.gpkg"
        assert wc.exists() and wc.is_file()
        print("workingcopy at", wc)

        assert repo.config["kart.repostructure.version"] == "3"
        assert repo.config["kart.workingcopy.location"] == f"{wc.name}"

        with repo.working_copy.session() as sess:
            assert H.row_count(sess, table) > 0

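            # The working copy records the tree it was last written from in gpkg_kart_state.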
            wc_tree_id = sess.scalar(
                """SELECT value FROM "gpkg_kart_state" WHERE table_name='*' AND key='tree';"""
            )
            assert wc_tree_id == repo.head_tree.hex

            xml_metadata = sess.scalar(f"""
                SELECT m.metadata
                FROM gpkg_metadata m JOIN gpkg_metadata_reference r
                ON m.id = r.md_file_id
                WHERE r.table_name = '{table}'
                """)
            if table in ("nz_pa_points_topo_150k", "nz_waca_adjustments"):
                assert xml_metadata.startswith(
                    '<gmd:MD_Metadata xmlns:gco="http://www.isotc211.org/2005/gco"'
                )
            else:
                assert not xml_metadata

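            # Check that the CRS definition was carried through from the source GeoPackage.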
            srs_definition = sess.scalar(f"""
                SELECT srs.definition
                FROM gpkg_spatial_ref_sys srs JOIN gpkg_geometry_columns geom
                ON srs.srs_id = geom.srs_id
                WHERE geom.table_name = '{table}'
                """)
            if srs_definition:
                srs_definition = re.sub(r",\s*", ", ", srs_definition)
            if table == "nz_pa_points_topo_150k":
                assert srs_definition.startswith(
                    'GEOGCS["WGS 84", DATUM["WGS_1984"')
            elif table == "nz_waca_adjustments":
                assert srs_definition.startswith(
                    'GEOGCS["NZGD2000", DATUM["New_Zealand_Geodetic_Datum_2000"'
                )

            H.verify_gpkg_extent(sess, table)
        with chdir(repo_path):
            # check that we can view the commit we created
            r = cli_runner.invoke(["show", "-o", "json"])
            assert r.exit_code == 0, r