Example #1
def test_feature_find_decode_performance(
    profile,
    archive,
    source_gpkg,
    table,
    data_archive,
    data_imported,
    benchmark,
    request,
):
    """ Check single-feature decoding performance """
    param_ids = H.parameter_ids(request)
    benchmark.group = (
        f"test_feature_find_decode_performance - {profile} - {param_ids[-1]}")

    repo_path = data_imported(archive, source_gpkg, table)
    repo = KartRepo(repo_path)
    dataset = repo.datasets()["mytable"]
    inner_tree = dataset.inner_tree
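    # inner_tree is the git tree that the dataset's encoded feature blobs live
    # under; the "get_feature_from_data" profile reads raw blob data from it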

    with data_archive(archive) as data:
        with Db_GPKG.create_engine(data / source_gpkg).connect() as conn:
            num_rows = conn.execute(
                f"SELECT COUNT(*) FROM {table};").fetchone()[0]
            pk_field = Db_GPKG.pk_name(conn, table=table)
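            # pick a pk partway into the table (row 98, clamped to the last row
            # for smaller tables) rather than always benchmarking the first one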
            pk = conn.execute(
                f"SELECT {pk_field} FROM {table} ORDER BY {pk_field} LIMIT 1 OFFSET {min(97,num_rows-1)};"
            ).fetchone()[0]

    if profile == "get_feature_by_pk":
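        # times the full lookup: pk -> feature path -> blob -> decoded feature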
        benchmark(dataset.get_feature, pk)

    elif profile == "get_feature_from_data":
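        # resolve the blob up front so only the decode step is timed; pygit2
        # blobs support the buffer protocol, so memoryview() gives zero-copy
        # access to the raw feature data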
        feature_path = dataset.encode_1pk_to_path(pk, relative=True)
        feature_data = memoryview(inner_tree / feature_path)

        benchmark(dataset.get_feature, path=feature_path, data=feature_data)
    else:
        raise NotImplementedError(f"Unknown profile: {profile}")
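
For reference, the bare `benchmark` fixture used above comes from pytest-benchmark: it calls the target repeatedly, records timing statistics, and returns the target's return value. A minimal, self-contained sketch of that usage (`decode_one` is a hypothetical stand-in for `dataset.get_feature`):

def decode_one(pk):
    # stand-in for real find/decode work
    return {"pk": pk, "name": f"feature-{pk}"}

def test_decode_one_performance(benchmark):
    # benchmark() times repeated calls and returns the target's return value
    result = benchmark(decode_one, 42)
    assert result["pk"] == 42
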
Example #2
def test_import(
    profile,
    archive,
    source_gpkg,
    table,
    data_archive,
    tmp_path,
    cli_runner,
    chdir,
    benchmark,
    request,
    monkeypatch,
):
    """ Import the GeoPackage (eg. `kx-foo-layer.gpkg`) into a Kart repository. """
    param_ids = H.parameter_ids(request)

    # wrap the original functions with benchmarking
    orig_import_func = fast_import.fast_import_tables
    orig_checkout_func = init._add_datasets_to_working_copy

    def _benchmark_import(*args, **kwargs):
        # a single round/iteration isn't statistically robust, but full imports
        # are slow, so this still gives a crude idea
        return benchmark.pedantic(orig_import_func,
                                  args=args,
                                  kwargs=kwargs,
                                  rounds=1,
                                  iterations=1)

    def _benchmark_checkout(*args, **kwargs):
        return benchmark.pedantic(orig_checkout_func,
                                  args=args,
                                  kwargs=kwargs,
                                  rounds=1,
                                  iterations=1)

    if profile == "fast_import":
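        # patch the name where it's looked up: init imported fast_import_tables
        # into its own namespace, so it must be patched there for the
        # `kart import` invocation below to hit the wrapper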
        monkeypatch.setattr(init, "fast_import_tables", _benchmark_import)
    else:
        monkeypatch.setattr(init, "_add_datasets_to_working_copy",
                            _benchmark_checkout)

    with data_archive(archive) as data:
        # create an empty directory for the new repository
        repo_path = tmp_path / "repo"
        repo_path.mkdir()

        with Db_GPKG.create_engine(data / source_gpkg).connect() as conn:
            if param_ids[-1] == "empty":
                print(f"emptying table {table}...")
                conn.execute(f"DELETE FROM {table};")

            num_rows = conn.execute(
                f"SELECT COUNT(*) FROM {table};").fetchone()[0]
        benchmark.group = f"test_import - {param_ids[-1]} (N={num_rows})"

        if param_ids[-1] == "empty":
            assert num_rows == 0

        with chdir(repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r

            repo = KartRepo(repo_path)
            assert repo.is_empty

            r = cli_runner.invoke(["import", str(data / source_gpkg), table])
            assert r.exit_code == 0, r

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/main"
            assert repo.head.shorthand == "main"

            # has a single commit
            assert len(list(repo.walk(repo.head.target))) == 1

            dataset = _import_check(repo_path, table, f"{data / source_gpkg}")

            with Db_GPKG.create_engine(data / source_gpkg).connect() as conn:
                pk_field = Db_GPKG.pk_name(conn, table=table)

                if num_rows > 0:
                    # compare the first feature in the repo against the source DB
                    feature = next(dataset.features())

                    row = normalise_feature(
                        conn.execute(
                            f"SELECT * FROM {table} WHERE {pk_field}=?;",
                            [feature[pk_field]],
                        ).fetchone())
                    feature = normalise_feature(feature)
                    print("First Feature:", feature, row)
                    assert feature == row

                    # compare a source DB feature against the repo feature
                    row = normalise_feature(
                        conn.execute(
                            f"SELECT * FROM {table} ORDER BY {pk_field} LIMIT 1 OFFSET {min(97,num_rows-1)};"
                        ).fetchone())

                    for feature in dataset.features():
                        if feature[pk_field] == row[pk_field]:
                            feature = normalise_feature(feature)
                            assert feature == row
                            break
                    else:
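                        # the loop's else clause runs only when no break
                        # occurred, i.e. the feature was never found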
                        pytest.fail(
                            f"Couldn't find repo feature {pk_field}={row[pk_field]}"
                        )
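
The wrap-and-patch pattern in Example #2 is handy whenever one internal step of a larger CLI invocation needs timing. A minimal, self-contained sketch of the same technique; `slow_step` and `pipeline` are hypothetical stand-ins for fast_import_tables and the `kart import` call:

import sys

def slow_step(n):
    # stand-in for the expensive internal step being measured
    return sum(i * i for i in range(n))

def pipeline():
    # stand-in for the larger operation that calls the step internally
    return slow_step(100_000)

def test_pipeline_step_timing(benchmark, monkeypatch):
    orig = slow_step

    def timed(*args, **kwargs):
        # benchmark.pedantic gives exact control: one measured round/iteration
        return benchmark.pedantic(orig, args=args, kwargs=kwargs,
                                  rounds=1, iterations=1)

    # patch the module attribute so pipeline() resolves to the timed wrapper
    monkeypatch.setattr(sys.modules[__name__], "slow_step", timed)
    assert pipeline() == orig(100_000)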