def test_import_replace_existing_with_column_renames(
    data_archive,
    tmp_path,
    cli_runner,
    chdir,
    geopackage,
):
    """
    Re-importing with --replace-existing after a source column is renamed
    should produce a schema-only commit: the schema diff is non-empty but
    the feature tree is byte-identical to the previous commit's.
    """
    with data_archive("gpkg-polygons") as data:
        repo_path = tmp_path / "emptydir"
        r = cli_runner.invoke(["init", repo_path])
        assert r.exit_code == 0
        with chdir(repo_path):
            # First import: establishes the dataset "mytable".
            r = cli_runner.invoke([
                "import",
                data / "nz-waca-adjustments.gpkg",
                "nz_waca_adjustments:mytable",
            ])
            assert r.exit_code == 0, r.stderr

            # Now rename the `survey_reference` column in the source GPKG.
            # (NOTE(review): the original comment here described dropped /
            # reordered / added columns, copied from a sibling test — this
            # test only performs a rename.)
            db = geopackage(data / "nz-waca-adjustments.gpkg")
            dbcur = db.cursor()
            dbcur.execute(
                """
                ALTER TABLE nz_waca_adjustments RENAME COLUMN survey_reference TO renamed_survey_reference;
                """
            )

            # Re-import the modified table over the existing dataset.
            r = cli_runner.invoke([
                "import",
                "--replace-existing",
                data / "nz-waca-adjustments.gpkg",
                "nz_waca_adjustments:mytable",
            ])
            assert r.exit_code == 0, r.stderr
            r = cli_runner.invoke(["show", "-o", "json"])
            assert r.exit_code == 0, r.stderr
            diff = json.loads(r.stdout)["sno.diff/v1+hexwkb"]["mytable"]

            # The schema changed, but the features didn't.
            assert diff["meta"]["schema.json"]
            assert not diff.get("feature")

            # The feature trees of HEAD and HEAD^ must be identical even
            # though the overall commit trees differ (schema change only).
            repo = pygit2.Repository(str(repo_path))
            head_rs = RepositoryStructure.lookup(repo, "HEAD")
            old_rs = RepositoryStructure.lookup(repo, "HEAD^")
            assert head_rs.tree != old_rs.tree
            new_feature_tree = head_rs.tree / "mytable/.sno-dataset/feature"
            old_feature_tree = old_rs.tree / "mytable/.sno-dataset/feature"
            assert new_feature_tree == old_feature_tree
def _data_working_copy(name, force_new=False):
    """
    Yield (repo_dir, wc_path) for the named test archive, creating a GPKG
    working copy via `checkout` if the repo doesn't already have one.

    name: archive name; a trailing ".sno" suffix is stripped for the WC filename.
    force_new: delete any existing working copy and check out a fresh one.

    NOTE(review): this is a closure — it reads `data_archive`,
    `tmp_path_factory`, `request`, `cli_runner`, `L` and mutates `incr`
    from the enclosing fixture scope.
    """
    nonlocal incr

    with data_archive(name) as repo_dir:
        if name.endswith(".sno"):
            name = name[:-5]

        repo = pygit2.Repository(str(repo_dir))
        rs = RepositoryStructure(repo)
        if rs.working_copy:
            wc_path = rs.working_copy.full_path
            if force_new:
                L.info("force_new is set, deleting existing WC: %s", wc_path)
                # Deleting the attribute removes the WC; assert the cached
                # value is really gone so the re-checkout below triggers.
                del rs.working_copy
                assert not hasattr(rs, "_working_copy")
                del wc_path

        if not rs.working_copy:
            # Unique temp dir per test invocation, numbered by `incr`.
            wc_path = (
                tmp_path_factory.mktemp(request.node.name, str(incr))
                / f"{name}.gpkg"
            )
            incr += 1
            L.info("Checking out to %s", wc_path)
            r = cli_runner.invoke(["checkout", f"--path={wc_path}"])
            assert r.exit_code == 0, r
            L.debug("Checkout result: %s", r)

        # Drop references before yielding so the repo isn't held open.
        del rs
        del repo
        L.info("data_working_copy: %s %s", repo_dir, wc_path)
        yield repo_dir, wc_path
def _upgrade_commit(
    i,
    source_repo,
    source_commit,
    source_version,
    source_dataset_class,
    dest_parents,
    dest_repo,
    commit_map,
):
    """
    Replay one source commit into dest_repo via git-fast-import.

    i: 1-based index of this commit (progress output only).
    source_commit: the pygit2.Commit being upgraded.
    source_version / source_dataset_class: how to read the source datasets.
    dest_parents: already-upgraded parent commit ids, emitted as `merge` lines.
    commit_map: mutated in place — maps source commit hex -> dest commit hex.
    """
    # Fix: the original wrapped the iterable in a no-op list comprehension
    # ([ds for ds in X]); list(X) is the idiomatic equivalent (ruff C416).
    sources = list(
        RepositoryStructure(
            source_repo,
            commit=source_commit,
            version=source_version,
            dataset_class=source_dataset_class,
        )
    )
    dataset_count = len(sources)
    feature_count = sum(s.feature_count for s in sources)

    s = source_commit
    # fast-import wants "<unix-time> <tz-offset>" timestamps.
    author_time = f"{s.author.time} {minutes_to_tz_offset(s.author.offset)}"
    commit_time = f"{s.commit_time} {minutes_to_tz_offset(s.commit_time_offset)}"
    header = (
        # We import every commit onto refs/heads/master and fix the branch heads later.
        "commit refs/heads/master\n"
        f"author {s.author.name} <{s.author.email}> {author_time}\n"
        f"committer {s.committer.name} <{s.committer.email}> {commit_time}\n"
        f"data {len(s.message.encode('utf8'))}\n{s.message}\n"
    )
    header += "".join(f"merge {p}\n" for p in dest_parents)

    fast_import_tables(
        dest_repo,
        sources,
        replace_existing=ReplaceExisting.ALL,
        quiet=True,
        header=header,
        # We import every commit onto refs/heads/master, even though not all commits are related - this means
        # the master branch head will jump all over the place. git-fast-import only allows this with --force.
        extra_cmd_args=["--force"],
    )

    dest_commit = dest_repo.head.peel(pygit2.Commit)
    commit_map[source_commit.hex] = dest_commit.hex

    commit_time = datetime.fromtimestamp(source_commit.commit_time)
    click.echo(
        f" {i}: {source_commit.hex[:8]} → {dest_commit.hex[:8]}"
        f" ({commit_time}; {source_commit.committer.name}; {dataset_count} datasets; {feature_count} rows)"
    )
def test_checkout_workingcopy(
    version, archive, table, commit_sha, data_archive, tmp_path, cli_runner, geopackage
):
    """
    Checkout a working copy to edit
    """
    # V2 repos use a differently-named archive and state table.
    if version == "2":
        archive += "2"
        sno_state_table = "gpkg_sno_state"
    else:
        sno_state_table = ".sno-meta"

    with data_archive(archive) as repo_path:
        H.clear_working_copy()

        repo = pygit2.Repository(str(repo_path))
        r = cli_runner.invoke(["checkout"])
        wc = Path(repo.config["sno.workingcopy.path"])
        assert r.exit_code == 0, r
        assert r.stdout.splitlines() == [f'Creating working copy at {wc} ...']

        # The checkout should have produced a non-empty GPKG file.
        assert wc.exists()
        db = geopackage(wc)
        assert H.row_count(db, table) > 0

        assert repo.is_bare
        assert repo.head.name == "refs/heads/master"
        assert repo.head.shorthand == "master"

        # The WC state table records the tree it was checked out from;
        # it must match HEAD.
        head_tree = repo.head.peel(pygit2.Tree)
        wc_tree_id = (
            db.cursor()
            .execute(
                f"""SELECT value FROM "{sno_state_table}" WHERE table_name='*' AND key='tree';"""
            )
            .fetchone()[0]
        )
        assert wc_tree_id == head_tree.hex

        wc = WorkingCopy.get(repo)
        assert wc.assert_db_tree_match(head_tree)

        # The primary key column is created as an autoincrement integer PK;
        # NOT NULL may or may not be appended depending on the schema.
        rs = RepositoryStructure(repo)
        cols, pk_col = wc._get_columns(rs[table])
        expected_col_spec = f'"{pk_col}" INTEGER PRIMARY KEY AUTOINCREMENT'
        assert cols[pk_col] in (expected_col_spec, f"{expected_col_spec} NOT NULL")
def _upgrade_commit(i, source_repo, source_commit, dest_parents, dest_repo, commit_map):
    """
    Replay a single V1 source commit into dest_repo as a V2 commit.

    i: 1-based index of this commit, used only for progress output.
    dest_parents: hex ids of already-upgraded parents (emitted as `merge` lines).
    commit_map: mutated in place — source commit hex -> dest commit hex.
    """
    # Wrap every dataset at this commit as a V1 import source, keyed by path.
    sources = {}
    for dataset in RepositoryStructure(source_repo, commit=source_commit):
        sources[dataset.path] = ImportV1Dataset(dataset)

    dataset_count = len(sources)
    feature_count = sum(s.row_count for s in sources.values())

    s = source_commit
    commit_time = _raw_commit_time(s)

    # Build the git-fast-import stream header; parent commits are appended
    # as `merge` lines after the fixed commit/author/committer/data lines.
    merge_lines = "".join(f"merge {p}\n" for p in dest_parents)
    header = (
        "commit refs/heads/master\n"
        f"author {s.author.name} <{s.author.email}> {commit_time}\n"
        f"committer {s.committer.name} <{s.committer.email}> {commit_time}\n"
        f"data {len(s.message.encode('utf8'))}\n{s.message}\n"
    ) + merge_lines

    fast_import_tables(
        dest_repo,
        sources,
        incremental=False,
        quiet=True,
        header=header,
        structure_version=2,
    )

    # Record the old->new commit mapping for later ref fix-up.
    dest_commit = dest_repo.head.peel(pygit2.Commit)
    commit_map[source_commit.hex] = dest_commit.hex

    commit_time = datetime.fromtimestamp(source_commit.commit_time)
    click.echo(
        f" {i}: {source_commit.hex[:8]} → {dest_commit.hex[:8]} ({commit_time}; {source_commit.committer.name}; {dataset_count} datasets; {feature_count} rows)"
    )
def get_upgrade_sources(source_repo, source_commit):
    """Return upgrade sources for all V1 datasets at the given commit."""
    repo_structure = RepositoryStructure(source_repo, commit=source_commit)
    # Iterating the repository structure yields each dataset at this commit.
    return list(map(ImportV1Dataset, repo_structure))
def test_import_replace_existing_with_compatible_schema_changes(
    data_archive,
    tmp_path,
    cli_runner,
    chdir,
    geopackage,
):
    """
    Re-importing with --replace-existing after compatible schema changes
    (dropped column, reordered columns, new NULL column) should commit a
    schema-only change — the feature tree must be unchanged.
    """
    with data_archive("gpkg-polygons") as data:
        repo_path = tmp_path / "emptydir"
        r = cli_runner.invoke(["init", repo_path])
        assert r.exit_code == 0
        with chdir(repo_path):
            # Initial import establishes the dataset "mytable".
            r = cli_runner.invoke([
                "import",
                data / "nz-waca-adjustments.gpkg",
                "nz_waca_adjustments:mytable",
            ])
            assert r.exit_code == 0, r.stderr

            # Now replace with a table which
            # * doesn't include the `survey_reference` column
            # * has the columns in a different order
            # * has a new column
            # NOTE(review): this relies on the `geopackage` fixture's cursor
            # executing multiple SQL statements in one execute() call (e.g.
            # apsw); stdlib sqlite3 would reject this — confirm the driver.
            db = geopackage(data / "nz-waca-adjustments.gpkg")
            dbcur = db.cursor()
            dbcur.execute(
                """
                CREATE TABLE IF NOT EXISTS "nz_waca_adjustments_2" (
                    "id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                    "geom" MULTIPOLYGON,
                    "date_adjusted" DATETIME,
                    "adjusted_nodes" MEDIUMINT,
                    "newcolumn" TEXT
                );
                INSERT INTO nz_waca_adjustments_2 (id, geom, date_adjusted, adjusted_nodes, newcolumn)
                    SELECT id, geom, date_adjusted, adjusted_nodes, NULL FROM nz_waca_adjustments;
                DROP TABLE nz_waca_adjustments;
                ALTER TABLE nz_waca_adjustments_2 RENAME TO nz_waca_adjustments;
                """
            )

            # Re-import the reshaped table over the existing dataset.
            r = cli_runner.invoke([
                "import",
                "--replace-existing",
                data / "nz-waca-adjustments.gpkg",
                "nz_waca_adjustments:mytable",
            ])
            assert r.exit_code == 0, r.stderr
            r = cli_runner.invoke(["show", "-o", "json"])
            assert r.exit_code == 0, r.stderr
            diff = json.loads(r.stdout)["sno.diff/v1+hexwkb"]["mytable"]

            # The schema changed, but the features didn't.
            assert diff["meta"]["schema.json"]
            assert not diff.get("feature")

            # Commit trees differ (schema changed) but feature trees match.
            repo = pygit2.Repository(str(repo_path))
            head_rs = RepositoryStructure.lookup(repo, "HEAD")
            old_rs = RepositoryStructure.lookup(repo, "HEAD^")
            assert head_rs.tree != old_rs.tree
            new_feature_tree = head_rs.tree / "mytable/.sno-dataset/feature"
            old_feature_tree = old_rs.tree / "mytable/.sno-dataset/feature"
            assert new_feature_tree == old_feature_tree
def test_commit(
    repo_version,
    archive,
    layer,
    partial,
    data_working_copy,
    geopackage,
    cli_runner,
    request,
    edit_points,
    edit_polygons,
    edit_table,
):
    """
    commit outstanding changes from the working copy
    """
    # V2 repos use the "<archive>2" variant of each test archive.
    versioned_archive = archive + "2" if repo_version == "2" else archive
    with data_working_copy(versioned_archive) as (repo_dir, wc_path):
        # empty — committing with no changes must fail with NO_CHANGES...
        r = cli_runner.invoke(["commit", "-m", "test-commit-empty"])
        assert r.exit_code == NO_CHANGES, r
        assert r.stderr.splitlines() == ["Error: No changes to commit"]

        # empty — ...unless --allow-empty is passed.
        r = cli_runner.invoke(["commit", "-m", "test-commit-empty", "--allow-empty"])
        assert r.exit_code == 0, r

        # make some changes
        db = geopackage(wc_path)
        with db:
            cur = db.cursor()
            try:
                # Dispatch to the matching edit_* fixture by archive name —
                # edit_points/edit_polygons/edit_table are function args, so
                # locals() contains them.
                edit_func = locals()[f"edit_{archive}"]
                pk_del = edit_func(cur)
            except KeyError:
                raise NotImplementedError(f"No edit_{archive}")

        print(f"deleted fid={pk_del}")

        repo = pygit2.Repository(str(repo_dir))
        rs = RepositoryStructure(repo)
        wc = rs.working_copy
        original_change_count = _count_tracking_table_changes(db, wc, layer)

        if partial:
            # Commit only the single deleted feature (layer:pk filter).
            r = cli_runner.invoke(
                ["commit", "-m", "test-commit-1", "-o", "json", f"{layer}:{pk_del}"]
            )
        else:
            r = cli_runner.invoke(["commit", "-m", "test-commit-1", "-o", "json"])

        assert r.exit_code == 0, r
        commit_id = json.loads(r.stdout)["sno.commit/v1"]["commit"]
        print("commit:", commit_id)

        # HEAD must point at the new commit with the right message,
        # authored just now.
        assert str(repo.head.target) == commit_id
        commit = repo.head.peel(pygit2.Commit)
        assert commit.message == "test-commit-1"
        assert time.time() - commit.commit_time < 3

        # The deleted feature must no longer exist in the committed tree.
        dataset = rs[layer]
        tree = repo.head.peel(pygit2.Tree)
        assert dataset.encode_1pk_to_path(pk_del) not in tree

        wc = WorkingCopy.get(repo)
        wc.assert_db_tree_match(tree)
        change_count = _count_tracking_table_changes(db, wc, layer)

        if partial:
            # All but one change should still be in the tracking table
            assert change_count == original_change_count - 1

            # Changes should still be visible in the working copy:
            r = cli_runner.invoke(["diff", "--exit-code"])
            assert r.exit_code == 1, r
            assert r.stdout != ""
        else:
            assert (
                change_count == 0
            ), f"Changes still listed in {wc.TRACKING_TABLE} after full commit"

            # Full commit: working copy should now be clean.
            r = cli_runner.invoke(["diff", "--exit-code"])
            assert r.exit_code == 0, r
            assert r.stdout == ""
def test_resolve_with_version(repo_version, create_conflicts, cli_runner):
    """
    Resolve each of four merge conflicts with a different --with= version
    (ancestor, ours, theirs, delete), then complete the merge and verify
    each feature ended up as the chosen version.
    """
    with create_conflicts(H.POLYGONS, repo_version) as repo:
        r = cli_runner.invoke(["merge", "theirs_branch", "-o", "json"])
        assert r.exit_code == 0, r
        assert json.loads(r.stdout)["sno.merge/v1"]["conflicts"]
        assert RepoState.get_state(repo) == RepoState.MERGING

        # Can't just complete the merge until we resolve the conflicts.
        r = cli_runner.invoke(["merge", "--continue"])
        assert r.exit_code == INVALID_OPERATION

        conflict_ids = get_conflict_ids(cli_runner)
        resolutions = iter(["ancestor", "ours", "theirs", "delete"])

        # Keep track of which order we resolve the conflicts - each conflict
        # resolved will have a primary key, and we resolve conflicts in
        # primary key order, but the primary keys are not contiguous.
        pk_order = []
        # Each conflict also has an internal "conflict" key - just its index
        # in the original list of conflicts - these are contiguous, but
        # we don't necessarily resolve the conflicts in this order.
        ck_order = []

        while conflict_ids:
            num_conflicts = len(conflict_ids)
            conflict_id = conflict_ids[0]
            # Conflict id format is "<layer>:feature:<pk>" — take the pk part.
            pk = conflict_id.split(":", 2)[2]
            pk_order += [pk]

            r = cli_runner.invoke(
                ["resolve", conflict_id, f"--with={next(resolutions)}"])
            assert r.exit_code == 0, r
            # Each resolve removes exactly one conflict from the list.
            conflict_ids = get_conflict_ids(cli_runner)
            assert len(conflict_ids) == num_conflicts - 1

            resolved_keys = MergeIndex.read_from_repo(repo).resolves.keys()
            ck_order += [k for k in resolved_keys if k not in ck_order]

        assert len(conflict_ids) == 0
        merge_index = MergeIndex.read_from_repo(repo)
        assert len(merge_index.entries) == 242
        assert len(merge_index.conflicts) == 4
        assert len(merge_index.resolves) == 4

        ck0, ck1, ck2, ck3 = ck_order
        # Conflict ck0 is resolved to ancestor, but the ancestor is None.
        assert merge_index.resolves[ck0] == []
        assert merge_index.conflicts[ck0].ancestor is None
        assert merge_index.resolves[ck1] == [merge_index.conflicts[ck1].ours]
        assert merge_index.resolves[ck2] == [merge_index.conflicts[ck2].theirs]
        # ck3 was resolved with "delete" — no resulting feature.
        assert merge_index.resolves[ck3] == []

        r = cli_runner.invoke(["merge", "--continue", "-m", "merge commit"])
        assert r.exit_code == 0, r
        assert repo.head.peel(pygit2.Commit).message == "merge commit"
        assert RepoState.get_state(repo) != RepoState.MERGING

        merged = RepositoryStructure.lookup(repo, "HEAD")
        ours = RepositoryStructure.lookup(repo, "ours_branch")
        theirs = RepositoryStructure.lookup(repo, "theirs_branch")

        l = H.POLYGONS.LAYER
        pk0, pk1, pk2, pk3 = pk_order

        # Feature at pk0 was resolved to ancestor, which was None.
        assert get_json_feature(merged, l, pk0) is None
        assert get_json_feature(merged, l, pk1) == get_json_feature(ours, l, pk1)
        assert get_json_feature(merged, l, pk2) == get_json_feature(theirs, l, pk2)
        # Feature at pk3 was resolved with "delete".
        assert get_json_feature(merged, l, pk3) is None
def test_resolve_with_file(repo_version, create_conflicts, cli_runner):
    """
    Resolve an add/add conflict with --with-file=, supplying a geojson
    FeatureCollection containing both versions (theirs re-keyed to a new
    primary key), then complete the merge and verify both features exist.
    """
    with create_conflicts(H.POLYGONS, repo_version) as repo:
        # Capture the conflicting feature as edited on each branch.
        r = cli_runner.invoke(
            ["diff", "ancestor_branch..ours_branch", "-o", "geojson"])
        assert r.exit_code == 0, r
        ours_geojson = json.loads(r.stdout)["features"][0]
        assert ours_geojson["id"] == "I::98001"

        r = cli_runner.invoke(
            ["diff", "ancestor_branch..theirs_branch", "-o", "geojson"])
        assert r.exit_code == 0, r
        theirs_geojson = json.loads(r.stdout)["features"][0]
        assert theirs_geojson["id"] == "I::98001"

        r = cli_runner.invoke(["merge", "theirs_branch", "-o", "json"])
        assert r.exit_code == 0, r
        assert json.loads(r.stdout)["sno.merge/v1"]["conflicts"]
        r = cli_runner.invoke(["conflicts", "-s", "-o", "json"])
        assert r.exit_code == 0, r
        conflicts = json.loads(r.stdout)["sno.conflicts/v1"]
        add_add_conflict_pk = conflicts[H.POLYGONS.LAYER]["feature"][0]
        assert add_add_conflict_pk == 98001

        # These IDs are irrelevant, but we change them to at least be unique.
        ours_geojson["id"] = "ours-feature"
        theirs_geojson["id"] = "theirs-feature"
        # Changing this ID means the two features no long conflict.
        theirs_geojson["properties"]["id"] = 98002

        # The resolution file keeps both features: ours at the original pk
        # and theirs at the new pk 98002.
        resolution = {
            "features": [ours_geojson, theirs_geojson],
            "type": "FeatureCollection",
        }
        write_repo_file(repo, "resolution.geojson", json.dumps(resolution))
        r = cli_runner.invoke([
            "resolve",
            f"{H.POLYGONS.LAYER}:feature:98001",
            "--with-file=resolution.geojson",
        ])
        assert r.exit_code == 0, r

        merge_index = MergeIndex.read_from_repo(repo)
        assert len(merge_index.entries) == 242
        assert len(merge_index.conflicts) == 4
        assert len(merge_index.resolves) == 1

        ck = next(iter(merge_index.resolves.keys()))
        assert len(merge_index.resolves[ck]) == 2  # Resolved with 2 features

        delete_remaining_conflicts(cli_runner)

        r = cli_runner.invoke(["merge", "--continue", "-m", "merge commit"])
        assert r.exit_code == 0, r
        assert repo.head.peel(pygit2.Commit).message == "merge commit"
        assert RepoState.get_state(repo) != RepoState.MERGING

        merged = RepositoryStructure.lookup(repo, "HEAD")
        ours = RepositoryStructure.lookup(repo, "ours_branch")
        theirs = RepositoryStructure.lookup(repo, "theirs_branch")

        l = H.POLYGONS.LAYER

        # Both features are present in the merged repo, ours at 98001 and theirs at 98002.
        assert get_json_feature(merged, l, 98001) == get_json_feature(ours, l, 98001)

        # Theirs feature is slightly different - it has a new primary key.
        assert get_json_feature(merged, l, 98002) != get_json_feature(
            theirs, l, 98001)

        modified_theirs_json = get_json_feature(theirs, l, 98001)
        modified_theirs_json["id"] = 98002
        assert get_json_feature(merged, l, 98002) == modified_theirs_json