def reset_nessie_server_state() -> None:
    """Bring the Nessie server back to a clean state for testing.

    Every branch and then every tag reported by the CLI is deleted, ``main``
    is force-created on the "no ancestor" hash, and the resulting branch
    listing is verified to contain exactly that one branch.
    """
    # Note: This hash should match the java constant AbstractDatabaseAdapter.NO_ANCESTOR
    root_hash = "2e1cfa82b035c26cbbbdae632cea070514eb8b773f616aaeaf668e2f0be8f10d"

    # Branches are removed first, then tags; the CLI sub-command doubles as
    # both the listing command and the delete command for each kind.
    for ref_kind in ("branch", "tag"):
        for existing in ReferenceSchema().loads(execute_cli_command(["--json", ref_kind]), many=True):
            execute_cli_command([ref_kind, "-d", existing.name])

    # Re-create the main branch from the "root" (a.k.a. no ancestor) hash
    execute_cli_command(["branch", "--force", "-o", root_hash, "main", "main"])

    # Verify the re-created main branch
    remaining = ReferenceSchema().loads(execute_cli_command(["--json", "branch"]), many=True)
    assert_that(remaining).is_length(1)
    only_branch = remaining[0]
    assert_that(only_branch.name).is_equal_to("main")
    assert_that(only_branch.hash_).is_equal_to(root_hash)
def test_merge() -> None:
    """Commit on ``dev``, merge it into ``main`` and check both heads agree.

    Afterwards the test removes what it created: the ``dev`` branch, the
    committed key, and ``main`` itself (which is then re-created).
    """
    runner = CliRunner()

    def head_of(branch_name):
        # List the single branch and pick its hash out of the parsed result.
        listing = _run(runner, ["--json", "branch", "-l", branch_name])
        parsed = ReferenceSchema().loads(listing.output, many=True)
        return next(ref.hash_ for ref in parsed if ref.name == branch_name)

    _run(runner, ["branch", "dev"])
    dev_head = head_of("dev")
    _run(
        runner,
        ["contents", "--set", "foo.bar", "--ref", "dev", "-m", "test_message", "-c", dev_head],
        input=ContentsSchema().dumps(IcebergTable("uuid", "/a/b/c")),
    )

    # Merge dev into main, passing main's current head via "-c".
    main_head = head_of("main")
    _run(runner, ["merge", "dev", "-c", main_head])

    all_branches = ReferenceSchema().loads(_run(runner, ["--json", "branch"]).output, many=True)
    hash_by_name = {ref.name: ref.hash_ for ref in all_branches}
    assert hash_by_name["main"] == hash_by_name["dev"]

    # Clean-up: drop dev, delete the key on main, then recycle main.
    _run(runner, ["branch", "dev", "--delete"])
    history = simplejson.loads(_run(runner, ["--json", "log"]).output)
    newest_hash = history[0]["hash"]
    _run(
        runner,
        ["--json", "contents", "--delete", "foo.bar", "--ref", "main", "-m", "delete_message", "-c", newest_hash],
    )
    _run(runner, ["branch", "main", "--delete"])
    _run(runner, ["branch", "main"])
def test_assign() -> None:
    """Force-assign a branch and a tag to other references and compare hashes."""

    def hashes_by_name(ref_kind):
        # Map reference name -> hash for everything the given sub-command lists.
        parsed = ReferenceSchema().loads(execute_cli_command(["--json", ref_kind]), many=True)
        return {ref.name: ref.hash_ for ref in parsed}

    # Give dev a commit, then force main onto dev.
    execute_cli_command(["branch", "dev"])
    make_commit("assign.foo.bar", _new_table("test_assign"), "dev")
    execute_cli_command(["branch", "main", "dev", "--force"])
    branch_hashes = hashes_by_name("branch")
    assert branch_hashes["main"] == branch_hashes["dev"]

    # A new tag on main must carry main's hash.
    execute_cli_command(["tag", "v1.0", "main"])
    tag_hashes = hashes_by_name("tag")
    assert tag_hashes["v1.0"] == branch_hashes["main"]

    # Force-assigning the same tag to dev must carry dev's hash.
    execute_cli_command(["tag", "v1.0", "dev", "--force"])
    tag_hashes = hashes_by_name("tag")
    assert tag_hashes["v1.0"] == branch_hashes["dev"]
def test_command_line_interface(requests_mock: requests_mock) -> None:
    """Check usage, help and version output, then ``list-references`` against a mocked endpoint."""
    cli_runner = CliRunner()

    # A bare invocation prints the usage banner and exits cleanly.
    bare = cli_runner.invoke(cli.cli)
    assert bare.exit_code == 0
    assert "Usage: cli" in bare.output

    # So does --help.
    helped = cli_runner.invoke(cli.cli, ["--help"])
    assert helped.exit_code == 0
    assert "Usage: cli" in helped.output

    versioned = cli_runner.invoke(cli.cli, ["--version"])
    assert versioned.exit_code == 0
    assert __version__ in versioned.output

    # Serve a single branch from the trees endpoint the CLI is expected to query.
    stubbed_refs = [{"name": "main", "type": "BRANCH", "hash": "1234567890abcdef"}]
    requests_mock.get(
        "http://localhost:19120/api/v1/trees",
        text=json.dumps(stubbed_refs),
    )

    listed = cli_runner.invoke(cli.cli, ["list-references"])
    assert listed.exit_code == 0
    parsed = ReferenceSchema().loads(listed.output, many=True)
    assert len(parsed) == 1
    only_ref = parsed[0]
    assert only_ref.name == "main"
    assert only_ref.kind == "BRANCH"
    assert only_ref.hash_ == "1234567890abcdef"
def _get_head_branch_hash(branch: str) -> str:
    """Return the hash the CLI's branch listing reports for ``branch``.

    Raises ``KeyError`` when the listing contains no branch with that name.
    """
    listing = ReferenceSchema().loads(execute_cli_command(["--json", "branch"]), many=True)
    hash_by_name = {}
    for entry in listing:
        hash_by_name[entry.name] = entry.hash_
    return hash_by_name[branch]
def test_tag() -> None:
    """Exercise tag creation, lookup, deletion and extended metadata via the CLI."""

    def list_tags():
        return ReferenceSchema().loads(execute_cli_command(["--json", "tag"]), many=True)

    main_hash = ref_hash("main")
    assert len(list_tags()) == 0

    # Each tag is created from a different target spelling (branch name,
    # bare hash, "name@hash"); the listing must grow by one every time.
    creations = [
        ("dev-tag", "main"),
        ("etl-tag", "main"),
        ("dev-hash-tag", main_hash),
        ("etl-hash-tag", f"main@{main_hash}"),
    ]
    for expected_count, (tag_name, target) in enumerate(creations, start=1):
        execute_cli_command(["tag", tag_name, target])
        assert len(list_tags()) == expected_count

    # Listing one existing tag by name is parsed as a single reference.
    single = ReferenceSchema().loads(execute_cli_command(["--json", "tag", "-l", "etl-tag"]), many=False)
    assert_that(single.name).is_equal_to("etl-tag")

    # Listing an unknown name is expected to give an empty JSON value.
    unknown = simplejson.loads(execute_cli_command(["--json", "tag", "-l", "foo"]))
    assert len(unknown) == 0

    # Delete all four tags, two of them with an explicit "-c <hash>" condition.
    deletions = (
        ["tag", "-d", "etl-tag"],
        ["tag", "-d", "etl-hash-tag"],
        ["tag", "-d", "dev-tag", "-c", main_hash],
        ["tag", "-d", "dev-hash-tag", "-c", main_hash],
    )
    for delete_args in deletions:
        execute_cli_command(delete_args)
    assert len(list_tags()) == 0

    # A tag created without a target must carry the same hash as main.
    execute_cli_command(["tag", "v1.0"])
    tag_hashes = {ref.name: ref.hash_ for ref in list_tags()}
    branch_listing = ReferenceSchema().loads(execute_cli_command(["--json", "branch"]), many=True)
    branch_hashes = {ref.name: ref.hash_ for ref in branch_listing}
    assert tag_hashes["v1.0"] == branch_hashes["main"]

    # Tag a branch that has one commit and inspect the extended metadata.
    execute_cli_command(["branch", "metadata_branch", "main"])
    committed_table = _new_table("test_tag_metadata")
    make_commit("test.tag.metadata", committed_table, "metadata_branch", author="nessie_user1")
    execute_cli_command(["tag", "metadata_tag", "metadata_branch"])
    tagged = ReferenceSchema().loads(execute_cli_command(["--json", "tag", "-l", "metadata_tag", "--extended"]))
    metadata = tagged.metadata
    assert_that(metadata).is_not_none()
    assert_that(metadata.num_commits_ahead).is_none()
    assert_that(metadata.num_commits_behind).is_none()
    assert_that(metadata.num_total_commits).is_equal_to(1)
    assert_that(metadata.common_ancestor_hash).is_none()
    assert_that(metadata.commit_meta_of_head).is_not_none()
def test_command_line_interface() -> None:
    """Check usage, help and version output and the initial branch listing."""
    bare_output = execute_cli_command([])
    assert "Usage: nessie" in bare_output

    help_output = execute_cli_command(["--help"])
    assert "Usage: nessie" in help_output

    version_output = execute_cli_command(["--version"])
    assert __version__ in version_output

    # The listing must hold exactly one reference: a Branch named "main".
    listing = execute_cli_command(["--json", "branch", "-l"])
    parsed = ReferenceSchema().loads(listing, many=True)
    assert len(parsed) == 1
    only_ref = parsed[0]
    assert only_ref.name == "main"
    assert isinstance(only_ref, Branch)
def test_ref() -> None:
    """Create and delete branches, checking the branch count after each step."""
    runner = CliRunner()

    def list_branches():
        listing = _run(runner, ["--json", "branch"])
        return ReferenceSchema().loads(listing.output, many=True)

    assert len(list_branches()) == 1

    # One branch without an explicit source, one created from main.
    _run(runner, ["branch", "dev"])
    assert len(list_branches()) == 2

    _run(runner, ["branch", "etl", "main"])
    assert len(list_branches()) == 3

    # Removing both brings the count back to the starting value.
    for doomed in ("etl", "dev"):
        _run(runner, ["branch", "-d", doomed])
    assert len(list_branches()) == 1
def test_tag() -> None:
    """Create and delete tags, checking the tag count after each step."""
    runner = CliRunner()

    def list_tags():
        listing = _run(runner, ["--json", "tag"])
        return ReferenceSchema().loads(listing.output, many=True)

    assert len(list_tags()) == 0

    # Two tags on main; the listing grows by one each time.
    for expected_count, tag_name in enumerate(("dev-tag", "etl-tag"), start=1):
        _run(runner, ["tag", tag_name, "main"])
        assert len(list_tags()) == expected_count

    for doomed in ("etl-tag", "dev-tag"):
        _run(runner, ["tag", "-d", doomed])
    assert len(list_tags()) == 0

    # Creating a tag without a target is run with ret_val=1.
    _run(runner, ["tag", "v1.0"], ret_val=1)
def test_log() -> None:
    """Commit and delete one table on ``main`` and check the log after each step."""
    runner = CliRunner()

    def read_log(*revision):
        # Parsed JSON output of "log", optionally restricted by a revision argument.
        return simplejson.loads(_run(runner, ["--json", "log"] + list(revision)).output)

    assert len(read_log()) == 0

    main_listing = _run(runner, ["--json", "branch", "-l", "main"])
    initial_hash = ReferenceSchema().loads(main_listing.output, many=True)[0].hash_

    _run(
        runner,
        ["contents", "--set", "foo.bar", "--ref", "main", "-m", "test_message", "-c", initial_hash],
        input=ContentsSchema().dumps(IcebergTable("/a/b/c")),
    )

    # The stored table must read back equal to what was written.
    fetched = ContentsSchema().loads(_run(runner, ["--json", "contents", "foo.bar"]).output, many=True)
    assert len(fetched) == 1
    assert fetched[0] == IcebergTable("/a/b/c")

    history = read_log()
    assert len(history) == 1

    # Asking for the log at that commit's hash gives one entry as well.
    history = read_log(history[0]["hash"])
    assert len(history) == 1

    listed = EntrySchema().loads(_run(runner, ["--json", "contents", "--list"]).output, many=True)
    assert len(listed) == 1

    _run(
        runner,
        ["--json", "contents", "--delete", "foo.bar", "--ref", "main", "-m", "delete_message", "-c", history[0]["hash"]],
    )

    history = read_log()
    assert len(history) == 2

    # A "<first>..<second>" range over the two entries must give one entry.
    revision_range = "{}..{}".format(history[0]["hash"], history[1]["hash"])
    history = read_log(revision_range)
    assert len(history) == 1
def test_command_line_interface() -> None:
    """Check usage, help and version output and the initial branch listing."""
    runner = CliRunner()

    bare = _run(runner, [])
    assert "Usage: cli" in bare.output

    helped = _run(runner, ["--help"])
    assert "Usage: cli" in helped.output

    versioned = _run(runner, ["--version"])
    assert __version__ in versioned.output

    # The listing must hold exactly one reference: a Branch named "main".
    listed = _run(runner, ["--json", "branch", "-l"])
    parsed = ReferenceSchema().loads(listed.output, many=True)
    assert len(parsed) == 1
    only_ref = parsed[0]
    assert only_ref.name == "main"
    assert isinstance(only_ref, Branch)
def test_command_line_interface_e2e() -> None:
    """Check usage, help, version and ``list-references`` by invoking the CLI directly."""
    cli_runner = CliRunner()

    bare = cli_runner.invoke(cli.cli)
    assert bare.exit_code == 0
    assert "Usage: cli" in bare.output

    helped = cli_runner.invoke(cli.cli, ["--help"])
    assert helped.exit_code == 0
    assert "Usage: cli" in helped.output

    versioned = cli_runner.invoke(cli.cli, ["--version"])
    assert versioned.exit_code == 0
    assert __version__ in versioned.output

    # Exactly one reference, named "main", must be listed.
    listed = cli_runner.invoke(cli.cli, ["list-references"])
    assert listed.exit_code == 0
    parsed = ReferenceSchema().loads(listed.output, many=True)
    assert len(parsed) == 1
    assert parsed[0].name == "main"
def test_branch() -> None:
    """Exercise branch creation, lookup, extended metadata and deletion via the CLI."""

    def list_branches():
        return ReferenceSchema().loads(execute_cli_command(["--json", "branch"]), many=True)

    main_hash = ref_hash("main")
    assert len(list_branches()) == 1

    # Each branch is created from a different source spelling (none, branch
    # name, bare hash, "name@hash"); the listing must grow by one each time.
    creations = (
        ["branch", "dev"],
        ["branch", "etl", "main"],
        ["branch", "dev_hash", main_hash],
        ["branch", "etl_hash", f"main@{main_hash}"],
    )
    for expected_count, create_args in enumerate(creations, start=2):
        execute_cli_command(create_args)
        assert len(list_branches()) == expected_count

    # Listing one existing branch by name is parsed as a single reference.
    single = ReferenceSchema().loads(execute_cli_command(["--json", "branch", "-l", "etl"]), many=False)
    assert_that(single.name).is_equal_to("etl")

    # Listing an unknown name is expected to give an empty JSON value.
    unknown = simplejson.loads(execute_cli_command(["--json", "branch", "-l", "foo"]))
    assert len(unknown) == 0

    # Put one commit on dev and inspect its extended metadata.
    committed_table = _new_table("test_branch_metadata")
    make_commit("test.branch.metadata", committed_table, "dev", author="nessie_user1")
    dev_branch = ReferenceSchema().loads(execute_cli_command(["--json", "branch", "-l", "dev", "--extended"]))
    metadata = dev_branch.metadata
    assert_that(metadata).is_not_none()
    assert_that(metadata.num_commits_ahead).is_equal_to(1)
    assert_that(metadata.num_commits_behind).is_equal_to(0)
    assert_that(metadata.num_total_commits).is_equal_to(1)
    assert_that(metadata.common_ancestor_hash).is_not_empty()
    assert_that(metadata.commit_meta_of_head).is_not_none()

    # Delete every branch created above, two of them with a "-c <hash>" condition.
    dev_hash = ref_hash("dev")
    deletions = (
        ["branch", "-d", "etl"],
        ["branch", "-d", "dev", "-c", dev_hash],
        ["branch", "-d", "etl_hash", "-c", main_hash],
        ["branch", "-d", "dev_hash"],
    )
    for delete_args in deletions:
        execute_cli_command(delete_args)
    assert len(list_branches()) == 1
def make_commit(key: str, table: Content, branch: str, head_hash: str = None, message: str = "test message", author: str = "nessie test") -> None:
    """Commit ``table`` under ``key`` on ``branch`` through the Nessie CLI.

    When ``head_hash`` is falsy, the hash passed via ``-c`` is taken from the
    CLI's branch listing for ``branch``; a ``KeyError`` is raised if that
    branch is not listed. The serialized table is passed to the CLI as input.
    """
    expected_hash = head_hash
    if not expected_hash:
        listing = ReferenceSchema().loads(execute_cli_command(["--json", "branch"]), many=True)
        hash_by_name = {ref.name: ref.hash_ for ref in listing}
        expected_hash = hash_by_name[branch]

    commit_args = [
        "content", "commit", "--stdin", key,
        "--ref", branch,
        "-m", message,
        "-c", expected_hash,
        "--author", author,
    ]
    execute_cli_command(commit_args, input_data=ContentSchema().dumps(table))
def ref_hash(ref: str) -> str:
    """Return the hash of the first listed branch whose name equals ``ref``.

    Raises ``StopIteration`` when the branch listing has no such entry.
    """
    listing = ReferenceSchema().loads(execute_cli_command(["--json", "branch", "-l"]), many=True)
    matching_hashes = (entry.hash_ for entry in listing if entry.name == ref)
    return next(matching_hashes)
def test_log() -> None:
    """Commit and delete one table on ``main`` under two authors and check log filtering."""
    runner = CliRunner()

    def read_log(*extra_args):
        # Parsed JSON output of "log" with any additional CLI arguments appended.
        return simplejson.loads(_run(runner, ["--json", "log"] + list(extra_args)).output)

    assert len(read_log()) == 0

    main_listing = _run(runner, ["--json", "branch", "-l", "main"])
    initial_hash = ReferenceSchema().loads(main_listing.output, many=True)[0].hash_

    # First commit, authored by nessie_user1.
    _run(
        runner,
        [
            "contents", "--set", "foo.bar",
            "--ref", "main",
            "-m", "test_message",
            "-c", initial_hash,
            "--author", "nessie_user1",
        ],
        input=ContentsSchema().dumps(IcebergTable("uuid", "/a/b/c")),
    )

    # The stored table must read back equal to what was written.
    fetched = ContentsSchema().loads(_run(runner, ["--json", "contents", "foo.bar"]).output, many=True)
    assert len(fetched) == 1
    assert fetched[0] == IcebergTable("uuid", "/a/b/c")

    history = read_log()
    assert len(history) == 1

    # Asking for the log at that commit's hash gives one entry as well.
    history = read_log(history[0]["hash"])
    assert len(history) == 1

    listed = EntrySchema().loads(_run(runner, ["--json", "contents", "--list"]).output, many=True)
    assert len(listed) == 1

    # Second commit, authored by nessie_user2, removes the key again.
    _run(
        runner,
        [
            "--json", "contents", "--delete", "foo.bar",
            "--ref", "main",
            "-m", "delete_message",
            "-c", history[0]["hash"],
            "--author", "nessie_user2",
        ],
    )

    history = read_log()
    assert len(history) == 2

    # A "<first>..<second>" range over the two entries must give one entry.
    revision_range = "{}..{}".format(history[0]["hash"], history[1]["hash"])
    assert len(read_log(revision_range)) == 1

    # Each filter combination below is paired with the entry count it must give.
    filter_expectations = (
        ([], 2),
        (["--author", "nessie_user1"], 1),
        (["--author", "nessie_user2"], 1),
        (["--author", "nessie_user2", "--author", "nessie_user1"], 2),
        # the committer is set on the server-side and is empty if we're not
        # logged in when performing a commit
        (["--committer", ""], 2),
        (["--query", "commit.author == 'nessie_user2' || commit.author == 'non_existing'"], 1),
        (["--after", "2001-01-01T00:00:00+00:00", "--before", "2999-12-30T23:00:00+00:00"], 2),
    )
    for filter_args, expected_count in filter_expectations:
        assert len(read_log(*filter_args)) == expected_count
def test_transplant() -> None:
    """Cherry-pick two commits from ``dev`` onto ``main`` and check the resulting log.

    Afterwards the test deletes the ``foo.bar`` key on ``main``, removes the
    ``dev`` branch, and deletes and re-creates ``main``.
    """
    runner = CliRunner()
    _run(runner, ["branch", "dev"])

    dev_listing = _run(runner, ["--json", "branch", "-l", "dev"])
    starting_hash = next(
        ref.hash_ for ref in ReferenceSchema().loads(dev_listing.output, many=True) if ref.name == "dev"
    )

    # Two commits on dev and one on main, all passing the same starting hash via "-c".
    planned_commits = (
        ("foo.bar", "dev", "test_message"),
        ("bar.bar", "dev", "test_message2"),
        ("foo.baz", "main", "test_message3"),
    )
    for content_key, target_ref, commit_message in planned_commits:
        _run(
            runner,
            ["contents", "--set", content_key, "--ref", target_ref, "-m", commit_message, "-c", starting_hash],
            input=ContentsSchema().dumps(IcebergTable("uuid", "/a/b/c")),
        )

    full_listing = _run(runner, ["--json", "branch", "-l"])
    main_head = next(
        ref.hash_ for ref in ReferenceSchema().loads(full_listing.output, many=True) if ref.name == "main"
    )

    # Transplant dev's commits, passing the second log entry before the first.
    dev_history = simplejson.loads(_run(runner, ["--json", "log", "dev"]).output)
    dev_hashes = [entry["hash"] for entry in dev_history]
    _run(runner, ["cherry-pick", "-c", main_head, dev_hashes[1], dev_hashes[0]])

    main_history = simplejson.loads(_run(runner, ["--json", "log"]).output)
    assert len(main_history) == 3

    # Clean-up.
    _run(
        runner,
        [
            "--json", "contents", "--delete", "foo.bar",
            "--ref", "main",
            "-m", "delete_message",
            "-c", main_history[0]["hash"],
        ],
    )
    _run(runner, ["branch", "dev", "--delete"])
    _run(runner, ["branch", "main", "--delete"])
    _run(runner, ["branch", "main"])
def test_contents_listing() -> None:
    """Commit one Iceberg and one Delta Lake table to a scratch branch and check listing filters."""
    runner = CliRunner()
    branch = "contents_listing_dev"
    _run(runner, ["branch", branch])

    iceberg_table = IcebergTable(id="uuid", metadata_location="/a/b/c")
    delta_lake_table = DeltaLakeTable(
        id="uuid2",
        metadata_location_history=["asd"],
        checkpoint_location_history=["def"],
        last_checkpoint="x",
    )
    stored = (
        ("this.is.iceberg.foo", "test_message1", iceberg_table),
        ("this.is.delta.bar", "test_message2", delta_lake_table),
    )

    # The branch hash is looked up again right before each commit.
    for content_key, commit_message, table in stored:
        listing = _run(runner, ["--json", "branch", "-l", branch])
        current_head = ReferenceSchema().loads(listing.output, many=True)[0].hash_
        _run(
            runner,
            ["contents", "--set", content_key, "--ref", branch, "-m", commit_message, "-c", current_head],
            input=ContentsSchema().dumps(table),
        )

    # Each key must read back as exactly the table that was stored under it.
    for content_key, _unused_message, table in stored:
        fetched_raw = _run(runner, ["--json", "contents", "--ref", branch, content_key])
        fetched = ContentsSchema().loads(fetched_raw.output, many=True)
        assert_that(fetched).is_length(1)
        assert_that(fetched[0]).is_equal_to(table)

    # Listing filters paired with the entry kinds they must return, in order.
    filter_expectations = (
        (["--type", "ICEBERG_TABLE"], ["ICEBERG_TABLE"]),
        (["--type", "DELTA_LAKE_TABLE"], ["DELTA_LAKE_TABLE"]),
        (["--query", "entry.contentType == 'ICEBERG_TABLE'"], ["ICEBERG_TABLE"]),
        (
            ["--query", "entry.contentType in ['ICEBERG_TABLE', 'DELTA_LAKE_TABLE']"],
            ["ICEBERG_TABLE", "DELTA_LAKE_TABLE"],
        ),
        (["--query", "entry.namespace.startsWith('this.is.del')"], ["DELTA_LAKE_TABLE"]),
        (["--query", "entry.namespace.startsWith('this.is')"], ["ICEBERG_TABLE", "DELTA_LAKE_TABLE"]),
    )
    for filter_args, expected_kinds in filter_expectations:
        listed_raw = _run(runner, ["--json", "contents", "--ref", branch, "--list"] + filter_args)
        entries = EntrySchema().loads(listed_raw.output, many=True)
        assert_that(entries).is_length(len(expected_kinds))
        for entry, expected_kind in zip(entries, expected_kinds):
            assert_that(entry.kind).is_equal_to(expected_kind)

    _run(runner, ["branch", branch, "--delete"])