Ejemplo n.º 1
0
def test_content_commit_with_empty_content_delete() -> None:
    """Commit empty stdin content for a key and expect the table to vanish.

    The listing of ``ICEBERG_TABLE`` entries on the branch must go from one
    entry (after the initial commit) to zero (after the empty-content commit).
    """
    branch = "contents_commit_with_empty_content_delete_dev"
    table_key = "this.is.iceberg.foo"

    def iceberg_entries():
        """Return the decoded ``ICEBERG_TABLE`` listing of the branch."""
        raw_listing = execute_cli_command([
            "--json", CONTENT_COMMAND, "list", "--ref", branch, "--type",
            "ICEBERG_TABLE"
        ])
        return EntrySchema().loads(raw_listing, many=True)

    execute_cli_command(["branch", branch])

    new_table = _create_iceberg_table(
        "test_contents_with_empty_content_delete")
    make_commit(table_key, new_table, branch)

    assert_that(iceberg_entries()).is_length(1)

    # The commit is made against the branch head as it is right now.
    expected_hash = _get_head_branch_hash(branch)
    commit_args = [
        CONTENT_COMMAND, "commit", "--stdin", table_key,
        "--ref", branch, "-m", "delete table", "-c", expected_hash
    ]
    execute_cli_command(commit_args, input_data="")

    assert_that(iceberg_entries()).is_length(0)
Ejemplo n.º 2
0
def test_content_commit_with_edited_data() -> None:
    """Commit a table, re-commit it unchanged, then commit an edited version.

    Re-committing identical content must not add a log entry; committing the
    edited table must leave a single entry whose content is the edited table.
    """
    branch = "content_commit_with_edited_data_dev"
    table_id = "this.is.iceberg.foo"

    def iceberg_entries():
        """Return the decoded ``ICEBERG_TABLE`` listing of the branch."""
        raw_listing = execute_cli_command([
            "--json", CONTENT_COMMAND, "list", "--ref", branch, "--type",
            "ICEBERG_TABLE"
        ])
        return EntrySchema().loads(raw_listing, many=True)

    execute_cli_command(["branch", branch])

    original_table = _create_iceberg_table(
        "test_content_commit_with_edited_data")
    make_commit(table_id, original_table, branch)

    # Same table name as the original, but a different metadata location.
    edited_iceberg_table = _create_iceberg_table(
        "test_content_commit_with_edited_data", metadata_location="/d/e/f")

    assert_that(iceberg_entries()).is_length(1)

    # Committing identical content again is expected to be a no-op, so the
    # branch log still holds exactly one commit.
    make_commit(table_id, original_table, branch)
    branch_log = simplejson.loads(
        execute_cli_command(["--json", "log", branch]))
    assert len(branch_log) == 1

    # Committing the modified table replaces the stored content.
    make_commit(table_id, edited_iceberg_table, branch)
    assert_that(iceberg_entries()).is_length(1)

    raw_view = execute_cli_command(
        ["--json", CONTENT_COMMAND, "view", "--ref", branch, table_id])
    viewed = ContentSchema().loads(raw_view, many=True)

    assert_that(viewed).is_length(1)
    assert_that(viewed[0]).is_equal_to(edited_iceberg_table)
Ejemplo n.º 3
0
def test_log() -> None:
    """Check ``log`` output before and after a set and a delete on ``main``.

    Also checks ``log`` restricted to a single hash and to a ``a..b`` range.
    """
    runner = CliRunner()

    def log_entries(*extra: str):
        """Run ``--json log`` with extra arguments and decode the output."""
        return simplejson.loads(_run(runner, ["--json", "log", *extra]).output)

    assert len(log_entries()) == 0

    main_refs = ReferenceSchema().loads(
        _run(runner, ["--json", "branch", "-l", "main"]).output, many=True)
    empty_hash = main_refs[0].hash_

    # Put a table under "foo.bar", expecting the (still empty) main hash.
    set_args = [
        "contents", "--set", "foo.bar", "--ref", "main",
        "-m", "test_message", "-c", empty_hash,
    ]
    _run(runner, set_args,
         input=ContentsSchema().dumps(IcebergTable("/a/b/c")))

    stored = ContentsSchema().loads(
        _run(runner, ["--json", "contents", "foo.bar"]).output, many=True)
    assert len(stored) == 1
    assert stored[0] == IcebergTable("/a/b/c")

    commits = log_entries()
    assert len(commits) == 1
    commits = log_entries(commits[0]["hash"])
    assert len(commits) == 1

    listing = _run(runner, ["--json", "contents", "--list"]).output
    assert len(EntrySchema().loads(listing, many=True)) == 1

    # Delete the key again, expecting the hash of the commit made above.
    _run(runner, [
        "--json", "contents", "--delete", "foo.bar", "--ref", "main", "-m",
        "delete_message", "-c", commits[0]["hash"]
    ])

    commits = log_entries()
    assert len(commits) == 2

    hash_range = f"{commits[0]['hash']}..{commits[1]['hash']}"
    assert len(log_entries(hash_range)) == 1
Ejemplo n.º 4
0
def test_log() -> None:
    """Check ``log`` output and its hash, range, author and date filters.

    Two commits are made on ``main`` by different authors (a set and a
    delete); each filter is then checked against the expected entry count.
    """
    runner = CliRunner()

    def log_entries(*extra: str):
        """Run ``--json log`` with extra arguments and decode the output."""
        return simplejson.loads(_run(runner, ["--json", "log", *extra]).output)

    assert len(log_entries()) == 0

    main_refs = ReferenceSchema().loads(
        _run(runner, ["--json", "branch", "-l", "main"]).output, many=True)
    empty_hash = main_refs[0].hash_

    # First commit: store a table under "foo.bar" as nessie_user1.
    _run(
        runner,
        [
            "contents", "--set", "foo.bar", "--ref", "main", "-m",
            "test_message", "-c", empty_hash, "--author", "nessie_user1"
        ],
        input=ContentsSchema().dumps(IcebergTable("uuid", "/a/b/c")),
    )

    stored = ContentsSchema().loads(
        _run(runner, ["--json", "contents", "foo.bar"]).output, many=True)
    assert len(stored) == 1
    assert stored[0] == IcebergTable("uuid", "/a/b/c")

    commits = log_entries()
    assert len(commits) == 1
    commits = log_entries(commits[0]["hash"])
    assert len(commits) == 1

    listing = _run(runner, ["--json", "contents", "--list"]).output
    assert len(EntrySchema().loads(listing, many=True)) == 1

    # Second commit: delete the key as nessie_user2.
    delete_args = [
        "--json", "contents", "--delete", "foo.bar", "--ref", "main",
        "-m", "delete_message", "-c", commits[0]["hash"],
        "--author", "nessie_user2",
    ]
    _run(runner, delete_args)

    commits = log_entries()
    assert len(commits) == 2

    hash_range = f"{commits[0]['hash']}..{commits[1]['hash']}"
    assert len(log_entries(hash_range)) == 1
    assert len(log_entries()) == 2

    # Author filters: one commit each, two when both authors are given.
    assert len(log_entries("--author", "nessie_user1")) == 1
    assert len(log_entries("--author", "nessie_user2")) == 1
    assert len(log_entries(
        "--author", "nessie_user2", "--author", "nessie_user1")) == 2

    # the committer is set on the server-side and is empty if we're not logged
    # in when performing a commit
    assert len(log_entries("--committer", "")) == 2

    author_query = (
        "commit.author == 'nessie_user2' || commit.author == 'non_existing'")
    assert len(log_entries("--query", author_query)) == 1

    assert len(log_entries(
        "--after", "2001-01-01T00:00:00+00:00",
        "--before", "2999-12-30T23:00:00+00:00")) == 2
Ejemplo n.º 5
0
def test_contents_listing() -> None:
    """Test contents listing and filtering.

    Creates a dedicated branch, commits one Iceberg table and one Delta Lake
    table to it, then checks lookups by key and listings filtered through
    ``--type`` and ``--query``. The branch is deleted in a ``finally`` block
    so that a failing assertion does not leave it behind.
    """
    runner = CliRunner()
    branch = "contents_listing_dev"
    _run(runner, ["branch", branch])

    def branch_hash() -> str:
        """Return the current hash of the test branch as reported by the CLI."""
        refs = ReferenceSchema().loads(
            _run(runner, ["--json", "branch", "-l", branch]).output,
            many=True)
        return refs[0].hash_

    def list_entries(*selector: str):
        """List the branch contents with the given extra CLI arguments."""
        result = _run(runner, [
            "--json", "contents", "--ref", branch, "--list", *selector
        ])
        return EntrySchema().loads(result.output, many=True)

    try:
        iceberg_table = IcebergTable(id="uuid", metadata_location="/a/b/c")
        delta_lake_table = DeltaLakeTable(id="uuid2",
                                          metadata_location_history=["asd"],
                                          checkpoint_location_history=["def"],
                                          last_checkpoint="x")

        _run(
            runner,
            [
                "contents", "--set", "this.is.iceberg.foo", "--ref", branch,
                "-m", "test_message1", "-c", branch_hash()
            ],
            input=ContentsSchema().dumps(iceberg_table),
        )

        # The branch hash is looked up again before the second commit.
        _run(
            runner,
            [
                "contents", "--set", "this.is.delta.bar", "--ref", branch,
                "-m", "test_message2", "-c", branch_hash()
            ],
            input=ContentsSchema().dumps(delta_lake_table),
        )

        result = _run(
            runner,
            ["--json", "contents", "--ref", branch, "this.is.iceberg.foo"])
        tables = ContentsSchema().loads(result.output, many=True)
        assert_that(tables).is_length(1)
        assert_that(tables[0]).is_equal_to(iceberg_table)

        result = _run(
            runner,
            ["--json", "contents", "--ref", branch, "this.is.delta.bar"])
        tables = ContentsSchema().loads(result.output, many=True)
        assert_that(tables).is_length(1)
        assert_that(tables[0]).is_equal_to(delta_lake_table)

        tables = list_entries("--type", "ICEBERG_TABLE")
        assert_that(tables).is_length(1)
        assert_that(tables[0].kind).is_equal_to("ICEBERG_TABLE")

        tables = list_entries("--type", "DELTA_LAKE_TABLE")
        assert_that(tables).is_length(1)
        assert_that(tables[0].kind).is_equal_to("DELTA_LAKE_TABLE")

        tables = list_entries("--query",
                              "entry.contentType == 'ICEBERG_TABLE'")
        assert_that(tables).is_length(1)
        assert_that(tables[0].kind).is_equal_to("ICEBERG_TABLE")

        # NOTE(review): the two-entry checks below index the result, so they
        # depend on the order in which the entries are returned.
        tables = list_entries(
            "--query",
            "entry.contentType in ['ICEBERG_TABLE', 'DELTA_LAKE_TABLE']")
        assert_that(tables).is_length(2)
        assert_that(tables[0].kind).is_equal_to("ICEBERG_TABLE")
        assert_that(tables[1].kind).is_equal_to("DELTA_LAKE_TABLE")

        tables = list_entries("--query",
                              "entry.namespace.startsWith('this.is.del')")
        assert_that(tables).is_length(1)
        assert_that(tables[0].kind).is_equal_to("DELTA_LAKE_TABLE")

        tables = list_entries("--query",
                              "entry.namespace.startsWith('this.is')")
        assert_that(tables).is_length(2)
        assert_that(tables[0].kind).is_equal_to("ICEBERG_TABLE")
        assert_that(tables[1].kind).is_equal_to("DELTA_LAKE_TABLE")
    finally:
        # Always remove the branch, even when an assertion above failed.
        _run(runner, ["branch", branch, "--delete"])
Ejemplo n.º 6
0
def test_content_list() -> None:
    """Check ``content list`` with ``--type`` and ``--filter`` selectors.

    One Iceberg table, one Delta Lake table and one Iceberg view are committed
    to a fresh branch; the listing is then checked by branch name, by hash and
    by ``name@hash``, and with several filter expressions.
    """
    branch = "contents_list_dev"
    execute_cli_command(["branch", branch])

    def entries_on(ref: str, *selector: str):
        """List the content entries on ``ref``, narrowed by ``selector``."""
        raw_listing = execute_cli_command(
            ["--json", CONTENT_COMMAND, "list", "--ref", ref, *selector])
        return EntrySchema().loads(raw_listing, many=True)

    iceberg_table = _create_iceberg_table("test_contents_list")
    delta_lake_table = _create_delta_lake_table("test_dl_table_list")
    iceberg_view = _create_iceberg_view("test_iceberg_view_list")

    for key, content in (
        ("this.is.iceberg.foo", iceberg_table),
        ("this.is.delta.bar", delta_lake_table),
        ("this.is.sql.baz", iceberg_view),
    ):
        make_commit(key, content, branch)

    iceberg_only = entries_on(branch, "--type", "ICEBERG_TABLE")
    assert_that(iceberg_only).is_length(1)
    assert_that(iceberg_only[0].kind).is_equal_to("ICEBERG_TABLE")

    # The same listing must come back when the ref is given as a bare hash
    # or as "branch@hash".
    branch_hash = ref_hash(branch)
    by_hash = entries_on(branch_hash, "--type", "ICEBERG_TABLE")
    assert_that(by_hash).is_equal_to(iceberg_only)
    by_name_and_hash = entries_on(f"{branch}@{branch_hash}", "--type",
                                  "ICEBERG_TABLE")
    assert_that(by_name_and_hash).is_equal_to(iceberg_only)

    delta_only = entries_on(branch, "--type", "DELTA_LAKE_TABLE")
    assert_that(delta_only).is_length(1)
    assert_that(delta_only[0].kind).is_equal_to("DELTA_LAKE_TABLE")

    filtered = entries_on(branch, "--filter",
                          "entry.contentType == 'ICEBERG_TABLE'")
    assert_that(filtered).is_length(1)
    assert_that(filtered[0].kind).is_equal_to("ICEBERG_TABLE")

    filtered = entries_on(
        branch, "--filter",
        "entry.contentType in ['ICEBERG_TABLE', 'DELTA_LAKE_TABLE']")
    assert_that(filtered).is_length(2)
    assert_that({entry.kind for entry in filtered}).is_equal_to(
        {"DELTA_LAKE_TABLE", "ICEBERG_TABLE"})

    filtered = entries_on(branch, "--filter",
                          "entry.namespace.startsWith('this.is.del')")
    assert_that(filtered).is_length(1)
    assert_that(filtered[0].kind).is_equal_to("DELTA_LAKE_TABLE")

    filtered = entries_on(branch, "--filter",
                          "entry.namespace.startsWith('this.is')")
    assert_that(filtered).is_length(3)
    assert_that({entry.kind for entry in filtered}).is_equal_to(
        {"ICEBERG_TABLE", "ICEBERG_VIEW", "DELTA_LAKE_TABLE"})
Ejemplo n.º 7
0
def test_log() -> None:
    """Test log and log filtering.

    Makes one commit on a ``dev_test_log`` branch and two commits on ``main``
    (a put and a delete by different authors), then checks ``log`` in its
    plain and extended (``-x``) forms, by hash, by ``--revision-range``, with
    ``-n``, and with the author, committer, filter and date options. Finally
    checks that combining a ref hash with a different ``--revision-range``
    end hash makes the CLI exit with status 2.
    """

    def json_log(*extra: str):
        """Run ``--json log`` with extra arguments and decode the JSON output."""
        return simplejson.loads(execute_cli_command(["--json", "log", *extra]))

    main_hash = ref_hash("main")

    assert len(json_log()) == 0

    execute_cli_command(["branch", "dev_test_log"])
    table = _new_table("test_log_dev")
    make_commit("log.foo.dev", table, "dev_test_log", author="nessie_user1")
    table = _new_table("test_log")
    make_commit("log.foo.bar", table, "main", author="nessie_user1", message="commit to main")
    tables = ContentSchema().loads(execute_cli_command(["--json", "content", "view", "log.foo.bar"]), many=True)
    assert len(tables) == 1
    assert tables[0] == table

    # One assertion per property, so a failure names the property that broke.
    ext_logs: List[LogEntry] = LogEntrySchema().loads(execute_cli_command(["--json", "log", "-x"]), many=True)
    assert len(ext_logs) == 1
    assert ext_logs[0].commit_meta.message == "commit to main"
    assert ext_logs[0].commit_meta.author == "nessie_user1"
    assert ext_logs[0].parent_commit_hash is not None
    assert len(ext_logs[0].operations) == 1
    assert ext_logs[0].operations[0].key == ContentKey.from_path_string("log.foo.bar")

    simple_logs: List[CommitMeta] = CommitMetaSchema().loads(execute_cli_command(["--json", "log"]), many=True)
    assert len(simple_logs) == 1
    assert simple_logs[0].message == "commit to main"
    assert simple_logs[0].author == "nessie_user1"

    logs_by_hash: List[CommitMeta] = CommitMetaSchema().loads(execute_cli_command(["--json", "log", simple_logs[0].hash_]), many=True)
    assert_that(logs_by_hash).is_equal_to(simple_logs)

    logs = json_log()
    # Check the length before indexing so an empty log fails as an assertion.
    assert_that(logs).is_length(1)
    logs_hash = logs[0]["hash"]
    assert_that(json_log("--revision-range", logs_hash)).is_equal_to(logs)
    assert_that(json_log("--revision-range", f"..{logs_hash}")).is_equal_to(logs)
    assert_that(json_log(f"main@{logs_hash}", "--revision-range", logs_hash)).is_equal_to(logs)

    entries = EntrySchema().loads(execute_cli_command(["--json", "content", "list"]), many=True)
    assert_that(entries).is_length(1)

    # Second commit on main: the "delete_message" commit, authored by nessie_user2.
    execute_cli_command(
        [
            "--json",
            "content",
            "commit",
            "log.foo.bar",
            "-R",
            "--ref",
            "main",
            "-m",
            "delete_message",
            "-c",
            logs_hash,
            "--author",
            "nessie_user2",
        ],
    )

    # CLI arguments are passed as strings, like every other element here.
    assert_that(json_log("-n", "1")).is_length(1)
    assert_that(json_log("dev_test_log")).is_length(1)
    logs = json_log()
    assert_that(logs).is_length(2)
    assert_that(json_log("--revision-range", f"{logs[0]['hash']}..{logs[1]['hash']}")).is_length(1)
    assert_that(json_log()).is_length(2)

    by_author = json_log("--author", "nessie_user1")
    assert_that(by_author).is_length(1)
    assert_that(by_author[0]["author"]).is_equal_to("nessie_user1")
    by_author = json_log("--author", "nessie_user2")
    assert_that(by_author).is_length(1)
    assert_that(by_author[0]["author"]).is_equal_to("nessie_user2")
    assert_that(json_log("--author", "nessie_user2", "--author", "nessie_user1")).is_length(2)

    # the committer is set on the server-side and is empty if we're not logged
    # in when performing a commit
    assert_that(json_log("--committer", "")).is_length(2)
    assert_that(json_log("--filter", "commit.author == 'nessie_user2' || commit.author == 'non_existing'")).is_length(1)
    assert_that(json_log("--after", "2001-01-01T00:00:00+00:00", "--before", "2999-12-30T23:00:00+00:00")).is_length(2)

    # Specifying a different end-hash via revision-range and ref is forbidden
    execute_cli_command(["--json", "log", main_hash, "--revision-range", logs[0]["hash"]], ret_val=2)
    execute_cli_command(["--json", "log", f"main@{main_hash}", "--revision-range", logs[0]["hash"]], ret_val=2)