def test_index_normalize_remove_values(inplace):
    """Check ``remove_values`` with the values given as ints, floats and strings.

    The index is declared with dtype ``pa.int64()``. Every variant asks to
    drop the values 1 and 3 (3 is not a key of the index), and in each case
    the result must compare equal to an index holding only key 2.
    """
    original_index = ExplicitSecondaryIndex(
        column="col",
        dtype=pa.int64(),
        index_dct={1: ["a", "b", "c"], 2: ["d"]},
    )

    # The same two values spelled as int, float and str.
    value_variants = ([1, 3], [1.0, 3.0], ["1", "3"])

    for values_to_remove in value_variants:
        # Start from a fresh copy so each variant sees the untouched index,
        # whatever the value of ``inplace``.
        result = original_index.copy().remove_values(
            values_to_remove, inplace=inplace
        )
        expected = ExplicitSecondaryIndex(
            column="col", dtype=pa.int64(), index_dct={2: ["d"]}
        )
        assert result == expected
def test_storage_key_after_update(inplace):
    """
    Check when ``index_storage_key`` is kept and when it is reset to ``None``.

    Asserted expectations:

    * ``remove_partitions`` / ``remove_values`` with an empty list keep the key
    * ``remove_partitions`` / ``remove_values`` with an actual entry reset it
    * ``copy()`` without arguments keeps the key
    * ``copy(column=...)`` with a different column resets it
    """

    def build_index():
        # Every scenario starts from an identically constructed index.
        return ExplicitSecondaryIndex(
            column="col",
            index_dct={1: ["part_1", "part_2"], 3: ["part_3"]},
            index_storage_key="storage_key",
        )

    # Scenario 1: removing partitions.
    index = build_index()
    result = index.remove_partitions([], inplace=inplace)
    assert result.index_storage_key == "storage_key"
    result = index.remove_partitions(["part_1"], inplace=inplace)
    assert result.index_storage_key is None

    # Scenario 2: removing values.
    index = build_index()
    result = index.remove_values([], inplace=inplace)
    assert result.index_storage_key == "storage_key"
    result = index.remove_values([1], inplace=inplace)
    assert result.index_storage_key is None

    # Scenario 3: copying, with and without an override.
    index = build_index()
    result = index.copy()
    assert result.index_storage_key == "storage_key"
    result = index.copy(column="something_different")
    assert result.index_storage_key is None
def test_eq_explicit():
    """Exercise ``==`` and ``!=`` on ``ExplicitSecondaryIndex`` in both operand orders.

    An unmodified copy must compare equal to its source. A copy with a single
    changed attribute (``column``, ``dtype`` or ``index_dct``) must compare
    unequal. Two copies that both have ``dtype`` set to ``None`` must compare
    equal to each other.
    """

    def check_equal(left, right):
        # Both operators, both operand orders.
        assert left == right
        assert right == left
        assert not (left != right)
        assert not (right != left)

    def check_unequal(left, right):
        # Both operators, both operand orders.
        assert left != right
        assert right != left
        assert not (left == right)
        assert not (right == left)

    original_index = ExplicitSecondaryIndex(
        column="col",
        index_dct={1: ["part_1"]},
        dtype=pa.int64(),
        index_storage_key="dataset_uuid/some_index.parquet",
    )

    # An untouched copy is equal to the original.
    check_equal(original_index.copy(), original_index)

    # Each entry overrides exactly one attribute on a fresh copy; every such
    # copy must differ from the original.
    single_attribute_changes = [
        ("column", "col2"),
        ("dtype", pa.uint64()),
        ("index_dct", {1: ["part_1"], 2: ["part_2"]}),  # extra key
        ("index_dct", {1: ["part_1", "part_2"]}),  # extra partition for same key
        ("index_dct", {1: ["part_2"]}),  # same key, other partition
        ("index_dct", {2: ["part_1"]}),  # other key, same partition
        ("dtype", None),
    ]
    for attribute, new_value in single_attribute_changes:
        changed = original_index.copy()
        setattr(changed, attribute, new_value)
        check_unequal(changed, original_index)

    # Two copies that both have dtype set to None compare equal to each other.
    untyped_a = original_index.copy()
    untyped_b = original_index.copy()
    untyped_a.dtype = None
    untyped_b.dtype = None
    check_equal(untyped_a, untyped_b)