def test_categorical_set_categories():
    """Check ``cat.set_categories`` against pandas for added and removed categories.

    A pandas ``Categorical`` with categories ``a``, ``b``, ``c`` is wrapped
    both in a ``pd.Series`` and in a ``Series`` built through
    ``Series.from_categorical``. Each replacement category list is applied
    to both, and the two results are compared with ``assert_eq``.
    """
    pd_categorical = pd.Categorical(
        ["a", "a", "b", "c", "a"], categories=["a", "b", "c"]
    )
    pandas_series = pd.Series(pd_categorical)
    series_under_test = Series.from_categorical(pd_categorical)

    replacement_categories = (
        ("a", "b", "c", "d"),  # adding category
        ("a", "b"),  # removing category ("c" is present in the data)
    )
    for categories in replacement_categories:
        # Hand each call its own fresh list, as separate literals would.
        expect = pandas_series.cat.set_categories(list(categories))
        got = series_under_test.cat.set_categories(list(categories))
        assert_eq(expect, got)
def test_categorical_unique_count(nelem):
    """Check that ``nunique()`` matches the number of unique values in pandas.

    Parameters
    ----------
    nelem
        Number of characters to sample for the categorical column.
        NOTE(review): presumably supplied by a pytest parametrization or
        fixture that is not visible in this chunk -- confirm.
    """
    from string import ascii_letters, digits

    # Seed the global NumPy generator so the sampled data is reproducible.
    np.random.seed(12)
    alphabet = list(ascii_letters + digits)
    sampled_chars = np.random.choice(alphabet, nelem)
    pd_cat = pd.Categorical(pd.Series(sampled_chars, dtype="category"))

    # Column under test, built from the pandas categorical.
    gdf = DataFrame()
    gdf["a"] = Series.from_categorical(pd_cat)
    actual_count = gdf["a"].nunique()

    # pandas reference: count the entries returned by ``unique()``.
    pdf = pd.DataFrame()
    pdf["a"] = pd_cat
    expected_count = len(pdf["a"].unique())

    assert actual_count == expected_count
def test_categorical_unique(num_elements):
    """Check that ``unique()`` yields the same values as pandas.

    Both sets of unique values are sorted with ``np.sort`` before being
    compared, so the test does not depend on the order in which either
    implementation returns them.

    Parameters
    ----------
    num_elements
        Number of characters to sample for the categorical column.
        NOTE(review): presumably supplied by a pytest parametrization or
        fixture that is not visible in this chunk -- confirm.
    """
    from string import ascii_letters, digits

    # Seed the global NumPy generator so the sampled data is reproducible.
    np.random.seed(12)
    alphabet = list(ascii_letters + digits)
    sampled_chars = np.random.choice(alphabet, num_elements)
    pd_cat = pd.Categorical(pd.Series(sampled_chars, dtype="category"))

    # Column under test: take its unique values, convert to pandas, sort.
    gdf = DataFrame()
    gdf["a"] = Series.from_categorical(pd_cat)
    actual_unique = gdf["a"].unique().to_pandas()
    actual_sorted = np.sort(actual_unique)

    # pandas reference, sorted the same way.
    pdf = pd.DataFrame()
    pdf["a"] = pd_cat
    expected_unique = pdf["a"].unique()
    expected_sorted = np.sort(expected_unique)

    np.testing.assert_array_equal(expected_sorted, actual_sorted)