def test_string_unique(item):
    """Check that ``Series.unique`` on string data agrees with pandas.

    Both libraries are fed the same ``item``; the pandas result is
    normalised to the ordering the GPU string backend produces before the
    two are compared with ``assert_eq``.
    """
    host_series = pd.Series(item)
    device_series = Series(item)

    # pandas' ``unique`` hands back a NumPy array, so re-wrap it in a Series.
    # The nvstrings-backed result is sorted with ``None`` ahead of every
    # other string, hence the sort with nulls first and the fresh index.
    expected = (
        pd.Series(host_series.unique())
        .sort_values(na_position="first")
        .reset_index(drop=True)
    )

    actual = device_series.unique()

    assert_eq(expected, actual)
def test_label_encode_dtype(ncats, cat_dtype):
    """Check the dtype produced by ``label_encoding`` for ``ncats`` categories.

    Builds a string Series, label-encodes it against its own unique values
    and asserts that the encoded column's dtype equals ``cat_dtype``.
    """
    # ``ncats + 1`` entries cycling through ``ncats`` distinct strings, so the
    # first value ("0") shows up twice.
    raw_values = [str(idx % ncats) for idx in range(ncats + 1)]
    column = Series(raw_values)

    # Use the column's own unique values, cast back to its dtype, as categories.
    categories = column.unique().astype(column.dtype)

    encoded = column.label_encoding(cats=categories)

    np.testing.assert_equal(encoded.dtype, cat_dtype)