def test_categorical_value_counts(num_elements):
    """Compare ``value_counts`` on a categorical column against pandas.

    A seeded random sample of ``num_elements`` alphanumeric characters is
    turned into a pandas ``Categorical``. The same categorical is stored in
    column ``'a'`` of both the DataFrame under test and a pandas DataFrame,
    and the resulting value counts (converted to dicts) must be equal.
    """
    from string import ascii_letters, digits

    # Fixed seed so the drawn sample is identical on every run.
    np.random.seed(12)
    alphabet = list(ascii_letters + digits)
    sample = np.random.choice(alphabet, num_elements)
    categorical = pd.Categorical(pd.Series(sample, dtype='category'))

    # Frame under test.
    tested_frame = DataFrame()
    tested_frame['a'] = Series.from_categorical(categorical)
    tested_counts = tested_frame['a'].value_counts()

    # pandas reference.
    reference_frame = pd.DataFrame()
    reference_frame['a'] = categorical
    reference_counts = reference_frame['a'].value_counts()

    # Dict comparison ignores ordering of the counted categories.
    expected = reference_counts.to_dict()
    actual = tested_counts.to_pandas().to_dict()
    assert expected == actual
def test_categorical_unique_count(nelem):
    """Compare ``unique_count`` on a categorical column against pandas.

    A seeded random sample of ``nelem`` alphanumeric characters is turned
    into a pandas ``Categorical`` and stored in column ``'a'`` of both the
    DataFrame under test and a pandas DataFrame. The count reported by
    ``unique_count()`` must equal the length of pandas' ``unique()`` result.
    """
    from string import ascii_letters, digits

    # Fixed seed so the drawn sample is identical on every run.
    np.random.seed(12)
    alphabet = list(ascii_letters + digits)
    sample = np.random.choice(alphabet, nelem)
    categorical = pd.Categorical(pd.Series(sample, dtype='category'))

    # Frame under test.
    tested_frame = DataFrame()
    tested_frame['a'] = Series.from_categorical(categorical)
    tested_count = tested_frame['a'].unique_count()

    # pandas reference.
    reference_frame = pd.DataFrame()
    reference_frame['a'] = categorical
    reference_unique = reference_frame['a'].unique()

    assert tested_count == len(reference_unique)
def test_categorical_unique(num_elements):
    """Compare ``unique`` on a categorical column against pandas.

    A seeded random sample of ``num_elements`` alphanumeric characters is
    turned into a pandas ``Categorical`` and stored in column ``'a'`` of
    both the DataFrame under test and a pandas DataFrame. The unique values
    from each side are sorted with ``np.sort`` and must match element-wise.
    """
    from string import ascii_letters, digits

    # Fixed seed so the drawn sample is identical on every run.
    np.random.seed(12)
    alphabet = list(ascii_letters + digits)
    sample = np.random.choice(alphabet, num_elements)
    categorical = pd.Categorical(pd.Series(sample, dtype='category'))

    # Frame under test.
    tested_frame = DataFrame()
    tested_frame['a'] = Series.from_categorical(categorical)
    tested_unique = tested_frame['a'].unique()
    tested_sorted = np.sort(tested_unique)

    # pandas reference.
    reference_frame = pd.DataFrame()
    reference_frame['a'] = categorical
    reference_unique = reference_frame['a'].unique()
    reference_sorted = np.sort(reference_unique)

    # Sorting first makes the comparison independent of discovery order.
    np.testing.assert_array_equal(reference_sorted, tested_sorted)