def test_gb_labels_enum(self):
    """Compare groupby label columns for a mapping-backed Categorical.

    Intent (from the original note): enum groupby keys should be shown as
    strings rather than integer codes. The check is that the label column
    produced by ``Categorical.count()`` and the one produced by
    ``Dataset.gbu('catcol').count()`` share the same dtype character and
    hold equal values.
    """
    codes = [10, 10, 10, 20, 30, 20, 10, 20, 20]
    name_to_code = {
        'a': 30,
        'b': 20,
        'c': 10
    }
    cat = Categorical(codes, name_to_code)

    # Label column as reported by counting the Categorical directly.
    cat_counts = cat.count()
    cat_label_names = cat_counts.label_get_names()
    cat_labels = cat_counts[cat_label_names][0]

    # Label column as reported by grouping a Dataset on the same Categorical.
    dataset = Dataset({'catcol': cat, 'data': arange(9)})
    grouped_counts = dataset.gbu('catcol').count()
    grouped_label_names = grouped_counts.label_get_names()
    grouped_labels = grouped_counts[grouped_label_names][0]

    assert cat_labels.dtype.char == grouped_labels.dtype.char
    assert bool(np.all(cat_labels == grouped_labels))
def test_as_categorical(self):
    """Check that three equivalent groupings yield the same sums.

    The same key column is grouped via ``Dataset.gbu``, via a Categorical
    constructed from that column, and via ``gbu.as_categorical()``. Every
    column of the ``gbu`` sum result must match the same-named column in
    the other two results.
    """
    letters = ['a', 'b', 'c']
    row_count = 30
    # Random draws happen in the same order as the dict entries are listed.
    dataset = Dataset({
        'keycol1': np.random.choice(letters, row_count),
        'keycol2': np.random.choice(letters, row_count),
        'data': np.random.rand(row_count),
    })

    grouping = dataset.gbu('keycol1')
    cat = Categorical(dataset.keycol1, ordered=False, sort_gb=False)
    cat_from_grouping = grouping.as_categorical()

    expected = grouping.sum()
    cat_sums = cat.sum(dataset.data)
    cat_from_grouping_sums = cat_from_grouping.sum(dataset.data)

    for column_name, expected_column in expected.items():
        for candidate in (cat_sums, cat_from_grouping_sums):
            assert bool(np.all(candidate[column_name] == expected_column))
61.68, 72.85, 91.71, 61.12, ]) tens = FastArray([10] * 30) ds = Dataset({ 'strings': str_fa.copy(), 'ints': int_fa, 'floats': flt_fa, 'tens': tens }) gb = ds.gb('strings') ds_nums = Dataset({'ints': int_fa, 'floats': flt_fa, 'tens': tens}) data_to_compare = ['ints', 'floats', 'tens'] gbu = ds.gbu('strings') gb_funcs_L1 = [ 'sum', 'mean', 'min', 'max', 'var', 'std', 'nansum', 'nanmean', 'nanmin', 'nanmax', 'nanvar', 'nanstd', ]