Beispiel #1
0
    def test_gb_labels_enum(self):
        """Enum groupby keys must be displayed as strings, not integer codes.

        Counts an enum-style Categorical directly and through
        ``Dataset.gbu(...)``, then checks that the label column of both
        results has the same dtype character and the same values.
        """
        enum_mapping = {'a': 30, 'b': 20, 'c': 10}
        cat = Categorical([10, 10, 10, 20, 30, 20, 10, 20, 20], enum_mapping)

        # Label column produced by counting on the Categorical itself.
        cat_counts = cat.count()
        cat_labels = cat_counts[cat_counts.label_get_names()][0]

        # Label column produced by grouping a Dataset on that Categorical.
        dataset = Dataset({'catcol': cat, 'data': arange(9)})
        ds_counts = dataset.gbu('catcol').count()
        ds_labels = ds_counts[ds_counts.label_get_names()][0]

        assert cat_labels.dtype.char == ds_labels.dtype.char
        assert bool(np.all(cat_labels == ds_labels))
Beispiel #2
0
    def test_as_categorical(self):
        """Sums via a groupby, a Categorical and ``as_categorical()`` must agree.

        Groups a random Dataset on ``keycol1`` and compares every column of
        ``gbu.sum()`` against the same-named column from a Categorical built
        on the key column and from ``gbu.as_categorical()``.
        """
        key_choices = ['a', 'b', 'c']
        nrows = 30
        ds = Dataset({
            'keycol1': np.random.choice(key_choices, nrows),
            'keycol2': np.random.choice(key_choices, nrows),
            'data': np.random.rand(nrows),
        })

        gbu = ds.gbu('keycol1')
        cat = Categorical(ds.keycol1, ordered=False, sort_gb=False)
        gbu_as_cat = gbu.as_categorical()

        # The groupby result is the reference the other two are checked against.
        expected = gbu.sum()
        cat_sums = cat.sum(ds.data)
        gbu_as_cat_sums = gbu_as_cat.sum(ds.data)

        for col_name, expected_col in expected.items():
            assert bool(np.all(cat_sums[col_name] == expected_col))
            assert bool(np.all(gbu_as_cat_sums[col_name] == expected_col))
Beispiel #3
0
    61.68,
    72.85,
    91.71,
    61.12,
])
# Module-level fixtures built from str_fa / int_fa / flt_fa.

# Constant column: thirty copies of the value 10.
tens = FastArray([10] * 30)

# Full dataset: 'strings' holds a copy of str_fa, while the numeric columns
# reference int_fa, flt_fa and tens directly.
ds = Dataset(
    {'strings': str_fa.copy(), 'ints': int_fa, 'floats': flt_fa, 'tens': tens}
)
gb = ds.gb('strings')

# Names of the numeric columns, and a dataset holding only those columns.
data_to_compare = ['ints', 'floats', 'tens']
ds_nums = Dataset({'ints': int_fa, 'floats': flt_fa, 'tens': tens})

gbu = ds.gbu('strings')

# Function names in the "L1" group: six plain reductions followed by their
# nan-prefixed counterparts, in matching order.
# NOTE(review): presumably looked up by name on the groupby objects — confirm
# against the code that consumes this list.
gb_funcs_L1 = [
    'sum', 'mean', 'min', 'max', 'var', 'std',
    'nansum', 'nanmean', 'nanmin', 'nanmax', 'nanvar', 'nanstd',
]