Example #1
0
def test_assign_with_groupby2():
    """Assign column C as A minus the mean of A, computed with a by(B) clause.

    With B = [1, 1, 2, 2, 2] the expected values of C are the deviations of
    A from the mean of A inside each B group.
    """
    frame = dt.Frame(A=range(5), B=[1, 1, 2, 2, 2])
    frame[:, "C", by(f.B)] = f.A - dt.mean(f.A)

    expected = dt.Frame(
        A=range(5),
        B=[1, 1, 2, 2, 2],
        C=[-0.5, 0.5, -1.0, 0, 1.0],
    )
    assert_equals(frame, expected)
Example #2
0
def test_groupby():
    """Check count/min/max/mean/first/last of column B within groups of A.

    The third row of group A=1 holds a missing value, so that group is
    expected to report a count of 2 and None as its ``last`` value.
    """
    # Time values shared between the input frame and the expected result.
    t_2001 = d(2001, 7, 12, 0, 0, 0)
    t_2005 = d(2005, 3, 14, 15, 9, 26)
    t_2007 = d(2007, 11, 2, 19, 7, 38)
    t_1965 = d(1965, 6, 19, 2, 17, 7)
    t_2004 = d(2004, 4, 18, 12, 3, 31)

    frame = dt.Frame(
        A=[1, 1, 1, 2, 2, 2],
        B=[t_2001, t_2005, None, t_2007, t_1965, t_2004],
    )
    reducers = {
        "count": dt.count(f.B),
        "min": dt.min(f.B),
        "max": dt.max(f.B),
        "mean": dt.mean(f.B),
        "first": dt.first(f.B),
        "last": dt.last(f.B),
    }
    result = frame[:, reducers, dt.by(f.A)]

    expected = dt.Frame(
        A=[1, 2],
        count=[2, 3] / dt.int64,
        min=[t_2001, t_1965],
        max=[t_2005, t_2007],
        mean=[
            d(2003, 5, 13, 19, 34, 43),
            d(1992, 7, 13, 19, 9, 25, 333333),
        ],
        first=[t_2001, t_2007],
        last=[None, t_2004],
    )
    assert_equals(result, expected)
Example #3
0
def test_dt_nunique_with_by_for_ungroupped():
    """Check nunique of V and nunique of mean(V) when grouping by G.

    Group G=1 contains only missing values in V; group G=2 contains one
    missing value and the values 3 and 5.
    """
    source = dt.Frame(G=[1, 1, 1, 2, 2, 2], V=[None, None, None, None, 3, 5])
    unique_counts = {
        "V1": dt.nunique(f.V),
        "V2": dt.nunique(dt.mean(f.V)),
    }
    actual = source[:, unique_counts, dt.by(f.G)]

    expected = dt.Frame(
        G=[1, 2],
        V1=[0, 2] / dt.int64,
        V2=[0, 1] / dt.int64,
    )
    assert_equals(expected, actual)
Example #4
0
def test_update_with_groupby():
    """Check that update() combined with by(B) adds columns C, D and E.

    C is a constant, D is the mean of A with the by(B) clause applied, and
    E is A + 1 for every row.
    """
    frame = dt.Frame(A=range(5), B=[1, 1, 2, 2, 2])
    new_columns = update(C=7, D=dt.mean(f.A), E=f.A + 1)

    # The subscript result is discarded: the expression is evaluated only
    # for its effect on `frame`, which the assertion below verifies.
    frame[:, new_columns, by(f.B)]

    expected = dt.Frame(
        A=range(5),
        B=[1, 1, 2, 2, 2],
        C=[7] * 5,
        D=[0.5, 0.5, 3.0, 3.0, 3.0],
        E=range(1, 6),
    )
    assert_equals(frame, expected)
Example #5
0
def test_reducers():
    """Check count/min/max/mean/first/last over a whole time column.

    The column holds five values and one missing entry, so the expected
    count is 5.
    """
    # Time values shared between the input frame and the expected result.
    t_2001 = d(2001, 7, 12, 0, 0, 0)
    t_2005 = d(2005, 3, 14, 15, 9, 26)
    t_2007 = d(2007, 11, 2, 19, 7, 38)
    t_1965 = d(1965, 6, 19, 2, 17, 7)
    t_2004 = d(2004, 4, 18, 12, 3, 31)

    frame = dt.Frame(TIME=[t_2001, t_2005, None, t_2007, t_1965, t_2004])
    reducers = {
        "count": dt.count(f.TIME),
        "min": dt.min(f.TIME),
        "max": dt.max(f.TIME),
        "mean": dt.mean(f.TIME),
        "first": dt.first(f.TIME),
        "last": dt.last(f.TIME),
    }
    result = frame[:, reducers]

    expected = dt.Frame(
        count=[5] / dt.int64,
        min=[t_1965],
        max=[t_2007],
        mean=[d(1996, 11, 12, 4, 55, 32, 400000)],
        first=[t_2001],
        last=[t_2004],
    )
    assert_equals(result, expected)
# #### Number of unique observations per column

# In[3]:

# DT[, lapply(.SD, uniqueN)] --> Rdatatable

DT.nunique()

# #### Mean of all columns by `species`

# In[4]:

# DT[, lapply(.SD, mean), by = species] --> Rdatatable

DT[:, dt.mean(f[:]), by('species')]

# ### __Filtering__

# #### First two observations by `species`

# In[5]:

# DT[, .SD[1:2], by = species] --> Rdatatable

DT[:2, :, by('species')]

# In [datatable](https://datatable.readthedocs.io/en/latest/index.html), rows are selected in the `i` section after the grouping. This differs from R's [data.table](https://github.com/Rdatatable/data.table), where rows selected in `i` are chosen before grouping, and rows selected in `.SD` are chosen after grouping.

# #### Last two observations by `species`
Example #7
0
def test_dt_count_na2():
    """Check countna of V and countna of mean(V) when grouping by G.

    The result is compared with the expected frame through ``to_list()``
    rather than through ``assert_equals``.
    """
    frame = dt.Frame(G=[1, 1, 1, 2, 2, 2], V=[None, None, None, None, 3, 5])
    na_counts = [dt.countna(f.V), dt.countna(dt.mean(f.V))]
    result = frame[:, na_counts, dt.by(f.G)]

    expected = dt.Frame(G=[1, 2], V1=[3, 1], V2=[3, 0])
    assert expected.to_list() == result.to_list()