Exemple #1
0
def test_ifelse_with_groupby():
    """Check `ifelse()` under `by(f.A)` when branches mix reducers and columns.

    Four variants are evaluated on the same frame: both branches reduced,
    only the false branch reduced, only the true branch reduced, and a
    condition on `f.B` with a reduced true branch.
    """
    frame = dt.Frame(A=[2, 5, 2, 5, 2, 2], B=range(6))

    both_reduced = frame[:, ifelse(f.A == 2, dt.min(f.B), dt.max(f.B)),
                         by(f.A)]
    false_reduced = frame[:, ifelse(f.A == 2, f.B, dt.max(f.B)), by(f.A)]
    true_reduced = frame[:, ifelse(f.A == 2, dt.min(f.B), f.B), by(f.A)]
    cond_on_b = frame[:, ifelse(f.B > 2, dt.min(f.B), f.B), by(f.A)]

    # Variants with a per-row branch are expected to keep all six rows,
    # with the A == 2 group listed before the A == 5 group.
    all_keys = [2, 2, 2, 2, 5, 5]
    checks = (
        (both_reduced, dt.Frame(A=[2, 5], C0=[0, 3])),
        (false_reduced, dt.Frame(A=all_keys, C0=[0, 2, 4, 5, 3, 3])),
        (true_reduced, dt.Frame(A=all_keys, C0=[0, 0, 0, 0, 1, 3])),
        (cond_on_b, dt.Frame(A=all_keys, C0=[0, 2, 0, 0, 1, 1])),
    )
    for actual, expected in checks:
        assert_equals(actual, expected)
Exemple #2
0
def test_groupby():
    """Check count/min/max/mean/first/last on column B, grouped by column A.

    Column B holds `d(...)` values (`d` is defined outside this excerpt,
    presumably a datetime constructor) plus one None, which is the last
    entry of group A == 1.
    """
    jul_2001 = d(2001, 7, 12, 0, 0, 0)
    mar_2005 = d(2005, 3, 14, 15, 9, 26)
    nov_2007 = d(2007, 11, 2, 19, 7, 38)
    jun_1965 = d(1965, 6, 19, 2, 17, 7)
    apr_2004 = d(2004, 4, 18, 12, 3, 31)

    frame = dt.Frame(
        A=[1, 1, 1, 2, 2, 2],
        B=[jul_2001, mar_2005, None, nov_2007, jun_1965, apr_2004],
    )
    reducers = {
        "count": dt.count(f.B),
        "min": dt.min(f.B),
        "max": dt.max(f.B),
        "mean": dt.mean(f.B),
        "first": dt.first(f.B),
        "last": dt.last(f.B),
    }
    result = frame[:, reducers, dt.by(f.A)]

    expected = dt.Frame(
        A=[1, 2],
        count=[2, 3] / dt.int64,
        min=[jul_2001, jun_1965],
        max=[mar_2005, nov_2007],
        mean=[
            d(2003, 5, 13, 19, 34, 43),
            d(1992, 7, 13, 19, 9, 25, 333333),
        ],
        first=[jul_2001, nov_2007],
        # Group 1 ends with the None entry, so its "last" is None.
        last=[None, apr_2004],
    )
    assert_equals(result, expected)
Exemple #3
0
def test_ifelse_multi_different_grouplevels():
    """Check a multi-condition `ifelse()` mixing reducers, a scalar and a column.

    Branch values are, in order: `dt.min(f.A)`, `dt.max(f.A)`, the constant
    1000000 (its condition `f.A == 0` matches no row of the input), and the
    column `f.A` itself as the fallback.
    """
    frame = dt.Frame(A=[1, 2, 3, 4, 5, 6])
    chooser = ifelse(
        f.A <= 2, dt.min(f.A),
        f.A >= 5, dt.max(f.A),
        f.A == 0, 1000000,
        f.A,
    )
    result = frame[:, chooser]
    expected = dt.Frame([1, 1, 3, 4, 6, 6])
    assert_equals(result, expected)
Exemple #4
0
 def _get_diff_table(self, df, table, keys=None, in_range=None, **params):
     """Select columns from `table`, optionally bounded by a range taken from `df`.

     Args:
         df: Frame supplying the default column names (`df.names`) and, when
             `in_range` is given, the min/max bounds of that column.
         table: Table name interpolated into the SQL text as-is.
         keys: Columns to select; defaults to `df.names` when None.
         in_range: Optional column name. When truthy, the query is limited
             to rows whose value in that column lies between the minimum
             and maximum found in `df` (both inclusive).
         **params: Forwarded unchanged to `self.query`.

     Returns:
         Whatever `self.query` returns for the built statement, invoked with
         `to_pandas=False`.
     """
     columns = df.names if keys is None else keys

     # No filter unless a range column was requested.
     where = ""
     if in_range:
         lower = df[:, dt.min(f[in_range])][0, 0]
         upper = df[:, dt.max(f[in_range])][0, 0]
         where = "where `{col}` >= {min} and `{col}` <= {max}".format(
             col=in_range, min=lower, max=upper)

     query = "select {cols} from {table} {where};".format(
         cols=self._sql_cols(columns), table=table, where=where)
     return self.query(query, to_pandas=False, **params)
Exemple #5
0
def test_date32_in_groupby():
    """Check count/min/max/first/last on a date32 column B, grouped by A.

    A cycles through 1, 2, 3 over 3000 rows while B counts 0..2999, so each
    group holds 1000 values that start at 0/1/2 and end at 2997/2998/2999.
    """
    frame = dt.Frame(A=[1, 2, 3]*1000, B=list(range(3000)),
                     stypes={"B": "date32"})
    reducers = {
        "count": dt.count(f.B),
        "min": dt.min(f.B),
        "max": dt.max(f.B),
        "first": dt.first(f.B),
        "last": dt.last(f.B),
    }
    result = frame[:, reducers, dt.by(f.A)]

    date32 = dt.stype.date32
    group_starts = [0, 1, 2]
    group_ends = [2997, 2998, 2999]
    expected = dt.Frame(
        A=[1, 2, 3],
        count=[1000] * 3 / dt.int64,
        min=group_starts / date32,
        max=group_ends / date32,
        first=group_starts / date32,
        last=group_ends / date32,
    )
    assert_equals(result, expected)
Exemple #6
0
def test_reducers():
    """Check count/min/max/mean/first/last on an ungrouped TIME column.

    The column holds five `d(...)` values (`d` is defined outside this
    excerpt, presumably a datetime constructor) and one None in the third
    position.
    """
    jul_2001 = d(2001, 7, 12, 0, 0, 0)
    mar_2005 = d(2005, 3, 14, 15, 9, 26)
    nov_2007 = d(2007, 11, 2, 19, 7, 38)
    jun_1965 = d(1965, 6, 19, 2, 17, 7)
    apr_2004 = d(2004, 4, 18, 12, 3, 31)

    frame = dt.Frame(
        TIME=[jul_2001, mar_2005, None, nov_2007, jun_1965, apr_2004])
    reducers = {
        "count": dt.count(f.TIME),
        "min": dt.min(f.TIME),
        "max": dt.max(f.TIME),
        "mean": dt.mean(f.TIME),
        "first": dt.first(f.TIME),
        "last": dt.last(f.TIME),
    }
    result = frame[:, reducers]

    expected = dt.Frame(
        # The expected count is 5 for the six entries, one of which is None.
        count=[5] / dt.int64,
        min=[jun_1965],
        max=[nov_2007],
        mean=[d(1996, 11, 12, 4, 55, 32, 400000)],
        first=[jul_2001],
        last=[apr_2004],
    )
    assert_equals(result, expected)
Exemple #7
0
def test_minmax_with_by():
    """Check `dt.min` / `dt.max` of column B grouped by column "A".

    Runs against the module-level `df` frame, which is defined outside this
    excerpt.
    """
    cases = (
        (dt.min, dt.Frame({'A': [1, 2, 3], 'B': [2, 1, 1]}, stype='int32')),
        (dt.max, dt.Frame({'A': [1, 2, 3], 'B': [20, 6, 22]}, stype='int32')),
    )
    for reducer, expected in cases:
        assert_equals(df[:, reducer(f.B), by("A")], expected)
Exemple #8
0
def test_minmax_with_column_dict():
    """Check `dt.min` / `dt.max` when given a dict mapping names to columns.

    The dict keys are expected to become the output column names. Runs
    against the module-level `df` frame, which is defined outside this
    excerpt.
    """
    wanted_min = dt.Frame({'A_min': [1], 'B_min': [1]}, stype='int32')
    wanted_max = dt.Frame({'A_max': [3], 'B_max': [22]}, stype='int32')

    min_spec = {"A_min": f.A, "B_min": f.B}
    lowest = df[:, dt.min(min_spec)]
    assert_equals(lowest, wanted_min)

    max_spec = {"A_max": f.A, "B_max": f.B}
    highest = df[:, dt.max(max_spec)]
    assert_equals(highest, wanted_max)
Exemple #9
0
def test_minmax_with_column_list():
    """Check `dt.min` / `dt.max` when given a list of column expressions.

    Runs against the module-level `df` frame, which is defined outside this
    excerpt.
    """
    cases = (
        (dt.min, dt.Frame({'A': [1], 'B': [1]}, stype='int32')),
        (dt.max, dt.Frame({'A': [3], 'B': [22]}, stype='int32')),
    )
    for reducer, expected in cases:
        assert_equals(df[:, reducer([f.A, f.B])], expected)
Exemple #10
0
def test_slice_of_columns():
    """Check `dt.min` / `dt.max` applied to the column slice `f[:]`.

    Runs against the module-level `df` frame, which is defined outside this
    excerpt.
    """
    cases = (
        (dt.min, dt.Frame({'A': [1], 'B': [1]}, stype='int32')),
        (dt.max, dt.Frame({'A': [3], 'B': [22]}, stype='int32')),
    )
    for reducer, expected in cases:
        assert_equals(df[:, reducer(f[:])], expected)
Exemple #11
0
def test_list_of_minmax():
    """Check a list of per-column `dt.min` / `dt.max` calls as the selector.

    Runs against the module-level `df` frame, which is defined outside this
    excerpt.
    """
    wanted_min = dt.Frame({'A': [1], 'B': [1]}, stype='int32')
    wanted_max = dt.Frame({'A': [3], 'B': [22]}, stype='int32')

    lowest = df[:, [dt.min(f.A), dt.min(f.B)]]
    assert_equals(lowest, wanted_min)

    highest = df[:, [dt.max(f.A), dt.max(f.B)]]
    assert_equals(highest, wanted_max)
Exemple #12
0
def test_single_column():
    """Check `dt.min` / `dt.max` on the single column B.

    Runs against the module-level `df` frame, which is defined outside this
    excerpt.
    """
    cases = (
        (dt.min, dt.Frame({'B': [1]}, stype='int32')),
        (dt.max, dt.Frame({'B': [22]}, stype='int32')),
    )
    for reducer, expected in cases:
        assert_equals(df[:, reducer(f.B)], expected)
Exemple #13
0
def test_ymd_partial_groupby():
    """Check `ymd()` with a constant year, a reduced month and a per-row day.

    `dt.max(f.B)` is 4 for B = 0..4, while column C supplies the days
    1, 3, 5, 7, 9, so five rows dated in month 4 of 2000 are expected.
    """
    frame = dt.Frame(A=range(5), B=range(5), C=range(1, 11, 2))
    result = frame[:, ymd(2000, dt.max(f.B), f.C)]
    expected_dates = [d(2000, 4, day) for day in range(1, 11, 2)]
    assert_equals(result, dt.Frame(expected_dates))