def test_ifelse_with_groupby():
    """Check `ifelse` inside a query grouped with `by(f.A)`.

    The four queries differ in whether each `ifelse` branch is a reducer
    (`dt.min` / `dt.max` of column B) or the plain column B. The expected
    frame has one row per group when both branches are reducers, and one
    row per input row otherwise.
    """
    DT = dt.Frame(A=[2, 5, 2, 5, 2, 2], B=range(6))

    # Run all four queries first, then compare against the expected frames.
    both_reduced = DT[:, ifelse(f.A == 2, dt.min(f.B), dt.max(f.B)), by(f.A)]
    plain_if_true = DT[:, ifelse(f.A == 2, f.B, dt.max(f.B)), by(f.A)]
    plain_if_false = DT[:, ifelse(f.A == 2, dt.min(f.B), f.B), by(f.A)]
    cond_on_b = DT[:, ifelse(f.B > 2, dt.min(f.B), f.B), by(f.A)]

    expected_both_reduced = dt.Frame(A=[2, 5], C0=[0, 3])
    assert_equals(both_reduced, expected_both_reduced)

    expected_plain_if_true = dt.Frame(A=[2, 2, 2, 2, 5, 5],
                                      C0=[0, 2, 4, 5, 3, 3])
    assert_equals(plain_if_true, expected_plain_if_true)

    expected_plain_if_false = dt.Frame(A=[2, 2, 2, 2, 5, 5],
                                       C0=[0, 0, 0, 0, 1, 3])
    assert_equals(plain_if_false, expected_plain_if_false)

    expected_cond_on_b = dt.Frame(A=[2, 2, 2, 2, 5, 5],
                                  C0=[0, 2, 0, 0, 1, 1])
    assert_equals(cond_on_b, expected_cond_on_b)
def test_groupby():
    """Check count/min/max/mean/first/last on column B, grouped by column A.

    Column B is built from `d(...)` values with a `None` as the last entry
    of group 1; the expected frame lists a count of 2 and a `last` of
    `None` for that group.
    """
    b_values = [
        d(2001, 7, 12, 0, 0, 0),
        d(2005, 3, 14, 15, 9, 26),
        None,
        d(2007, 11, 2, 19, 7, 38),
        d(1965, 6, 19, 2, 17, 7),
        d(2004, 4, 18, 12, 3, 31),
    ]
    DT = dt.Frame(A=[1, 1, 1, 2, 2, 2], B=b_values)

    # The key order here fixes the column order of the result.
    reducers = {
        "count": dt.count(f.B),
        "min": dt.min(f.B),
        "max": dt.max(f.B),
        "mean": dt.mean(f.B),
        "first": dt.first(f.B),
        "last": dt.last(f.B),
    }
    RES = DT[:, reducers, dt.by(f.A)]

    expected = dt.Frame(
        A=[1, 2],
        count=[2, 3] / dt.int64,
        min=[d(2001, 7, 12, 0, 0, 0), d(1965, 6, 19, 2, 17, 7)],
        max=[d(2005, 3, 14, 15, 9, 26), d(2007, 11, 2, 19, 7, 38)],
        mean=[
            d(2003, 5, 13, 19, 34, 43),
            d(1992, 7, 13, 19, 9, 25, 333333),
        ],
        first=[d(2001, 7, 12, 0, 0, 0), d(2007, 11, 2, 19, 7, 38)],
        last=[None, d(2004, 4, 18, 12, 3, 31)],
    )
    assert_equals(RES, expected)
def test_ifelse_multi_different_grouplevels():
    """Check a multi-condition `ifelse` whose branch values differ in kind.

    The branch values are, in order: a reducer (`dt.min`), another reducer
    (`dt.max`), an integer constant, and finally the plain column A as the
    fallback. No `by` clause is used.
    """
    DT = dt.Frame(A=[1, 2, 3, 4, 5, 6])
    chained = ifelse(
        f.A <= 2, dt.min(f.A),
        f.A >= 5, dt.max(f.A),
        f.A == 0, 1000000,
        f.A,
    )
    RES = DT[:, chained]
    expected = dt.Frame([1, 1, 3, 4, 6, 6])
    assert_equals(RES, expected)
def _get_diff_table(self, df, table, keys=None, in_range=None, **params):
    """Run a `select` over `table` and return the result of `self.query`.

    Parameters
    ----------
    df
        Frame used for the default column list (`df.names`) and, when
        `in_range` is given, for the min/max bounds of that column.
    table
        Table name interpolated into the query text.
    keys
        Columns to select; defaults to `df.names` when None.
    in_range
        Optional column name. When truthy, the query is restricted to rows
        whose value in that column lies between the column's min and max
        in `df` (both ends inclusive).
    **params
        Forwarded unchanged to `self.query`.

    Returns
    -------
    Whatever `self.query(query, to_pandas=False, **params)` returns.
    """
    if keys is None:
        keys = df.names

    where = ""
    if in_range:
        lower = df[:, dt.min(f[in_range])][0, 0]
        upper = df[:, dt.max(f[in_range])][0, 0]
        # NOTE(review): the bounds are interpolated into the SQL text as-is
        # (no quoting or escaping) -- confirm callers only pass trusted,
        # numeric-like columns here.
        where = "where `{col}` >= {min} and `{col}` <= {max}".format(
            col=in_range, min=lower, max=upper)

    columns = self._sql_cols(keys)
    query = "select {cols} from {table} {where};".format(
        cols=columns, table=table, where=where)
    return self.query(query, to_pandas=False, **params)
def test_date32_in_groupby():
    """Check count/min/max/first/last on a date32 column grouped by A.

    The frame has 3000 rows: A cycles through 1, 2, 3 and B holds
    0..2999 stored with the "date32" stype, so each group has 1000 rows.
    """
    date32 = dt.stype.date32
    DT = dt.Frame(A=[1, 2, 3] * 1000,
                  B=list(range(3000)),
                  stypes={"B": "date32"})

    # The key order here fixes the column order of the result.
    reducers = {
        "count": dt.count(f.B),
        "min": dt.min(f.B),
        "max": dt.max(f.B),
        "first": dt.first(f.B),
        "last": dt.last(f.B),
    }
    RES = DT[:, reducers, dt.by(f.A)]

    expected = dt.Frame(
        A=[1, 2, 3],
        count=[1000] * 3 / dt.int64,
        min=[0, 1, 2] / date32,
        max=[2997, 2998, 2999] / date32,
        first=[0, 1, 2] / date32,
        last=[2997, 2998, 2999] / date32,
    )
    assert_equals(RES, expected)
def test_reducers():
    """Check count/min/max/mean/first/last on a single ungrouped TIME column.

    TIME is built from six entries, one of which is `None`; the expected
    frame lists a count of 5.
    """
    time_values = [
        d(2001, 7, 12, 0, 0, 0),
        d(2005, 3, 14, 15, 9, 26),
        None,
        d(2007, 11, 2, 19, 7, 38),
        d(1965, 6, 19, 2, 17, 7),
        d(2004, 4, 18, 12, 3, 31),
    ]
    DT = dt.Frame(TIME=time_values)

    # The key order here fixes the column order of the result.
    reducers = {
        "count": dt.count(f.TIME),
        "min": dt.min(f.TIME),
        "max": dt.max(f.TIME),
        "mean": dt.mean(f.TIME),
        "first": dt.first(f.TIME),
        "last": dt.last(f.TIME),
    }
    RES = DT[:, reducers]

    expected = dt.Frame(
        count=[5] / dt.int64,
        min=[d(1965, 6, 19, 2, 17, 7)],
        max=[d(2007, 11, 2, 19, 7, 38)],
        mean=[d(1996, 11, 12, 4, 55, 32, 400000)],
        first=[d(2001, 7, 12, 0, 0, 0)],
        last=[d(2004, 4, 18, 12, 3, 31)],
    )
    assert_equals(RES, expected)
def test_minmax_with_by():
    """Check `dt.min` / `dt.max` of column B when grouping `df` by "A"."""
    # `df` is the shared frame defined outside this function.
    cases = (
        (dt.min, dt.Frame({'A': [1, 2, 3], 'B': [2, 1, 1]}, stype='int32')),
        (dt.max, dt.Frame({'A': [1, 2, 3], 'B': [20, 6, 22]}, stype='int32')),
    )
    for reducer, expected in cases:
        assert_equals(df[:, reducer(f.B), by("A")], expected)
def test_minmax_with_column_dict():
    """Check `dt.min` / `dt.max` applied to a dict of named columns.

    The dict keys are expected to become the output column names.
    """
    # `df` is the shared frame defined outside this function.
    expect_min = dt.Frame({'A_min': [1], 'B_min': [1]}, stype='int32')
    expect_max = dt.Frame({'A_max': [3], 'B_max': [22]}, stype='int32')

    minima = df[:, dt.min({"A_min": f.A, "B_min": f.B})]
    assert_equals(minima, expect_min)

    maxima = df[:, dt.max({"A_max": f.A, "B_max": f.B})]
    assert_equals(maxima, expect_max)
def test_minmax_with_column_list():
    """Check `dt.min` / `dt.max` applied to a list of column expressions."""
    # `df` is the shared frame defined outside this function.
    cases = (
        (dt.min, dt.Frame({'A': [1], 'B': [1]}, stype='int32')),
        (dt.max, dt.Frame({'A': [3], 'B': [22]}, stype='int32')),
    )
    for reducer, expected in cases:
        assert_equals(df[:, reducer([f.A, f.B])], expected)
def test_slice_of_columns():
    """Check `dt.min` / `dt.max` applied to the column slice `f[:]`."""
    # `df` is the shared frame defined outside this function.
    cases = (
        (dt.min, dt.Frame({'A': [1], 'B': [1]}, stype='int32')),
        (dt.max, dt.Frame({'A': [3], 'B': [22]}, stype='int32')),
    )
    for reducer, expected in cases:
        assert_equals(df[:, reducer(f[:])], expected)
def test_list_of_minmax():
    """Check a list of separate per-column `dt.min` / `dt.max` reducers."""
    # `df` is the shared frame defined outside this function.
    cases = (
        (dt.min, dt.Frame({'A': [1], 'B': [1]}, stype='int32')),
        (dt.max, dt.Frame({'A': [3], 'B': [22]}, stype='int32')),
    )
    for reducer, expected in cases:
        assert_equals(df[:, [reducer(f.A), reducer(f.B)]], expected)
def test_single_column():
    """Check `dt.min` / `dt.max` applied to the single column B of `df`."""
    # `df` is the shared frame defined outside this function.
    cases = (
        (dt.min, dt.Frame({'B': [1]}, stype='int32')),
        (dt.max, dt.Frame({'B': [22]}, stype='int32')),
    )
    for reducer, expected in cases:
        assert_equals(df[:, reducer(f.B)], expected)