def test_div_booleans_integers_floats_random(seed):
    # Divide every ordered pair of distinct columns (int, bool, float) and
    # compare each result column against dt_div() applied element-wise.
    random.seed(seed)
    n = 1000
    ints = [random.randint(-100, 100) for _ in range(n)]
    bools = [random.randint(0, 1) for _ in range(n)]
    floats = [random.random() * 1000 - 500 for _ in range(n)]
    DT = dt.Frame(x=ints, y=bools / dt.bool8, z=floats)
    quotients = [f.x / f.y, f.y / f.x, f.y / f.z,
                 f.z / f.y, f.z / f.x, f.x / f.z]
    RES = DT[:, quotients]
    # (numerator, denominator) sources, in the same order as `quotients`.
    operand_pairs = [(ints, bools), (bools, ints), (bools, floats),
                     (floats, bools), (floats, ints), (ints, floats)]
    EXP = dt.Frame([[dt_div(num, den) for num, den in zip(top, bottom)]
                    for top, bottom in operand_pairs])
    assert_equals(RES, EXP)
def test_save_to_jay(tempfile_jay):
    # Write a single date column to a Jay file, read it back with fread()
    # and check that shape, type and values are unchanged.
    dates = [
        d(1, 1, 1),
        d(2001, 12, 13),
        d(2026, 5, 9),
        None,
        d(1956, 11, 11),
    ]
    written = dt.Frame(dates)
    written.to_jay(tempfile_jay)
    del written
    reloaded = dt.fread(tempfile_jay)
    assert reloaded.shape == (5, 1)
    assert reloaded.type == dt.Type.date32
    assert reloaded.to_list()[0] == dates
def test_corr_random(numpy, seed):
    # Cross-check corr() on two random columns against numpy.corrcoef().
    numpy.random.seed(seed)
    xs = numpy.random.rand(100)
    ys = numpy.random.rand(100)
    expected = numpy.corrcoef(xs, ys)[0, 1]
    frame = dt.Frame([xs, ys])
    actual = frame[:, corr(f[0], f[1])][0, 0]
    assert numpy.isclose(expected, actual, atol=0, rtol=1e-12)
def test_select_dates():
    # Each of the three selectors below is expected to pick out column B.
    DT = dt.Frame(A=[12], B=[d(2000, 12, 20)], C=[True])
    assert DT.types == [dt.Type.int32, dt.Type.date32, dt.Type.bool8]
    selectors = (f[d], d, dt.ltype.time)
    selected = [DT[:, selector] for selector in selectors]
    for result in selected:
        assert_equals(result, DT['B'])
def test_assign_nonexisting_column():
    # See #1983: if column `B` is created at a wrong moment in the evaluation
    # sequence, this may seg.fault
    frame = dt.Frame(A=range(5))
    expected_msg = "Column B does not exist in the Frame"
    with pytest.raises(KeyError, match=expected_msg):
        frame[:, "B"] = f.B + 1
    # The failed assignment must leave the frame in a consistent state.
    frame_integrity_check(frame)
def test_slice_wrong_types2(ttype):
    # Using column A (of type `ttype`) as the start, stop or step of a slice
    # applied to column B must raise a TypeError.
    frame = dt.Frame(A=[None], B=["Hello"], types={"A": ttype})
    msg = 'Non-integer expressions cannot be used inside a slice'
    # Explicit equivalents of `f.A:`, `:f.A` and `::f.A`.
    bad_slices = (
        slice(f.A, None),
        slice(None, f.A),
        slice(None, None, f.A),
    )
    for bad in bad_slices:
        with pytest.raises(TypeError, match=msg):
            frame[:, f.B[bad]]
def test_ymdt_unnamed():
    # Build timestamps from separate component columns, passing all seven
    # components to ymdt() positionally.
    parts = dt.Frame(
        Y=[2001, 2003, 2005, 2020, 1960],
        M=[1, 5, 4, 11, 8],
        D=[12, 18, 30, 1, 14],
        h=[7, 14, 22, 23, 12],
        m=[15, 30, 0, 59, 0],
        s=[12, 23, 0, 59, 27],
        ns=[0, 0, 0, 999999000, 123000],
    )
    combined = parts[:, ymdt(f.Y, f.M, f.D, f.h, f.m, f.s, f.ns)]
    expected = dt.Frame([
        t(2001, 1, 12, 7, 15, 12),
        t(2003, 5, 18, 14, 30, 23),
        t(2005, 4, 30, 22, 0, 0),
        t(2020, 11, 1, 23, 59, 59, 999999),
        t(1960, 8, 14, 12, 0, 27, 123),
    ])
    assert_equals(combined, expected)
def test_bool_create_column_forced():
    # When the column is forced into type bool, we effectively apply
    # bool(x) to each value in the list,
    # with the only exception being `math.nan`, which is converted into None.
    values = [True, False, None, 0, 1, 12, 0.0, -0.0, "", "hi", math.nan]
    expected = [True, False, None, False, True, True,
                False, False, False, True, None]
    frame = dt.Frame(values, type=bool)
    assert frame.type == dt.Type.bool8
    assert frame.to_list() == [expected]
def test_sub_booleans_integers_floats_random(seed):
    # Subtract int, bool and float columns from one another in both
    # directions. Result columns 0, 2, 4 are compared with plain Python
    # subtraction, and also with the negation of result columns 1, 3, 5.
    random.seed(seed)
    n = 1000
    ints = [random.randint(-100, 100) for _ in range(n)]
    bools = [random.randint(0, 1) for _ in range(n)]
    floats = [random.random() * 1000 - 500 for _ in range(n)]
    DT = dt.Frame(x=ints, y=bools/dt.bool8, z=floats)
    differences = [f.x - f.y, f.y - f.x,
                   f.y - f.z, f.z - f.y,
                   f.z - f.x, f.x - f.z]
    RES = DT[:, differences]
    EXP = dt.Frame(
        C0=[a - b for a, b in zip(ints, bools)],
        C2=[a - b for a, b in zip(bools, floats)],
        C4=[a - b for a, b in zip(floats, ints)],
    )
    forward = RES[:, [0, 2, 4]]
    negated_reverse = RES[:, {"C0": -f[1], "C2": -f[3], "C4": -f[5]}]
    assert_equals(forward, EXP)
    assert_equals(forward, negated_reverse)
def test_arr32_arr32_repr():
    # Check the terminal rendering of a nested-array column.
    #
    # The expected text is a fixed-width table: the header lines are indented
    # to the width of the row-index column, and every cell of column V is
    # left-aligned and space-padded to the width of the widest entry (19
    # characters, matching the separator line). The padding is significant,
    # so every line below has the same length before its "\n".
    DT = dt.Frame(V=[[[1, 2, 3], [4, 9]], None, [None], [[-1], [0, 13]]])
    assert str(DT) == (
        "   | V                  \n"
        "   | arr32(arr32(int32))\n"
        "-- + -------------------\n"
        " 0 | [[1, 2, 3], [4, 9]]\n"
        " 1 | NA                 \n"
        " 2 | [NA]               \n"
        " 3 | [[-1], [0, 13]]    \n"
        "[4 rows x 1 column]\n"
    )
def test_median_wrong_stype():
    # median() over a str32 or a str64 column must raise a TypeError whose
    # message names the offending column type.
    frame = dt.Frame(A=["foo"], B=["moo"],
                     stypes={"A": dt.str32, "B": dt.str64})
    prefix = "Unable to apply reduce function median() to a column of type "

    with pytest.raises(TypeError) as err32:
        noop(frame[:, median(f.A)])
    assert (prefix + "str32") in str(err32.value)

    with pytest.raises(TypeError) as err64:
        noop(frame[:, median(f.B)])
    assert (prefix + "str64") in str(err64.value)
def test_update_misplaced():
    # update() is rejected both as the sole selector and as a third item
    # of DT[i, j, ...].
    frame = dt.Frame(A=range(5))

    selector_msg = "Column selector must be an integer or a string"
    with pytest.raises(TypeError, match=selector_msg):
        frame[update(B=0)]

    position_msg = r"Invalid item at position 2 in DT\[i, j, \.\.\.\] call"
    with pytest.raises(TypeError, match=position_msg):
        frame[:, :, update(B=0)]
def test_create_from_python1():
    # A list of integer lists (with an empty list and a None among them)
    # becomes a single arr32(int32) column.
    rows = [[1, 2, 3], [], [4, 5], [6], None, [7, 8, 10, -1]]
    frame = dt.Frame(A=rows)
    assert frame.shape == (6, 1)
    assert frame.type == dt.Type.arr32(dt.Type.int32)
    assert frame.names == ("A", )
    # These properties are deprecated, also see issue #3142
    assert frame.ltypes == (dt.ltype.invalid, )
    assert frame.stypes == (dt.stype.arr32, )
    assert frame.to_list() == [rows]
def test_cast_string_to_date32(ttype):
    # Strings that do not form a valid YYYY-MM-DD date are expected to
    # become None after the cast to date32.
    texts = [
        "2001-02-14", "2012-11-24", "noise", "2022-22-22", "2021-02-29",
        "2003-04-31", "2020-02-29", "2003-10-01", "1969-12-31", "1970-01-01",
        "2000-00-00", "2000-01-1", "2000-01-",
    ]
    DT = dt.Frame(texts, stype=ttype)
    DT[0] = dt.Type.date32
    expected = dt.Frame([
        d(2001, 2, 14), d(2012, 11, 24), None, None, None,
        None, d(2020, 2, 29), d(2003, 10, 1), d(1969, 12, 31), d(1970, 1, 1),
        None, None, None,
    ])
    assert_equals(DT, expected)
def test_date32_relational():
    # Apply all six relational operators to two date columns that contain
    # missing values in matching and in non-matching rows.
    left = [d(2000, 1, 1), d(2010, 11, 17), None, d(2020, 3, 30),
            None, d(1998, 5, 14), d(999, 9, 9)]
    right = [d(2000, 1, 2), d(2010, 11, 17), None, d(2020, 1, 31),
             d(1998, 5, 14), None, d(999, 9, 9)]
    DT = dt.Frame(A=left, B=right)
    comparisons = {
        ">": f.A > f.B,
        ">=": f.A >= f.B,
        "==": f.A == f.B,
        "!=": f.A != f.B,
        "<=": f.A <= f.B,
        "<": f.A < f.B,
    }
    RES = DT[:, comparisons]
    expected = dt.Frame({
        ">":  [False, False, False, True, False, False, False],
        ">=": [False, True, True, True, False, False, True],
        "==": [False, True, True, False, False, False, True],
        "!=": [True, False, False, True, True, True, False],
        "<=": [True, True, True, False, False, False, True],
        "<":  [True, False, False, False, False, False, False],
    })
    assert_equals(RES, expected)
def test_mean_empty_frame():
    # mean() over a 0-row frame yields one row of missing values, with one
    # output column per input column.
    column_stypes = (dt.bool8, dt.int32, dt.float32, dt.float64)
    empty = dt.Frame([[]] * 4, names=list("ABCD"), stypes=column_stypes)
    assert empty.shape == (0, 4)
    means = empty[:, mean(f[:])]
    frame_integrity_check(means)
    assert means.shape == (1, 4)
    assert means.names == ("A", "B", "C", "D")
    assert means.stypes == (dt.float64, dt.float64, dt.float32, dt.float64)
    assert means.to_list() == [[None]] * 4
def test_time64_create_from_python():
    # A list of datetime.datetime objects (plus a None) becomes a time64
    # column that converts back to the same Python values.
    stamp = datetime.datetime
    moments = [
        stamp(2000, 10, 18, 3, 30),
        stamp(2010, 11, 13, 15, 11, 59),
        stamp(2020, 2, 29, 20, 20, 20, 20),
        None,
    ]
    frame = dt.Frame(moments)
    assert frame.types == [dt.Type.time64]
    assert frame.to_list() == [moments]
def test_count_dt_groupby_string():
    # Group a string column by itself: count(f.C0) is checked against the
    # number of non-missing values per group, count() against the group
    # sizes.
    colours = dt.Frame([None, "blue", "green", "indico", None, None, "orange",
                        "red", "violet", "yellow", "green", None, "blue"])
    counted = colours[:, [count(f.C0), count()], "C0"]
    frame_integrity_check(counted)
    assert counted.shape == (8, 3)
    assert counted.ltypes == (ltype.str, ltype.int, ltype.int,)
    group_keys = [None, "blue", "green", "indico",
                  "orange", "red", "violet", "yellow"]
    non_missing = [0, 2, 2, 1, 1, 1, 1, 1]
    group_sizes = [4, 2, 2, 1, 1, 1, 1, 1]
    assert counted.to_list() == [group_keys, non_missing, group_sizes]
def test_cast_time64_to_date32():
    # Casting time64 to date32 is expected to keep only the calendar date,
    # including for timestamps that precede 1970-01-01.
    from datetime import date

    moments = [
        d(2091, 11, 28, 15, 51, 27, 310000),
        d(1970, 1, 3, 21, 5, 2, 475000),
        d(1969, 12, 31, 23, 59, 59, 999999),
        d(1969, 12, 31, 0, 0, 0),
        d(1962, 4, 12, 3, 52, 27, 458000),
        None,
    ]
    DT = dt.Frame(moments)
    assert DT.type == dt.Type.time64
    DT[0] = dt.Type.date32
    expected = dt.Frame([
        date(2091, 11, 28),
        date(1970, 1, 3),
        date(1969, 12, 31),
        date(1969, 12, 31),
        date(1962, 4, 12),
        None,
    ])
    assert_equals(DT, expected)
def test_time64_minmax():
    # min1()/max1() return the earliest/latest timestamp; countna1() returns
    # the number of missing entries (two here).
    earliest = d(2000, 10, 18, 3, 30)
    middle = d(2010, 11, 13, 15, 11, 59)
    latest = d(2020, 2, 29, 20, 20, 20, 20)
    frame = dt.Frame([None, earliest, middle, latest, None])
    assert frame.min1() == d(2000, 10, 18, 3, 30)
    assert frame.max1() == d(2020, 2, 29, 20, 20, 20, 20)
    assert frame.countna1() == 2
def test_compare():
    # Column A holds 11 timestamps whose hour is i * 17 % 11; column B is
    # the same data sorted. All six comparison operators are then checked
    # row by row.
    shuffled = [d(2010, 11, 5, i * 17 % 11, 0, 0) for i in range(11)]
    DT = dt.Frame(A=shuffled)
    DT['B'] = DT.sort(0)
    comparisons = {
        "EQ": (f.A == f.B),
        "NE": (f.A != f.B),
        "LT": (f.A < f.B),
        "LE": (f.A <= f.B),
        "GE": (f.A >= f.B),
        "GT": (f.A > f.B),
    }
    RES = DT[:, comparisons]
    expected = dt.Frame(
        EQ=[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        NE=[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        LT=[0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
        LE=[1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
        GE=[1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
        GT=[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
        type=dt.bool8,
    )
    assert_equals(RES, expected)
def test_time64_kurt():
    # Kurtosis of a time64 column must equal the kurtosis of the same
    # column cast to int64.
    moments = [
        None,
        d(2010, 11, 13, 15, 11, 50),
        d(2010, 11, 13, 17, 11, 50),
        None,
        d(2010, 11, 13, 15, 11, 50),
        None,
    ]
    as_time = dt.Frame(moments)
    as_int = as_time[:, dt.int64(f[0])]
    assert as_time.kurt1() == as_int.kurt1()
    assert_equals(as_time.kurt(), as_int.kurt())
def test_count_2d_dt_groupby_integer():
    # Group two integer columns by the first one, then check the per-group
    # non-missing counts of each column and the per-group row counts.
    keys = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]
    vals = [0, 1, 0, 5, 3, 8, 1, 0, 2, 5, None, 8, 1]
    source = dt.Frame([keys, vals])
    counted = source[:, [count(f.C0), count(f.C1), count()], "C0"]
    frame_integrity_check(counted)
    assert counted.shape == (8, 4)
    assert counted.ltypes == (ltype.int,) * 4
    expected = [
        [None, 0, 1, 2, 3, 5, 8, 9],   # group keys
        [0, 1, 1, 1, 2, 2, 2, 1],      # count(f.C0)
        [3, 1, 1, 1, 2, 2, 1, 1],      # count(f.C1)
        [3, 1, 1, 1, 2, 2, 2, 1],      # count()
    ]
    assert counted.to_list() == expected
def test_arr32_repr_in_terminal():
    # Check the terminal rendering of an arr32(int32) column.
    #
    # The expected text is a fixed-width table: the header lines are indented
    # to the width of the row-index column, and every cell of column A is
    # left-aligned and space-padded to the width of the widest entry (12
    # characters, matching the separator line). The padding is significant,
    # so every line below has the same length before its "\n".
    DT = dt.Frame(A=[[1], [2, 3], None, [4, 5, 6], []])
    assert str(DT) == (
        "   | A           \n"
        "   | arr32(int32)\n"
        "-- + ------------\n"
        " 0 | [1]         \n"
        " 1 | [2, 3]      \n"
        " 2 | NA          \n"
        " 3 | [4, 5, 6]   \n"
        " 4 | []          \n"
        "[5 rows x 1 column]\n"
    )
def test_round_int16_positive_ndigits():
    # Rounding an int16 column with the default or any non-negative
    # `ndigits` must return the column unchanged.
    DT = dt.Frame(A=[None, 12, 0, 34, -999, 32767, 10001, -32767] / dt.int16)
    assert_equals(DT[:, dtround(f.A)], DT)
    for digits in (0, 1, 2, 3, 5, 9, 17, 314):
        assert_equals(DT[:, dtround(f.A, ndigits=digits)], DT)
def test_round_bool_positive_ndigits():
    # Rounding a boolean column with the default or any non-negative
    # `ndigits` must return the column unchanged.
    DT = dt.Frame(A=[True, False, None])
    assert_equals(DT[:, dtround(f.A)], DT)
    for digits in (0, 1, 2, 3, 5, 9, 19, 999999):
        assert_equals(DT[:, dtround(f.A, ndigits=digits)], DT)
def test_date32_kurt():
    # Kurtosis of a date32 column must equal the kurtosis of the same
    # column cast to int32.
    dates = [
        None,
        d(2010, 11, 13),
        d(2010, 11, 12),
        d(2022, 11, 12),
        d(2010, 11, 13),
        None,
    ]
    as_date = dt.Frame(dates)
    as_int = as_date[:, dt.int32(f[0])]
    assert as_date.kurt1() == as_int.kurt1()
    assert_equals(as_date.kurt(), as_int.kurt())
def test_round_int8_positive_ndigits():
    # Rounding an int8 column (None plus every value from -127 to 127) with
    # the default or any non-negative `ndigits` must return it unchanged.
    DT = dt.Frame(A=[None] + list(range(-127, 128)), stype=dt.int8)
    assert_equals(DT[:, dtround(f.A)], DT)
    for digits in (0, 1, 2, 3, 5, 9, 17, 987654321):
        assert_equals(DT[:, dtround(f.A, ndigits=digits)], DT)
def test_median_grouped():
    # Per-group median of an int32 column: a float64 result, with missing
    # values in B not contributing (group 1 -> median of -3, 4, -1).
    groups = [0, 0, 0, 0, 1, 1, 1, 1, 1]
    values = [2, 6, 1, 0, -3, 4, None, None, -1]
    DT = dt.Frame(A=groups, B=values,
                  stypes={"A": dt.int16, "B": dt.int32})
    medians = DT[:, median(f.B), by(f.A)]
    assert medians.shape == (2, 2)
    assert medians.stypes == (dt.int16, dt.float64)
    assert medians.to_list() == [[0, 1], [1.5, -1.0]]
def test_bool_create_force_from_exceptional():
    # Here we check what happens if the objects that we are trying to
    # cast into bools throw an exception. An FExpr is just that kind of
    # object.
    # Two things are expected to happen here: the exception-raising objects
    # become `None`s, and exception objects are discarded.
    with pytest.raises(TypeError):
        assert dt.f.A

    values = [None, dt.f.A, math.nan, dt.f[:], TypeError]
    frame = dt.Frame(values, type=bool)
    assert frame.to_list() == [[None, None, None, None, True]]