Exemple #1
0
def test_div_booleans_integers_floats_random(seed):
    """Check `/` between bool, int and float columns against `dt_div`."""
    random.seed(seed)
    n = 1000
    # The draw order (ints, then 0/1 flags, then floats) is kept so that a
    # given seed always yields the same data.
    ints = [random.randint(-100, 100) for _ in range(n)]
    flags = [random.randint(0, 1) for _ in range(n)]
    reals = [random.random() * 1000 - 500 for _ in range(n)]

    DT = dt.Frame(x=ints, y=flags / dt.bool8, z=reals)
    RES = DT[:, [f.x / f.y,
                 f.y / f.x,
                 f.y / f.z,
                 f.z / f.y,
                 f.z / f.x,
                 f.x / f.z]]

    # (numerator, denominator) sources, in the same order as the columns
    # selected into RES above.
    operand_pairs = [
        (ints, flags),
        (flags, ints),
        (flags, reals),
        (reals, flags),
        (reals, ints),
        (ints, reals),
    ]
    EXP = dt.Frame([
        [dt_div(num, den) for num, den in zip(numerators, denominators)]
        for numerators, denominators in operand_pairs
    ])
    assert_equals(RES, EXP)
Exemple #2
0
def test_save_to_jay(tempfile_jay):
    """Write a date column to a Jay file and read it back unchanged."""
    dates = [
        d(1, 1, 1),
        d(2001, 12, 13),
        d(2026, 5, 9),
        None,
        d(1956, 11, 11),
    ]
    written = dt.Frame(dates)
    written.to_jay(tempfile_jay)
    # The in-memory frame is dropped before the file is read back.
    del written

    loaded = dt.fread(tempfile_jay)
    assert loaded.shape == (5, 1)
    assert loaded.type == dt.Type.date32
    assert loaded.to_list()[0] == dates
Exemple #3
0
def test_corr_random(numpy, seed):
    """Compare `corr()` of two random columns with `numpy.corrcoef`.

    Parameters
    ----------
    numpy : module
        The numpy module, supplied as a test fixture.
    seed : int
        Seed for numpy's random generator, so a failure can be reproduced.
    """
    numpy.random.seed(seed)
    arr1 = numpy.random.rand(100)
    arr2 = numpy.random.rand(100)
    # Off-diagonal entry of the 2x2 correlation matrix.
    np_corr = numpy.corrcoef(arr1, arr2)[0, 1]

    DT = dt.Frame([arr1, arr2])
    dt_corr = DT[:, corr(f[0], f[1])][0, 0]
    # Two independent samples can have a correlation arbitrarily close to
    # zero, where a purely relative tolerance (atol=0) shrinks to nothing and
    # ordinary floating-point rounding fails the check for unlucky seeds.
    # A small absolute tolerance keeps the comparison meaningful there.
    assert numpy.isclose(np_corr, dt_corr, atol=1e-12, rtol=1e-12)
Exemple #4
0
def test_select_dates():
    """Select the date column via `f[d]`, `d` and `dt.ltype.time`."""
    DT = dt.Frame(A=[12], B=[d(2000, 12, 20)], C=[True])
    assert DT.types == [dt.Type.int32, dt.Type.date32, dt.Type.bool8]

    # All selections are evaluated first, then each is compared with the
    # lone date column "B".
    selectors = (f[d], d, dt.ltype.time)
    selections = [DT[:, selector] for selector in selectors]
    for selected in selections:
        assert_equals(selected, DT['B'])
Exemple #5
0
def test_assign_nonexisting_column():
    """Assigning `f.B + 1` to a new column "B" must raise a KeyError."""
    # See #1983: if column `B` is created at a wrong moment in the evaluation
    # sequence, this may seg.fault
    expected_error = "Column B does not exist in the Frame"
    DT = dt.Frame(A=range(5))
    with pytest.raises(KeyError, match=expected_error):
        DT[:, "B"] = f.B + 1
    # The frame must still pass the integrity check after the failed assignment.
    frame_integrity_check(DT)
Exemple #6
0
def test_slice_wrong_types2(ttype):
    """A column of type `ttype` is rejected as a slice start, stop or step."""
    DT = dt.Frame(A=[None], B=["Hello"], types={"A": ttype})
    msg = 'Non-integer expressions cannot be used inside a slice'
    # `f.A` placed in turn as start, stop and step: the explicit spellings of
    # `f.A:`, `:f.A` and `::f.A`.
    bad_slices = (
        slice(f.A, None),
        slice(None, f.A),
        slice(None, None, f.A),
    )
    for bad_slice in bad_slices:
        with pytest.raises(TypeError, match=msg):
            DT[:, f.B[bad_slice]]
Exemple #7
0
def test_ymdt_unnamed():
    """Call `ymdt()` with seven positional component columns."""
    components = dict(
        Y=[2001, 2003, 2005, 2020, 1960],
        M=[1, 5, 4, 11, 8],
        D=[12, 18, 30, 1, 14],
        h=[7, 14, 22, 23, 12],
        m=[15, 30, 0, 59, 0],
        s=[12, 23, 0, 59, 27],
        ns=[0, 0, 0, 999999000, 123000],
    )
    DT = dt.Frame(**components)
    RES = DT[:, ymdt(f.Y, f.M, f.D, f.h, f.m, f.s, f.ns)]

    # The two non-zero `ns` entries (999999000 and 123000) correspond to the
    # trailing 999999 and 123 arguments of `t` below.
    expected = dt.Frame([
        t(2001, 1, 12, 7, 15, 12),
        t(2003, 5, 18, 14, 30, 23),
        t(2005, 4, 30, 22, 0, 0),
        t(2020, 11, 1, 23, 59, 59, 999999),
        t(1960, 8, 14, 12, 0, 27, 123),
    ])
    assert_equals(RES, expected)
Exemple #8
0
def test_bool_create_column_forced():
    """Create a frame from mixed values with the type forced to bool."""
    # When the column is forced into type bool, we effectively apply
    # bool(x) to each value in the list,
    # with the only exception being `math.nan`, which is converted into None.
    raw_values = [
        True, False, None, 0, 1, 12, 0.0, -0.0, "", "hi", math.nan
    ]
    expected_column = [
        True, False, None, False, True, True, False, False, False, True, None
    ]
    DT = dt.Frame(raw_values, type=bool)
    assert DT.type == dt.Type.bool8
    assert DT.to_list() == [expected_column]
Exemple #9
0
def test_sub_booleans_integers_floats_random(seed):
    """Check `-` between bool, int and float columns, in both operand orders."""
    random.seed(seed)
    n = 1000
    # The draw order (ints, then 0/1 flags, then floats) is kept so that a
    # given seed always yields the same data.
    ints = [random.randint(-100, 100) for _ in range(n)]
    flags = [random.randint(0, 1) for _ in range(n)]
    reals = [random.random() * 1000 - 500 for _ in range(n)]

    DT = dt.Frame(x=ints, y=flags/dt.bool8, z=reals)
    RES = DT[:, [
        f.x - f.y, f.y - f.x,
        f.y - f.z, f.z - f.y,
        f.z - f.x, f.x - f.z,
    ]]

    # Columns 0, 2, 4 are compared with values computed in plain Python.
    EXP = dt.Frame(
        C0=[a - b for a, b in zip(ints, flags)],
        C2=[a - b for a, b in zip(flags, reals)],
        C4=[a - b for a, b in zip(reals, ints)],
    )
    forward = [0, 2, 4]
    assert_equals(RES[:, forward], EXP)

    # Columns 1, 3, 5 hold the reversed differences, so negating them must
    # reproduce columns 0, 2, 4.
    negated_reverse = {"C0": -f[1], "C2": -f[3], "C4": -f[5]}
    assert_equals(RES[:, forward], RES[:, negated_reverse])
Exemple #10
0
def test_arr32_arr32_repr():
    """Check `str()` of a frame whose column holds lists of lists of ints."""
    nested = [[[1, 2, 3], [4, 9]], None, [None], [[-1], [0, 13]]]
    DT = dt.Frame(V=nested)
    # Trailing spaces inside the literals are significant.
    expected = (
        "   | V                  \n"
        "   | arr32(arr32(int32))\n"
        "-- + -------------------\n"
        " 0 | [[1, 2, 3], [4, 9]]\n"
        " 1 | NA                 \n"
        " 2 | [NA]               \n"
        " 3 | [[-1], [0, 13]]    \n"
        "[4 rows x 1 column]\n"
    )
    assert str(DT) == expected
Exemple #11
0
def test_median_wrong_stype():
    """`median()` on str32 and str64 columns raises a TypeError naming the type."""
    DT = dt.Frame(A=["foo"], B=["moo"], stypes={"A": dt.str32, "B": dt.str64})
    prefix = "Unable to apply reduce function median() to a column of type "
    # (column name, type name expected in the error message)
    cases = (("A", "str32"), ("B", "str64"))
    for column, type_name in cases:
        with pytest.raises(TypeError) as excinfo:
            noop(DT[:, median(getattr(f, column))])
        assert prefix + type_name in str(excinfo.value)
Exemple #12
0
def test_update_misplaced():
    """`update()` as the only selector or in third position raises TypeError."""
    DT = dt.Frame(A=range(5))

    # `update()` as the sole item inside the brackets.
    selector_error = "Column selector must be an integer or a string"
    with pytest.raises(TypeError, match=selector_error):
        DT[update(B=0)]

    # `update()` as the third item; the pattern escapes regex metacharacters.
    position_error = r"Invalid item at position 2 in DT\[i, j, \.\.\.\] call"
    with pytest.raises(TypeError, match=position_error):
        DT[:, :, update(B=0)]
Exemple #13
0
def test_create_from_python1():
    """Create an arr32(int32) column from Python lists of varying length."""
    rows = [[1, 2, 3], [], [4, 5], [6], None, [7, 8, 10, -1]]
    DT = dt.Frame(A=rows)
    assert DT.shape == (6, 1)
    assert DT.type == dt.Type.arr32(dt.Type.int32)
    assert DT.names == ("A", )
    # These properties are deprecated, also see issue #3142
    assert DT.ltypes == (dt.ltype.invalid, )
    assert DT.stypes == (dt.stype.arr32, )
    assert DT.to_list() == [rows]
Exemple #14
0
def test_cast_string_to_date32(ttype):
    """Cast a string column of stype `ttype` to date32; bad strings become None."""
    # (input string, expected value after the cast)
    cases = [
        ("2001-02-14", d(2001, 2, 14)),
        ("2012-11-24", d(2012, 11, 24)),
        ("noise", None),
        ("2022-22-22", None),
        ("2021-02-29", None),
        ("2003-04-31", None),
        ("2020-02-29", d(2020, 2, 29)),
        ("2003-10-01", d(2003, 10, 1)),
        ("1969-12-31", d(1969, 12, 31)),
        ("1970-01-01", d(1970, 1, 1)),
        ("2000-00-00", None),
        ("2000-01-1", None),
        ("2000-01-", None),
    ]
    DT = dt.Frame([text for text, _ in cases], stype=ttype)
    DT[0] = dt.Type.date32
    expected = dt.Frame([value for _, value in cases])
    assert_equals(DT, expected)
Exemple #15
0
def test_date32_relational():
    """Apply all six comparison operators to two date columns containing None."""
    left = [
        d(2000, 1, 1), d(2010, 11, 17), None, d(2020, 3, 30),
        None, d(1998, 5, 14), d(999, 9, 9),
    ]
    right = [
        d(2000, 1, 2), d(2010, 11, 17), None, d(2020, 1, 31),
        d(1998, 5, 14), None, d(999, 9, 9),
    ]
    DT = dt.Frame(A=left, B=right)
    comparisons = {
        ">": f.A > f.B,
        ">=": f.A >= f.B,
        "==": f.A == f.B,
        "!=": f.A != f.B,
        "<=": f.A <= f.B,
        "<": f.A < f.B,
    }
    RES = DT[:, comparisons]

    # Row 2 has None on both sides; rows 4 and 5 have None on one side only.
    expected = dt.Frame({
        ">": [False, False, False, True, False, False, False],
        ">=": [False, True, True, True, False, False, True],
        "==": [False, True, True, False, False, False, True],
        "!=": [True, False, False, True, True, True, False],
        "<=": [True, True, True, False, False, False, True],
        "<": [True, False, False, False, False, False, False],
    })
    assert_equals(RES, expected)
Exemple #16
0
def test_mean_empty_frame():
    """`mean()` over a 0-row frame gives one row of None per column."""
    column_stypes = (dt.bool8, dt.int32, dt.float32, dt.float64)
    DT = dt.Frame([[], [], [], []], names=list("ABCD"), stypes=column_stypes)
    assert DT.shape == (0, 4)

    RZ = DT[:, mean(f[:])]
    frame_integrity_check(RZ)
    assert RZ.shape == (1, 4)
    assert RZ.names == ("A", "B", "C", "D")
    # Only the float32 column keeps its stype; the others come back as float64.
    assert RZ.stypes == (dt.float64, dt.float64, dt.float32, dt.float64)
    assert RZ.to_list() == [[None], [None], [None], [None]]
Exemple #17
0
def test_time64_create_from_python():
    """Create a time64 column from `datetime.datetime` objects and None."""
    make = datetime.datetime
    moments = [
        make(2000, 10, 18, 3, 30),
        make(2010, 11, 13, 15, 11, 59),
        make(2020, 2, 29, 20, 20, 20, 20),
        None,
    ]
    DT = dt.Frame(moments)
    assert DT.types == [dt.Type.time64]
    assert DT.to_list() == [moments]
Exemple #18
0
def test_count_dt_groupby_string():
    """Count per-group values and rows when grouping by a string column."""
    colours = [
        None, "blue", "green", "indico", None, None, "orange",
        "red", "violet", "yellow", "green", None, "blue",
    ]
    df_in = dt.Frame(colours)
    df_reduce = df_in[:, [count(f.C0), count()], "C0"]
    frame_integrity_check(df_reduce)
    assert df_reduce.shape == (8, 3)
    assert df_reduce.ltypes == (ltype.str, ltype.int, ltype.int,)

    expected_keys = [None, "blue", "green", "indico", "orange",
                     "red", "violet", "yellow"]
    # `count(f.C0)` is 0 for the None group, while `count()` is 4 there.
    expected_value_counts = [0, 2, 2, 1, 1, 1, 1, 1]
    expected_row_counts = [4, 2, 2, 1, 1, 1, 1, 1]
    assert df_reduce.to_list() == [expected_keys,
                                   expected_value_counts,
                                   expected_row_counts]
Exemple #19
0
def test_cast_time64_to_date32():
    """Cast a time64 column to date32 and compare with the expected dates."""
    from datetime import date

    # (time value, expected date after the cast)
    cases = [
        (d(2091, 11, 28, 15, 51, 27, 310000), date(2091, 11, 28)),
        (d(1970, 1, 3, 21, 5, 2, 475000), date(1970, 1, 3)),
        (d(1969, 12, 31, 23, 59, 59, 999999), date(1969, 12, 31)),
        (d(1969, 12, 31, 0, 0, 0), date(1969, 12, 31)),
        (d(1962, 4, 12, 3, 52, 27, 458000), date(1962, 4, 12)),
        (None, None),
    ]
    DT = dt.Frame([moment for moment, _ in cases])
    assert DT.type == dt.Type.time64
    DT[0] = dt.Type.date32
    expected = dt.Frame([day for _, day in cases])
    assert_equals(DT, expected)
Exemple #20
0
def test_time64_minmax():
    """Check `min1()`, `max1()` and `countna1()` on a column with two Nones."""
    earliest = d(2000, 10, 18, 3, 30)
    middle = d(2010, 11, 13, 15, 11, 59)
    latest = d(2020, 2, 29, 20, 20, 20, 20)
    DT = dt.Frame([None, earliest, middle, latest, None])
    assert DT.min1() == d(2000, 10, 18, 3, 30)
    assert DT.max1() == d(2020, 2, 29, 20, 20, 20, 20)
    assert DT.countna1() == 2
Exemple #21
0
def test_compare():
    """Compare a shuffled time column with its sorted copy, using all six operators."""
    # The hour component `i * 17 % 11` runs through 0..10 in a shuffled order.
    shuffled = [d(2010, 11, 5, i * 17 % 11, 0, 0) for i in range(11)]
    DT = dt.Frame(A=shuffled)
    DT['B'] = DT.sort(0)
    comparisons = {
        "EQ": (f.A == f.B),
        "NE": (f.A != f.B),
        "LT": (f.A < f.B),
        "LE": (f.A <= f.B),
        "GE": (f.A >= f.B),
        "GT": (f.A > f.B),
    }
    RES = DT[:, comparisons]

    expected = dt.Frame(
        EQ=[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        NE=[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        LT=[0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
        LE=[1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
        GE=[1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
        GT=[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
        type=dt.bool8,
    )
    assert_equals(RES, expected)
Exemple #22
0
def test_time64_kurt():
    """Kurtosis of a time column must equal that of its int64 conversion."""
    values = [
        None,
        d(2010, 11, 13, 15, 11, 50),
        d(2010, 11, 13, 17, 11, 50),
        None,
        d(2010, 11, 13, 15, 11, 50),
        None,
    ]
    DT_date = dt.Frame(values)
    # Same data with the column wrapped in `dt.int64(...)`.
    DT_int = DT_date[:, dt.int64(f[0])]
    assert DT_date.kurt1() == DT_int.kurt1()
    assert_equals(DT_date.kurt(), DT_int.kurt())
Exemple #23
0
def test_count_2d_dt_groupby_integer():
    """Count values in two integer columns and rows, grouped by the first column."""
    keys = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]
    values = [0, 1, 0, 5, 3, 8, 1, 0, 2, 5, None, 8, 1]
    df_in = dt.Frame([keys, values])
    df_reduce = df_in[:, [count(f.C0), count(f.C1), count()], "C0"]
    frame_integrity_check(df_reduce)
    assert df_reduce.shape == (8, 4)
    assert df_reduce.ltypes == (ltype.int,) * 4

    expected_groups = [None, 0, 1, 2, 3, 5, 8, 9]
    expected_c0_counts = [0, 1, 1, 1, 2, 2, 2, 1]
    # Group 8 contains one None in C1, hence 1 here against 2 rows.
    expected_c1_counts = [3, 1, 1, 1, 2, 2, 1, 1]
    expected_row_counts = [3, 1, 1, 1, 2, 2, 2, 1]
    assert df_reduce.to_list() == [expected_groups,
                                   expected_c0_counts,
                                   expected_c1_counts,
                                   expected_row_counts]
Exemple #24
0
def test_arr32_repr_in_terminal():
    """Check `str()` of a frame whose column holds lists of ints."""
    rows = [[1], [2, 3], None, [4, 5, 6], []]
    DT = dt.Frame(A=rows)
    # Trailing spaces inside the literals are significant.
    expected = (
        "   | A           \n"
        "   | arr32(int32)\n"
        "-- + ------------\n"
        " 0 | [1]         \n"
        " 1 | [2, 3]      \n"
        " 2 | NA          \n"
        " 3 | [4, 5, 6]   \n"
        " 4 | []          \n"
        "[5 rows x 1 column]\n"
    )
    assert str(DT) == expected
Exemple #25
0
def test_round_int16_positive_ndigits():
    """Rounding an int16 column with `ndigits >= 0` leaves it unchanged."""
    DT = dt.Frame(A=[None, 12, 0, 34, -999, 32767, 10001, -32767] / dt.int16)
    # First without `ndigits`, then with a range of non-negative values.
    assert_equals(DT[:, dtround(f.A)], DT)
    for digits in (0, 1, 2, 3, 5, 9, 17, 314):
        assert_equals(DT[:, dtround(f.A, ndigits=digits)], DT)
Exemple #26
0
def test_round_bool_positive_ndigits():
    """Rounding a boolean column with `ndigits >= 0` leaves it unchanged."""
    DT = dt.Frame(A=[True, False, None])
    # First without `ndigits`, then with a range of non-negative values.
    assert_equals(DT[:, dtround(f.A)], DT)
    for digits in (0, 1, 2, 3, 5, 9, 19, 999999):
        assert_equals(DT[:, dtround(f.A, ndigits=digits)], DT)
Exemple #27
0
def test_date32_kurt():
    """Kurtosis of a date column must equal that of its int32 conversion."""
    values = [
        None,
        d(2010, 11, 13),
        d(2010, 11, 12),
        d(2022, 11, 12),
        d(2010, 11, 13),
        None,
    ]
    DT_date = dt.Frame(values)
    # Same data with the column wrapped in `dt.int32(...)`.
    DT_int = DT_date[:, dt.int32(f[0])]
    assert DT_date.kurt1() == DT_int.kurt1()
    assert_equals(DT_date.kurt(), DT_int.kurt())
Exemple #28
0
def test_round_int8_positive_ndigits():
    """Rounding an int8 column with `ndigits >= 0` leaves it unchanged."""
    # None followed by every integer from -127 to 127.
    DT = dt.Frame(A=[None, *range(-127, 128)], stype=dt.int8)
    # First without `ndigits`, then with a range of non-negative values.
    assert_equals(DT[:, dtround(f.A)], DT)
    for digits in (0, 1, 2, 3, 5, 9, 17, 987654321):
        assert_equals(DT[:, dtround(f.A, ndigits=digits)], DT)
Exemple #29
0
def test_median_grouped():
    """Compute `median()` of an int32 column per group of an int16 column."""
    group_ids = [0, 0, 0, 0, 1, 1, 1, 1, 1]
    measurements = [2, 6, 1, 0, -3, 4, None, None, -1]
    DT = dt.Frame(A=group_ids, B=measurements,
                  stypes={"A": dt.int16, "B": dt.int32})
    RES = DT[:, median(f.B), by(f.A)]
    assert RES.shape == (2, 2)
    assert RES.stypes == (dt.int16, dt.float64)
    # Group 0 holds {0, 1, 2, 6}; group 1 holds {-3, -1, 4} plus two Nones.
    assert RES.to_list() == [[0, 1], [1.5, -1.0]]
Exemple #30
0
def test_bool_create_force_from_exceptional():
    """Force a bool column from objects whose truth value raises."""
    # Here we check what happens if the objects that we are trying to
    # cast into bools throw an exception. An FExpr is just that kind of
    # object.
    # Two things are expected to happen here: the exception-raising objects
    # become `None`s, and exception objects are discarded.
    with pytest.raises(TypeError):
        assert dt.f.A

    sources = [None, dt.f.A, math.nan, dt.f[:], TypeError]
    expected_column = [None, None, None, None, True]
    DT = dt.Frame(sources, type=bool)
    assert DT.to_list() == [expected_column]