Ejemplo n.º 1
0
def test_j_dict(dt0, tbl0):
    """Select columns through a dict in `j`; the dict keys name the output."""
    selection = {"x": f[0], "y": f["D"]}
    result = dt0[:, selection]
    result.internal.check()
    assert result.shape == (6, 2)
    assert same_iterables(result.names, ("x", "y"))
    # The selection is expected to be materialized rather than a view.
    assert not result.internal.isview
    expected_columns = [tbl0[0], tbl0[3]]
    assert same_iterables(result.to_list(), expected_columns)
Ejemplo n.º 2
0
def test_create_from_kwargs1():
    """Build a Frame from keyword arguments and verify names and contents."""
    frame = dt.Frame(A=[1, 2, 3], B=[True, None, False], C=["a", "b", "c"])
    frame_integrity_check(frame)
    expected_names = ("A", "B", "C")
    expected_data = [
        [1, 2, 3],
        [True, None, False],
        ["a", "b", "c"],
    ]
    assert same_iterables(frame.names, expected_names)
    assert same_iterables(frame.to_list(), expected_data)
Ejemplo n.º 3
0
def test_j_dict(dt0, tbl0):
    """Select columns through a dict in `j`; the dict keys name the output."""
    selection = {"x": f[0], "y": f["D"]}
    result = dt0[:, selection]
    frame_integrity_check(result)
    assert result.shape == (6, 2)
    assert same_iterables(result.names, ("x", "y"))
    # The selection is expected to be materialized rather than a view.
    assert not isview(result)
    expected_columns = [tbl0[0], tbl0[3]]
    assert same_iterables(result.to_list(), expected_columns)
Ejemplo n.º 4
0
def test_cols_expression(dt0, tbl0):
    """
    Check that computed columns can be selected in the `j` position, given
    as a single expression, a list of expressions, or a dict:
        dt[:, f.A + f.B]
    """
    idx = range(6)
    col_a, col_b, col_c, col_d = tbl0[0], tbl0[1], tbl0[2], tbl0[3]

    # Single expression -> one integer column.
    single = dt0[:, f.A + f.B]
    single.internal.check()
    assert single.shape == (6, 1)
    assert single.ltypes == (ltype.int, )
    assert as_list(single) == [[col_a[i] + col_b[i] for i in idx]]

    # List of expressions -> one output column per expression.
    expressions = [f.A + f.B, f.C - f.D, f.A / f.C, f.B * f.D]
    multi = dt0[:, expressions]
    multi.internal.check()
    assert multi.shape == (6, 4)
    assert multi.ltypes == (ltype.int, ltype.real, ltype.real, ltype.int)
    expected_multi = [
        [col_a[i] + col_b[i] for i in idx],
        [col_c[i] - col_d[i] for i in idx],
        [col_a[i] / col_c[i] for i in idx],
        [col_b[i] * col_d[i] for i in idx],
    ]
    assert as_list(multi) == expected_multi

    # Dict of expressions -> the keys become the column names.
    named = dt0[:, {"foo": f.A + f.B - f.C * 10, "a": f.A, "b": 1, "c": 2}]
    named.internal.check()
    assert named.shape == (6, 4)
    assert same_iterables(named.names, ("foo", "a", "b", "c"))
    expected_ltypes = (ltype.real, ltype.int, ltype.int, ltype.real)
    assert same_iterables(named.ltypes, expected_ltypes)
    assert not named.internal.isview
    expected_foo = [col_a[i] + col_b[i] - col_c[i] * 10 for i in idx]
    assert as_list(named["foo"]) == [expected_foo]
Ejemplo n.º 5
0
def test_create_from_kwargs2():
    """Build a Frame from keyword columns with explicit per-column stypes."""
    requested_stypes = [dt.int64, dt.float32]
    frame = dt.Frame(x=range(4), y=[1, 3, 8, 0], stypes=requested_stypes)
    frame_integrity_check(frame)
    assert frame.shape == (4, 2)
    assert same_iterables(frame.names, ("x", "y"))
    assert same_iterables(frame.stypes, (dt.int64, dt.float32))
    expected_data = [[0, 1, 2, 3], [1, 3, 8, 0]]
    assert same_iterables(frame.to_list(), expected_data)
Ejemplo n.º 6
0
def test_create_from_kwargs1():
    """Build a Frame from keyword arguments and verify names and contents."""
    frame = dt.Frame(A=[1, 2, 3], B=[True, None, False], C=["a", "b", "c"])
    frame.internal.check()
    expected_names = ("A", "B", "C")
    expected_data = [
        [1, 2, 3],
        [True, None, False],
        ["a", "b", "c"],
    ]
    assert same_iterables(frame.names, expected_names)
    assert same_iterables(frame.topython(), expected_data)
Ejemplo n.º 7
0
def test_create_from_dict():
    """Build a Frame from a dict of lists and check shape, names and ltypes."""
    source = {
        "A": [1, 5, 10],
        "B": [True, False, None],
        "C": ["alpha", "beta", "gamma"],
    }
    frame = dt.Frame(source)
    assert frame.shape == (3, 3)
    assert same_iterables(frame.names, ("A", "B", "C"))
    expected_ltypes = (ltype.int, ltype.bool, ltype.str)
    assert same_iterables(frame.ltypes, expected_ltypes)
    frame_integrity_check(frame)
Ejemplo n.º 8
0
def test_create_from_mixed_sources(numpy):
    """Build a Frame from a dict mixing numpy arrays, a range and a list."""
    source = {
        "A": numpy.random.randn(5),
        "B": range(5),
        "C": ["foo", "baw", "garrgh", "yex", "fin"],
        "D": numpy.array([5, 8, 1, 3, 5813], dtype="int32"),
    }
    frame = dt.Frame(source)
    frame_integrity_check(frame)
    assert frame.shape == (5, 4)
    assert same_iterables(frame.names, ("A", "B", "C", "D"))
    expected_stypes = (
        stype.float64,
        stype.int32,
        stype.str32,
        stype.int32,
    )
    assert same_iterables(frame.stypes, expected_stypes)
Ejemplo n.º 9
0
def test_groupby_multi_large(seed):
    """
    Group a random frame by three key columns and sum the fourth, comparing
    the result against a reference computed in pure Python.
    """
    random.seed(seed)
    letters = "abcdefghijklmn"
    n = 100 + int(random.expovariate(0.0001))
    # The columns are generated one after another so that the sequence of
    # random draws for a given seed stays the same.
    col0 = [random.choice([True, False]) for _ in range(n)]
    col1 = [random.randint(-10, 10) for _ in range(n)]
    col2 = [random.choice(letters) for _ in range(n)]
    col3 = [random.random() for _ in range(n)]

    # Reference answer: sort the rows, then fold each run of equal
    # (A, B, C) keys into a single [key, running_sum] entry.
    rows = sorted(zip(col0, col1, col2, col3))
    runs = []
    for row in rows:
        key, value = row[:3], row[3]
        if runs and runs[-1][0] == key:
            runs[-1][1] += value
        else:
            runs.append([key, 0 + value])
    grouped = [key + (total, ) for key, total in runs]

    DT0 = dt.Frame([col0, col1, col2, col3], names=["A", "B", "C", "D"])
    DT1 = DT0[:, sum(f.D), by(f.A, f.B, f.C)]
    DT2 = dt.Frame(grouped)
    assert same_iterables(DT1.to_list(), DT2.to_list())
Ejemplo n.º 10
0
def test_cols_dict(dt0, tbl0):
    """
    Test selecting multiple columns using a dictionary whose keys give the
    new column names:
        dt(select={"x": 0, "y": "D"})
    """
    picked = dt0(select={"x": 0, "y": "D"})
    picked.internal.check()
    assert picked.shape == (6, 2)
    assert same_iterables(picked.names, ("x", "y"))
    assert not picked.internal.isview
    expected_columns = [tbl0[0], tbl0[3]]
    assert same_iterables(as_list(picked), expected_columns)

    # A single key mapped to a full slice selects every column; the
    # resulting names are checked below.
    everything = dt0[{"_": slice(None)}]
    everything.internal.check()
    assert everything.shape == (6, 4)
    assert everything.names == ("_", "_1", "_2", "_3")
    assert not everything.internal.isview
    assert as_list(everything) == tbl0
Ejemplo n.º 11
0
def test_create_from_dict_of_numpy_arrays(numpy):
    """Build a Frame from a dict of three random numpy arrays of length 67."""
    source = {
        "A": numpy.random.randn(67),
        "B": numpy.random.randn(67),
        "C": numpy.random.randn(67),
    }
    frame = dt.Frame(source)
    frame_integrity_check(frame)
    assert frame.shape == (67, 3)
    assert frame.stypes == (stype.float64,) * 3
    assert same_iterables(frame.names, ("A", "B", "C"))
Ejemplo n.º 12
0
def test_topandas():
    """Convert a small Frame with `to_pandas()` and check every column."""
    frame = dt.Frame({"A": [1, 5], "B": ["hello", "you"], "C": [True, False]})
    converted = frame.to_pandas()
    assert converted.shape == (2, 3)
    assert same_iterables(converted.columns.tolist(), ["A", "B", "C"])
    expected_columns = (
        ("A", [1, 5]),
        ("B", ["hello", "you"]),
        ("C", [True, False]),
    )
    for name, values in expected_columns:
        assert converted[name].values.tolist() == values
Ejemplo n.º 13
0
def test_tonumpy1(numpy):
    """Convert a mixed-type Frame to numpy via `to_numpy()` and `numpy.array`."""
    source = {
        "A": [1, 5],
        "B": ["helo", "you"],
        "C": [True, False],
        "D": [3.4, None],
    }
    frame = dt.Frame(source)
    via_method = frame.to_numpy()
    assert via_method.shape == frame.shape
    assert via_method.dtype == numpy.dtype("object")
    # The array is transposed before comparing with the per-column lists.
    assert same_iterables(via_method.T.tolist(), frame.to_list())
    via_constructor = numpy.array(frame)
    assert (via_method == via_constructor).all()
Ejemplo n.º 14
0
def test_0rows_frame():
    """Row filtering and column expressions on a frame that has zero rows."""
    empty = dt.Frame(A=[], B=[], stype=int)
    assert empty.shape == (0, 2)

    # Filtering rows keeps both columns and still yields no rows.
    filtered = empty[f.A == 0, :]
    frame_integrity_check(filtered)
    assert filtered.shape == (0, 2)
    assert same_iterables(filtered.names, ("A", "B"))

    # A computed column yields a single, empty integer column.
    computed = empty[:, f.A - f.B]
    frame_integrity_check(computed)
    assert computed.shape == (0, 1)
    assert computed.ltypes == (ltype.int, )
Ejemplo n.º 15
0
def test_j_expression(dt0, tbl0):
    """
    Check computed columns in the `j` position, given as a single
    expression, a list of expressions, or a dict of named expressions.
    """
    idx = range(6)
    col_a, col_b, col_c, col_d = tbl0[0], tbl0[1], tbl0[2], tbl0[3]

    # Single expression -> one integer column.
    single = dt0[:, f.A + f.B]
    frame_integrity_check(single)
    assert single.shape == (6, 1)
    assert single.ltypes == (ltype.int, )
    assert single.to_list() == [[col_a[i] + col_b[i] for i in idx]]

    # List of expressions -> one output column per expression.
    expressions = [f.A + f.B, f.C - f.D, f.A / f.C, f.B * f.D]
    multi = dt0[:, expressions]
    frame_integrity_check(multi)
    assert multi.shape == (6, 4)
    assert multi.ltypes == (ltype.int, ltype.real, ltype.real, ltype.int)
    expected_multi = [
        [col_a[i] + col_b[i] for i in idx],
        [col_c[i] - col_d[i] for i in idx],
        [col_a[i] / col_c[i] for i in idx],
        [col_b[i] * col_d[i] for i in idx],
    ]
    assert multi.to_list() == expected_multi

    # Dict of expressions -> the keys become the column names.
    named = dt0[:, {"foo": f.A + f.B - f.C * 10, "a": f.A, "b": f[1], "c": f[2]}]
    frame_integrity_check(named)
    assert named.shape == (6, 4)
    assert same_iterables(named.names, ("foo", "a", "b", "c"))
    expected_ltypes = (ltype.real, ltype.int, ltype.int, ltype.real)
    assert same_iterables(named.ltypes, expected_ltypes)
    expected_foo = [col_a[i] + col_b[i] - col_c[i] * 10 for i in idx]
    assert named[:, "foo"].to_list() == [expected_foo]
Ejemplo n.º 16
0
def test_issue998():
    """Regression test for issue 998: single-threaded fread of a large CSV."""
    src = find_file("h2o-3", "bigdata", "laptop", "higgs_head_2M.csv")
    # The file is 1.46GB in size. I could not find a smaller file that exhibits
    # this problem... The issue only appeared in single-threaded mode, so we
    # have to read this file slowly. On my laptop, this test runs in about 8s.
    frame = dt.fread(src, nthreads=1, fill=True, na_strings=["-999"])
    assert frame.shape == (2000000, 29)
    expected_names = tuple("C%d" % i for i in range(frame.ncols))
    assert frame.names == expected_names
    assert frame.stypes == (dt.stype.float64, ) * frame.ncols
    # Reference per-column sums, one single-element list per column.
    expected_sums = [
        [1058818.0], [1981919.6107614636], [701.7858121241807],
        [-195.48500674014213], [1996390.3476011853], [-1759.5364254778178],
        [1980743.446578741], [-1108.7512905876065], [1712.947751407064],
        [2003064.4534490108], [1985100.3810670376], [1190.8404791812281],
        [384.00605312064], [1998592.0739881992], [1984490.1900614202],
        [2033.9754767678387], [-1028.0810855487362], [2001341.0813384056],
        [1971311.3271338642], [-943.92552991907], [-1079.3848229270661],
        [1996588.295421958], [2068619.2163415626], [2049516.5437491536],
        [2100795.4839400873], [2019540.6562294513], [1946283.046177674],
        [2066298.020782411], [1919714.12131235],
    ]
    assert same_iterables(frame.sum().to_list(), expected_sums)
Ejemplo n.º 17
0
def test_cols_colselector(dt0, tbl0):
    """
    Check column selection through a lambda that returns a "column
    selector": a single column, a list of columns, or a dict of columns:
        dt(select=lambda f: f.B)
    """
    # Single column.
    one = dt0(select=lambda f: f.B)
    one.internal.check()
    assert one.shape == (6, 1)
    assert one.names == ("B", )
    assert not one.internal.isview
    assert as_list(one) == [tbl0[1]]

    # List of columns.
    pair = dt0(select=lambda f: [f.A, f.C])
    pair.internal.check()
    assert pair.shape == (6, 2)
    assert pair.names == ("A", "C")
    assert not pair.internal.isview
    assert as_list(pair) == [tbl0[0], tbl0[2]]

    # Dict of columns: the keys become the output names.
    renamed = dt0[lambda f: {"x": f.A, "y": f.D}]
    renamed.internal.check()
    assert renamed.shape == (6, 2)
    assert same_iterables(renamed.names, ("x", "y"))
    assert not renamed.internal.isview
Ejemplo n.º 18
0
def test_j_colselector3(dt0, tbl0):
    """Select two columns via a dict of f-expressions, renaming them."""
    selection = {"x": f.A, "y": f.D}
    renamed = dt0[:, selection]
    frame_integrity_check(renamed)
    assert renamed.shape == (6, 2)
    assert same_iterables(renamed.names, ("x", "y"))
    assert not isview(renamed)
Ejemplo n.º 19
0
def test_create_from_pandas(pandas):
    """Create a Frame from a pandas DataFrame and check shape and names."""
    source = pandas.DataFrame({"A": [2, 5, 8], "B": ["e", "r", "qq"]})
    frame = dt.Frame(source)
    frame.internal.check()
    assert frame.shape == (3, 2)
    assert same_iterables(frame.names, ("A", "B"))
Ejemplo n.º 20
0
def test_create_from_pandas_with_names(pandas):
    """Create a Frame from a pandas DataFrame, overriding the column names."""
    source = pandas.DataFrame({"A": [2, 5, 8], "B": ["e", "r", "qq"]})
    frame = dt.Frame(source, names=["miniature", "miniscule"])
    frame_integrity_check(frame)
    assert frame.shape == (3, 2)
    # The explicit `names` are expected in place of the pandas column labels.
    assert same_iterables(frame.names, ("miniature", "miniscule"))
Ejemplo n.º 21
0
def test_j_colselector3(dt0, tbl0):
    """Select two columns via a dict of f-expressions, renaming them."""
    selection = {"x": f.A, "y": f.D}
    renamed = dt0[:, selection]
    renamed.internal.check()
    assert renamed.shape == (6, 2)
    assert same_iterables(renamed.names, ("x", "y"))
    assert not renamed.internal.isview