예제 #1
0
def test_internal_rowindex():
    """Check rowindex reporting for a plain frame and a row-sliced view.

    The column of the freshly built frame must report no rowindex, while
    the column of the `[:20, :]` selection must report a rowindex whose
    repr is ``RowIndex(0/20/1)``.
    """
    source = dt.Frame(range(100))
    first_twenty = source[:20, :]
    source_ri = frame_column_rowindex(source, 0)
    view_ri = frame_column_rowindex(first_twenty, 0)
    expected_repr = "datatable.internal.RowIndex(0/20/1)"
    assert source_ri is None
    assert repr(view_ri) == expected_repr
예제 #2
0
def test_issue1225():
    """Regression test named after issue 1225.

    Selects a cast column and an untouched column from a row-reversed
    frame, checks the rowindex of each resulting column, then
    materializes the frame and verifies stypes and values.
    """
    expected_stypes = (stype.float64, stype.int8)
    expected_data = [[3.0, 2.0, 1.0], [8, 6, 5]]

    f0 = dt.Frame(A=[1, 2, 3], B=[5, 6, 8])
    f1 = f0[::-1, :][:, [dt.float64(f.A), f.B]]

    # Column 0 (the cast of A) must carry no rowindex; column 1 (B taken
    # as-is) must carry one of type "slice".
    assert frame_column_rowindex(f1, 0) is None
    assert frame_column_rowindex(f1, 1).type == "slice"

    f1.materialize()
    assert f1.stypes == expected_stypes
    assert f1.to_list() == expected_data
예제 #3
0
def test_cbind_views3():
    """Check cbind of a reversed-slice view, a plain frame and a list view.

    After the in-place `cbind`, the data must match the three inputs and
    each column must report the expected rowindex: a reversed slice, none,
    and an int32 array of 10 entries respectively.
    """
    from datatable.internal import frame_column_rowindex
    picked_rows = [14, 19, 35, 17, 3, 0, 1, 0, 10, 777]

    target = dt.Frame(A=range(10))[::-1, :]
    letters = dt.Frame(B=list("abcde") * 2)
    gathered = dt.Frame(C=range(1000))[picked_rows, :]
    target.cbind([letters, gathered])

    expected_data = [
        list(range(9, -1, -1)),
        list("abcde" * 2),
        [14, 19, 35, 17, 3, 0, 1, 0, 10, 777],
    ]
    assert target.to_list() == expected_data

    ri_reversed = frame_column_rowindex(target, 0)
    assert repr(ri_reversed) == "datatable.internal.RowIndex(9/10/-1)"
    assert frame_column_rowindex(target, 1) is None
    ri_gathered = frame_column_rowindex(target, 2)
    assert repr(ri_gathered) == "datatable.internal.RowIndex(int32[10])"
예제 #4
0
def test_aggregate_3d_real():
    """Aggregate a 10-row frame of three real columns into 3 exemplars.

    The member ids are re-expressed as positions within the rowindex of
    the exemplars sorted by "C0", so the expected membership list does not
    depend on the order in which exemplars were produced. The input frame
    must be left unchanged by the aggregation.
    """
    col0 = [0.95, 0.50, 0.55, 0.10, 0.90, 0.50, 0.90, 0.50, 0.90, 1.00]
    col1 = [1.00, 0.55, 0.45, 0.05, 0.95, 0.45, 0.90, 0.40, 1.00, 0.90]
    col2 = [0.90, 0.50, 0.55, 0.00, 1.00, 0.50, 0.95, 0.45, 0.95, 0.95]
    d_in = dt.Frame([col0, col1, col2])
    d_in_copy = dt.Frame(d_in)

    d_exemplars, d_members = aggregate(d_in, min_rows=0, nd_max_bins=3)

    raw_members = d_members.to_list()[0]
    d = d_exemplars.sort("C0")
    sorted_order = frame_column_rowindex(d, 0).to_list()
    # Map every exemplar id to its position in the sorted exemplars.
    a_members = [sorted_order.index(member) for member in raw_members]

    frame_integrity_check(d_members)
    assert d_members.shape == (10, 1)
    assert d_members.ltypes == (ltype.int,)
    assert a_members == [2, 1, 1, 0, 2, 1, 2, 1, 2, 2]

    frame_integrity_check(d_exemplars)
    assert d_exemplars.shape == (3, 4)
    assert d_exemplars.ltypes == (ltype.real,) * 3 + (ltype.int,)
    expected_exemplars = [
        [0.10, 0.50, 0.95],
        [0.05, 0.55, 1.00],
        [0.00, 0.50, 0.90],
        [1, 4, 5],
    ]
    assert d.to_list() == expected_exemplars
    assert_equals(d_in, d_in_copy)
예제 #5
0
def aggregate_nd(nd):
    """Aggregate a frame of `nd` identical integer columns and verify it.

    Each column holds ``i % 50`` for 1000 rows. The aggregation is run
    with `report_progress` passed as `progress_fn`. Afterwards the member
    ids are re-expressed as positions within the rowindex of the exemplars
    sorted by "C0" and compared against the input column; the exemplars
    must have `nd + 1` integer columns, the last one holding the member
    counts. The input frame must be left unchanged.

    Parameters
    ----------
    nd: int
        Number of columns in the input frame.
    """
    nrows = 1000
    div = 50
    column = [row % div for row in range(nrows)]
    expected_types = tuple([ltype.int] * (nd + 1))
    expected_values = [list(range(div)) for _ in range(nd)]
    expected_values.append([nrows // div] * div)

    d_in = dt.Frame([column] * nd)
    d_in_copy = dt.Frame(d_in)
    d_exemplars, d_members = aggregate(
        d_in,
        min_rows=0,
        nd_max_bins=div,
        seed=1,
        progress_fn=report_progress,
    )

    raw_members = d_members.to_list()[0]
    d = d_exemplars.sort("C0")
    sorted_order = frame_column_rowindex(d, 0).to_list()
    # Map every exemplar id to its position in the sorted exemplars.
    a_members = [sorted_order.index(member) for member in raw_members]

    frame_integrity_check(d_members)
    assert d_members.shape == (nrows, 1)
    assert d_members.ltypes == (ltype.int, )
    assert a_members == column

    frame_integrity_check(d_exemplars)
    assert d_exemplars.shape == (div, nd + 1)
    assert d_exemplars.ltypes == expected_types
    assert d.to_list() == expected_values
    assert_equals(d_in, d_in_copy)
예제 #6
0
def aggregate_nd(nd):
    """Aggregate a frame of `nd` identical integer columns and verify it.

    Each column holds ``i % 50`` for 1000 rows. The aggregation runs
    inside a `dt.options.progress` context whose callback validates and
    records every progress report; the first, second-to-last and last
    reports are then checked. Afterwards the member ids are re-expressed
    as positions within the rowindex of the exemplars sorted by "C0" and
    compared against the input column. The input frame must be left
    unchanged.

    Parameters
    ----------
    nd: int
        Number of columns in the input frame.
    """
    nrows = 1000
    div = 50
    column = [row % div for row in range(nrows)]
    expected_types = tuple([ltype.int] * (nd + 1))
    expected_values = [list(range(div)) for _ in range(nd)]
    expected_values.append([nrows // div] * div)

    d_in = dt.Frame([column] * nd)
    d_in_copy = dt.Frame(d_in)

    messages = []
    known_statuses = ("running", "finished", "cancelled", "error")
    known_messages = ("", "Preparing", "Aggregating", "Sampling",
                      "Finalizing")

    def record_progress(report):
        # Validate each report as it arrives, then keep it for the
        # ordering checks below.
        assert 0 <= report.progress <= 1
        assert report.status in known_statuses
        assert report.message in known_messages
        messages.append(report)

    with dt.options.progress.context(callback=record_progress,
                                     enabled=True,
                                     min_duration=0):
        d_exemplars, d_members = aggregate(
            d_in,
            min_rows=0,
            nd_max_bins=div,
            seed=1,
        )

        first = messages[0]
        assert first.progress == 0
        assert first.status == "running"
        assert first.message == "Preparing"

        penultimate = messages[-2]
        assert penultimate.progress <= 1.0
        assert penultimate.status == "running"
        assert penultimate.message == "Finalizing"

        last = messages[-1]
        assert last.progress == 1.0
        assert last.status == "finished"
        assert last.message == ""

    raw_members = d_members.to_list()[0]
    d = d_exemplars.sort("C0")
    sorted_order = frame_column_rowindex(d, 0).to_list()
    # Map every exemplar id to its position in the sorted exemplars.
    a_members = [sorted_order.index(member) for member in raw_members]

    frame_integrity_check(d_members)
    assert d_members.shape == (nrows, 1)
    assert d_members.ltypes == (ltype.int, )
    assert a_members == column

    frame_integrity_check(d_exemplars)
    assert d_exemplars.shape == (div, nd + 1)
    assert d_exemplars.ltypes == expected_types
    assert d.to_list() == expected_values
    assert_equals(d_in, d_in_copy)
예제 #7
0
def test_dt0_properties(dt0):
    """Verify the basic properties of the `dt0` Frame.

    Covers the type, dimensions, column names, ltypes and stypes of the
    frame, checks that no column reports a rowindex, and finally runs the
    frame integrity check.
    """
    expected_shape = (10, 3)
    assert isinstance(dt0, dt.Frame)
    assert (dt0.nrows, dt0.ncols) == expected_shape
    # Comparing against a tuple also enforces that `shape` is not a list.
    assert dt0.shape == expected_shape
    assert dt0.names == ("colA", "colB", "colC")
    assert dt0.ltypes == (ltype.bool, ltype.int, ltype.real)
    assert dt0.stypes == (stype.bool8, stype.int16, stype.float64)
    assert all(frame_column_rowindex(dt0, col) is None
               for col in range(dt0.ncols))
    frame_integrity_check(dt0)
예제 #8
0
def test_materialize():
    """Check that `materialize()` removes the rowindex from every column.

    The combined frame starts with a "slice" rowindex on column 0, an
    "arr32" rowindex on column 1 and none on column 2; after
    materialization no column may report a rowindex.
    """
    sliced = dt.Frame(A=range(12))[::2, :]
    repeated = dt.repeat(dt.Frame(B=["red", "green", "blue"]), 2)
    plain = dt.Frame(C=[4, 2, 9.1, 12, 0])
    DT = dt.cbind(sliced, repeated, plain, force=True)

    ri_sliced = frame_column_rowindex(DT, 0)
    assert ri_sliced.type == "slice"
    ri_repeated = frame_column_rowindex(DT, 1)
    assert ri_repeated.type == "arr32"
    assert frame_column_rowindex(DT, 2) is None

    DT.materialize()
    for col in range(3):
        assert frame_column_rowindex(DT, col) is None
예제 #9
0
def test_cast_views_all(viewtype, source_stype, target_stype):
    """Check that casting a view column matches casting its materialized copy.

    A 10-row column `A` of `source_stype` is turned into a view using one
    of four row selectors, cast to `target_stype`, and the values are
    compared with those obtained by casting after `materialize()`.

    Parameters
    ----------
    viewtype: int
        Index into the selector list: 0 and 1 are slices, 2 is a list of
        integers, 3 is an int64 Frame of row numbers.
    source_stype
        stype used to create the source column.
    target_stype
        stype to cast to; it is invoked as ``target_stype(f.A)``.
    """
    # TODO: add rowindex with NAs after #1496
    # TODO: add rowindex ARR64 somehow...
    selector = [
        slice(1, None, 2),
        slice(3, -1), [5, 2, 3, 0, 0, 3, 7],
        dt.Frame([3, 0, 1, 4], stype=dt.int64)
    ][viewtype]
    DT = dt.Frame(A=range(10), stype=source_stype)
    DT = DT[selector, :]
    ri = frame_column_rowindex(DT, 0)
    assert DT.stypes == (source_stype, )
    assert ri is not None
    # The expected type is computed first because a conditional expression
    # binds looser than `==`: the unparenthesized form
    # `assert ri.type == "slice" if viewtype < 2 else "arr32"` asserts the
    # always-truthy string "arr32" for viewtype >= 2 and checks nothing.
    # NOTE(review): this now really requires "arr32" for the int64 Frame
    # selector (viewtype 3) as well -- confirm that is the intended type.
    expected_ri_type = "slice" if viewtype < 2 else "arr32"
    assert ri.type == expected_ri_type

    RES = DT[:, target_stype(f.A)]
    assert RES.stypes == (target_stype, )
    ans1 = RES.to_list()[0]

    DT.materialize()
    ans2 = DT[:, target_stype(f.A)].to_list()[0]
    assert ans1 == ans2
예제 #10
0
def isview(frame):
    """Return True if at least one column of `frame` reports a rowindex.

    Columns are probed in order and the scan stops at the first column
    whose rowindex is not None. A frame with no columns yields False.
    """
    for col in range(frame.ncols):
        if frame_column_rowindex(frame, col) is not None:
            return True
    return False
예제 #11
0
def is_arr(DT):
    """Return True if every column of `DT` has a rowindex of type "arr32".

    A column with no rowindex, or with a rowindex of any other type,
    makes the result False. A frame with no columns yields True.
    """
    rowindexes = (frame_column_rowindex(DT, col) for col in range(DT.ncols))
    return all(ri is not None and ri.type == "arr32" for ri in rowindexes)