def test_internal_rowindex():
    """A freshly built frame has no row index; a row slice of it has one."""
    source = dt.Frame(range(100))
    head_view = source[:20, :]

    source_ri = frame_column_rowindex(source, 0)
    view_ri = frame_column_rowindex(head_view, 0)

    assert source_ri is None
    # The repr of the view's row index reads "0/20/1" for the [:20] slice.
    assert repr(view_ri) == "datatable.internal.RowIndex(0/20/1)"
def test_issue1225():
    """Select a cast column and a plain column from a reversed view.

    The cast column is expected to carry no row index, the untouched column
    is expected to keep a slice row index, and after materializing the frame
    both the stypes and the values are checked.
    """
    base = dt.Frame(A=[1, 2, 3], B=[5, 6, 8])
    reversed_view = base[::-1, :]
    f1 = reversed_view[:, [dt.float64(f.A), f.B]]

    cast_ri = frame_column_rowindex(f1, 0)
    assert cast_ri is None
    plain_ri = frame_column_rowindex(f1, 1)
    assert plain_ri.type == "slice"

    f1.materialize()
    expected_stypes = (stype.float64, stype.int8)
    expected_values = [[3.0, 2.0, 1.0], [8, 6, 5]]
    assert f1.stypes == expected_stypes
    assert f1.to_list() == expected_values
def test_cbind_views3():
    """cbind a reversed view, a plain frame and a row-picked view together.

    Checks the combined data and then the repr (or absence) of the row index
    of each of the three resulting columns.
    """
    from datatable.internal import frame_column_rowindex

    picked_rows = [14, 19, 35, 17, 3, 0, 1, 0, 10, 777]
    letters = list("abcde") * 2

    d0 = dt.Frame(A=range(10))[::-1, :]
    letters_frame = dt.Frame(B=list("abcde") * 2)
    picked_frame = dt.Frame(C=range(1000))[
        [14, 19, 35, 17, 3, 0, 1, 0, 10, 777], :
    ]
    d0.cbind([letters_frame, picked_frame])

    assert d0.to_list() == [
        list(range(9, -1, -1)),
        letters,
        picked_rows,
    ]

    first_repr = repr(frame_column_rowindex(d0, 0))
    assert first_repr == "datatable.internal.RowIndex(9/10/-1)"
    assert frame_column_rowindex(d0, 1) is None
    third_repr = repr(frame_column_rowindex(d0, 2))
    assert third_repr == "datatable.internal.RowIndex(int32[10])"
def test_aggregate_3d_real():
    """Aggregate a 10-row, 3-column real frame and check exemplars/members.

    Member ids are remapped through the row index of the exemplars frame
    sorted by "C0" before being compared with the expected assignment. The
    input frame is also checked to be unchanged by the aggregation.
    """
    d_in = dt.Frame([
        [0.95, 0.50, 0.55, 0.10, 0.90, 0.50, 0.90, 0.50, 0.90, 1.00],
        [1.00, 0.55, 0.45, 0.05, 0.95, 0.45, 0.90, 0.40, 1.00, 0.90],
        [0.90, 0.50, 0.55, 0.00, 1.00, 0.50, 0.95, 0.45, 0.95, 0.95],
    ])
    d_in_copy = dt.Frame(d_in)

    d_exemplars, d_members = aggregate(d_in, min_rows=0, nd_max_bins=3)

    raw_members = d_members.to_list()[0]
    sorted_exemplars = d_exemplars.sort("C0")
    sorted_order = frame_column_rowindex(sorted_exemplars, 0).to_list()
    # Translate each member id into its position within the sorted exemplars.
    a_members = [sorted_order.index(member) for member in raw_members]

    frame_integrity_check(d_members)
    assert d_members.shape == (10, 1)
    assert d_members.ltypes == (ltype.int,)
    assert a_members == [2, 1, 1, 0, 2, 1, 2, 1, 2, 2]

    frame_integrity_check(d_exemplars)
    assert d_exemplars.shape == (3, 4)
    assert d_exemplars.ltypes == (ltype.real, ltype.real, ltype.real,
                                  ltype.int)
    assert sorted_exemplars.to_list() == [
        [0.10, 0.50, 0.95],
        [0.05, 0.55, 1.00],
        [0.00, 0.50, 0.90],
        [1, 4, 5],
    ]
    assert_equals(d_in, d_in_copy)
def aggregate_nd(nd):
    """Aggregate `nd` identical integer columns and verify the result.

    Each column holds 1000 values cycling through 0..49. The aggregation is
    run with `report_progress` as the progress callback, and the members,
    exemplars and the untouched input frame are then checked.
    """
    nrows = 1000
    div = 50
    column = [row % div for row in range(nrows)]
    expected_types = tuple([ltype.int] * nd + [ltype.int])
    expected_values = [list(range(div))] * nd + [[nrows // div] * div]

    d_in = dt.Frame([column] * nd)
    d_in_copy = dt.Frame(d_in)

    d_exemplars, d_members = aggregate(
        d_in,
        min_rows=0,
        nd_max_bins=div,
        seed=1,
        progress_fn=report_progress,
    )

    raw_members = d_members.to_list()[0]
    sorted_exemplars = d_exemplars.sort("C0")
    sorted_order = frame_column_rowindex(sorted_exemplars, 0).to_list()
    # Translate each member id into its position within the sorted exemplars.
    a_members = [sorted_order.index(member) for member in raw_members]

    frame_integrity_check(d_members)
    assert d_members.shape == (nrows, 1)
    assert d_members.ltypes == (ltype.int, )
    assert a_members == column

    frame_integrity_check(d_exemplars)
    assert d_exemplars.shape == (div, nd + 1)
    assert d_exemplars.ltypes == expected_types
    assert sorted_exemplars.to_list() == expected_values
    assert_equals(d_in, d_in_copy)
def aggregate_nd(nd):
    """Aggregate `nd` identical integer columns, checking progress reports.

    Each column holds 1000 values cycling through 0..49. Progress callbacks
    are collected through the `dt.options.progress` context and the first,
    second-to-last and last reports are verified, followed by the members,
    exemplars and the untouched input frame.
    """
    nrows = 1000
    div = 50
    column = [row % div for row in range(nrows)]
    expected_types = tuple([ltype.int] * nd + [ltype.int])
    expected_values = [list(range(div))] * nd + [[nrows // div] * div]

    d_in = dt.Frame([column] * nd)
    d_in_copy = dt.Frame(d_in)
    reports = []

    def record_progress(p):
        # Validate every report as it arrives, then keep it for later checks.
        assert 0 <= p.progress <= 1
        assert p.status in ("running", "finished", "cancelled", "error")
        assert p.message in ("", "Preparing", "Aggregating", "Sampling",
                             "Finalizing")
        reports.append(p)

    with dt.options.progress.context(callback=record_progress,
                                     enabled=True,
                                     min_duration=0):
        d_exemplars, d_members = aggregate(
            d_in, min_rows=0, nd_max_bins=div, seed=1
        )

    first_report = reports[0]
    assert first_report.progress == 0
    assert first_report.status == "running"
    assert first_report.message == "Preparing"

    penultimate_report = reports[-2]
    assert penultimate_report.progress <= 1.0
    assert penultimate_report.status == "running"
    assert penultimate_report.message == "Finalizing"

    last_report = reports[-1]
    assert last_report.progress == 1.0
    assert last_report.status == "finished"
    assert last_report.message == ""

    raw_members = d_members.to_list()[0]
    sorted_exemplars = d_exemplars.sort("C0")
    sorted_order = frame_column_rowindex(sorted_exemplars, 0).to_list()
    # Translate each member id into its position within the sorted exemplars.
    a_members = [sorted_order.index(member) for member in raw_members]

    frame_integrity_check(d_members)
    assert d_members.shape == (nrows, 1)
    assert d_members.ltypes == (ltype.int, )
    assert a_members == column

    frame_integrity_check(d_exemplars)
    assert d_exemplars.shape == (div, nd + 1)
    assert d_exemplars.ltypes == expected_types
    assert sorted_exemplars.to_list() == expected_values
    assert_equals(d_in, d_in_copy)
def test_dt0_properties(dt0):
    """Check the basic properties exposed by the `dt0` Frame fixture."""
    assert isinstance(dt0, dt.Frame)

    # Dimensions.
    assert dt0.nrows == 10
    assert dt0.ncols == 3
    assert dt0.shape == (10, 3)  # must be a tuple, not a list!

    # Column names and types.
    assert dt0.names == ("colA", "colB", "colC")
    assert dt0.ltypes == (ltype.bool, ltype.int, ltype.real)
    assert dt0.stypes == (stype.bool8, stype.int16, stype.float64)

    # No column of the fixture is expected to carry a row index.
    for col in range(dt0.ncols):
        assert frame_column_rowindex(dt0, col) is None
    frame_integrity_check(dt0)
def test_materialize():
    """materialize() must drop the row index from every column of a frame.

    The frame is built by cbind-ing a sliced frame, a repeated frame and a
    plain frame, so that before materializing the three columns have a
    "slice" row index, an "arr32" row index and no row index respectively.
    """
    sliced = dt.Frame(A=range(12))[::2, :]
    repeated = dt.repeat(dt.Frame(B=["red", "green", "blue"]), 2)
    plain = dt.Frame(C=[4, 2, 9.1, 12, 0])
    DT = dt.cbind(sliced, repeated, plain, force=True)

    assert frame_column_rowindex(DT, 0).type == "slice"
    assert frame_column_rowindex(DT, 1).type == "arr32"
    assert frame_column_rowindex(DT, 2) is None

    DT.materialize()
    for col in range(3):
        assert frame_column_rowindex(DT, col) is None
def test_cast_views_all(viewtype, source_stype, target_stype):
    """Casting a column of a view must match casting the materialized column.

    Parameters
    ----------
    viewtype: int
        Index (0..3) into the list of row selectors below: two slices, a
        list of row numbers, and an int64 Frame of row numbers.
    source_stype:
        stype used to create the source column ``A``.
    target_stype:
        stype that column ``A`` is cast into; it is called as
        ``target_stype(f.A)``.
    """
    # TODO: add rowindex with NAs after #1496
    # TODO: add rowindex ARR64 somehow...
    selector = [
        slice(1, None, 2),
        slice(3, -1),
        [5, 2, 3, 0, 0, 3, 7],
        dt.Frame([3, 0, 1, 4], stype=dt.int64),
    ][viewtype]
    DT = dt.Frame(A=range(10), stype=source_stype)
    DT = DT[selector, :]
    ri = frame_column_rowindex(DT, 0)
    assert DT.stypes == (source_stype, )
    assert ri is not None
    # The expected type is computed separately on purpose. Written inline as
    # `assert ri.type == "slice" if viewtype < 2 else "arr32"`, the
    # conditional expression binds looser than `==`, so for viewtype >= 2 the
    # assert would only test the always-truthy string "arr32" and could
    # never fail.
    expected_ri_type = "slice" if viewtype < 2 else "arr32"
    assert ri.type == expected_ri_type

    # Cast while the frame is still a view...
    RES = DT[:, target_stype(f.A)]
    assert RES.stypes == (target_stype, )
    ans1 = RES.to_list()[0]

    # ...and again after materializing; both results must be the same.
    DT.materialize()
    ans2 = DT[:, target_stype(f.A)].to_list()[0]
    assert ans1 == ans2
def isview(frame):
    """Return True if at least one column of `frame` has a row index."""
    for col in range(frame.ncols):
        if frame_column_rowindex(frame, col) is not None:
            return True
    return False
def is_arr(DT):
    """Return True if every column of `DT` has a row index of type "arr32".

    A frame with no columns yields True, since there is no column that
    violates the condition.
    """
    # Lazy generator: columns are inspected one at a time and the scan stops
    # at the first column that fails the check.
    rowindexes = (frame_column_rowindex(DT, col) for col in range(DT.ncols))
    return all(ri is not None and ri.type == "arr32" for ri in rowindexes)