def test_repeat_empty_frame2():
    """Repeating a typed frame with zero rows keeps names, stypes and data."""
    column_stypes = [dt.int32, dt.str32, dt.float32]
    source = dt.Frame(A=[], B=[], C=[], stypes=column_stypes)
    repeated = dt.repeat(source, 1000)
    frame_integrity_check(repeated)
    assert repeated.names == source.names
    assert repeated.stypes == source.stypes
    assert repeated.to_list() == source.to_list()
def test_issue1921():
    """
    Write a frame made of a range column bound to a repeated str64 column
    to CSV and compare the text with the expected output.
    """
    nrows = 1921
    numbers = dt.Frame(A=range(nrows))
    words = dt.repeat(dt.Frame(B=["hey"], stype=dt.str64), nrows)
    combined = dt.cbind(numbers, words)
    csv_text = combined.to_csv()

    # Header, one line per row, and a final empty item so that the joined
    # text ends with a newline.
    expected_lines = ["A,B"]
    expected_lines.extend("%d,hey" % i for i in range(nrows))
    expected_lines.append("")
    assert csv_text == "\n".join(expected_lines)
def test_sort_consts():
    """Sorting by a column whose values are all equal leaves the frame as is."""
    single_row = dt.Frame(A=[5], B=[7.9], C=["Hello"], D=[None])
    frame = dt.repeat(single_row, 1000)
    for sort_key in (f.A, f.B, f.C, f.D):
        assert_equals(frame[:, :, sort(sort_key)], frame)
def test_materialize():
    """
    All three columns of the combined frame are reported as virtual before
    `materialize()` and as non-virtual after it.
    """
    sliced = dt.Frame(A=range(12))[::2, :]
    repeated = dt.repeat(dt.Frame(B=["red", "green", "blue"]), 2)
    shorter = dt.Frame(C=[4, 2, 9.1, 12, 0])
    combined = dt.cbind(sliced, repeated, shorter, force=True)

    virtual_before = frame_columns_virtual(combined)
    assert virtual_before == (True, True, True)

    combined.materialize()
    virtual_after = frame_columns_virtual(combined)
    assert virtual_after == (False, False, False)
def test_tonumpy_issue2050():
    """
    Check per-column sums and the numpy grand total of a repeated integer
    frame that contains missing values.
    """
    reps = 1234
    base = dt.Frame(A=[1, 2, None, 4, 5],
                    B=range(5),
                    C=[4, None, None, None, 4],
                    stype=int)
    tiled = dt.repeat(base[:, ["A", "B", "C"]], reps)

    column_sums = tiled.sum().to_list()
    assert column_sums == [[12 * reps], [10 * reps], [8 * reps]]

    grand_total = tiled.to_numpy().sum()
    assert grand_total == 30 * reps
def test_cast_huge_to_str():
    """
    Check that converting a huge column into str properly overflows it
    into the str64 type. See issue #1695.

    This test takes up to 2s to run (or up to 5s if doing an integrity
    check).
    """
    long_value = "ABCDEFGHIJ" * 100000
    huge = dt.repeat(dt.Frame(BIG=[long_value]), 3000)
    assert huge.stypes == (dt.str32, )

    converted = huge[:, dt.str32(f.BIG)]
    assert converted.stypes == (dt.str64, )
    assert converted[-1, 0] == huge[0, 0]
def test_dt_repeat_multicol():
    """
    Repeating a four-column frame of mixed types gives a view with the same
    names and stypes, and with every column's data repeated.
    """
    times = 4
    source = dt.Frame(
        A=[None, 1.4, -2.6, 3.9998],
        B=["row", "row", "row", "your boat"],
        C=[25, -9, 18, 2],
        D=[True, None, True, False],
    )
    repeated = dt.repeat(source, times)
    frame_integrity_check(repeated)
    assert isview(repeated)
    assert repeated.names == source.names
    assert repeated.stypes == source.stypes

    expected_columns = [column * times for column in source.to_list()]
    assert repeated.to_list() == expected_columns
def test_materialize():
    """
    Before `materialize()` the first two columns carry a "slice" and an
    "arr32" rowindex and the third carries none; afterwards no column has a
    rowindex.
    """
    sliced = dt.Frame(A=range(12))[::2, :]
    repeated = dt.repeat(dt.Frame(B=["red", "green", "blue"]), 2)
    shorter = dt.Frame(C=[4, 2, 9.1, 12, 0])
    combined = dt.cbind(sliced, repeated, shorter, force=True)

    assert frame_column_rowindex(combined, 0).type == "slice"
    assert frame_column_rowindex(combined, 1).type == "arr32"
    assert frame_column_rowindex(combined, 2) is None

    combined.materialize()
    for column_index in range(3):
        assert frame_column_rowindex(combined, column_index) is None
def test_dt_repeat_empty_frame():
    """Repeating a frame with no columns yields a frame with no columns."""
    empty = dt.Frame()
    repeated = dt.repeat(empty, 5)
    frame_integrity_check(repeated)
    assert repeated.to_list() == []
def test_dt_repeat_view():
    """Repeating an every-other-row selection repeats only the selected rows."""
    times = 5
    base = dt.Frame(A=[1, 3, 4, 5], B=[2, 6, 3, 1])
    every_other = base[::2, :]
    repeated = dt.repeat(every_other, times)
    frame_integrity_check(repeated)

    expected = {"A": [1, 4] * times, "B": [2, 3] * times}
    assert repeated.to_dict() == expected
def test_dt_repeat2():
    """Repeating a single string column seven times repeats its values."""
    times = 7
    source = dt.Frame(["A", "B", "CDE"])
    repeated = dt.repeat(source, times)
    frame_integrity_check(repeated)

    first_column = source.to_list()[0]
    assert repeated.to_list() == [first_column * times]
def test_dt_repeat():
    """Repeating a ten-row range column three times repeats its values."""
    times = 3
    source = dt.Frame(range(10))
    repeated = dt.repeat(source, times)
    frame_integrity_check(repeated)

    expected_column = list(range(10)) * times
    assert repeated.to_list() == [expected_column]
def make_datatable(dt, rows, select, groupby=None, join=None, sort=None,
                   engine=None, mode=None, replacement=None):
    """
    Implementation of the `Frame.__call__()` method.

    This is the module's "main" function: it builds the row-filter,
    group-by, column-set and sort nodes for the request and executes them
    against the target Frame `dt`.

    A `datatable.join` object passed in the `groupby` position is treated
    as the `join` argument. When `mode == "update"`, `dt` is materialized,
    `replacement` is converted into a Frame where needed, the update is
    applied through the column-set node, and nothing is returned. Otherwise
    a Frame built from the evaluated columns is returned.
    `mode == "delete"` is rejected by an assertion.
    """
    if isinstance(groupby, datatable.join):
        join, groupby = groupby, None
    is_update = mode == "update"
    is_delete = mode == "delete"
    if join:
        jframe = join.joinframe
    else:
        jframe = None

    with f.bind_datatable(dt), g.bind_datatable(jframe):
        ctx = make_engine(engine, dt, jframe)
        ctx.rowindex = dt.internal.rowindex
        row_node = make_rowfilter(rows, ctx)
        group_node = make_groupby(groupby, ctx)
        col_node = make_columnset(select, ctx, is_update)
        sort_node = make_sort(sort, ctx)

        if join:
            join.execute(ctx)

        if sort_node:
            # Sorting is only supported when all rows are selected and no
            # group-by is present.
            if not isinstance(row_node, AllRFNode) or group_node:  # pragma: no cover
                raise NotImplementedError(
                    "Cannot yet apply sort argument to a view datatable or "
                    "combine with rows / groupby argument.")
            row_node = SortedRFNode(sort_node)

        assert not is_delete
        if is_update:
            assert group_node is None
            updates_all_rows = isinstance(row_node, AllRFNode)
            # Without `materialize`, when an update is applied to a view,
            # `row_node.execute()` will merge the rowindex implied by
            # `row_node` with its parent's rowindex. This will cause the
            # parent's data to be updated, which is wrong.
            dt.materialize()
            scalar_types = (int, float, str, type(None))
            if isinstance(replacement, scalar_types):
                # Wrap a scalar into a one-cell Frame; when every row is
                # being updated, repeat it to `dt.nrows` rows.
                replacement = datatable.Frame([replacement])
                if updates_all_rows:
                    replacement = datatable.repeat(replacement, dt.nrows)
            elif not isinstance(replacement, datatable.Frame):
                if isinstance(replacement, BaseExpr):
                    evaluated = replacement.evaluate_eager(ctx)
                    column_set = core.columns_from_columns([evaluated])
                    replacement = column_set.to_frame(None)
                else:
                    replacement = datatable.Frame(replacement)
            row_node.execute()
            col_node.execute_update(dt, replacement)
            return

        row_node.execute()
        if group_node:
            group_node.execute(ctx)

        col_node.execute()
        result = ctx.columns.to_frame(col_node.column_names)
        # Attach the groupby to the result only when the row count is the
        # same as in the source frame.
        if group_node and result.nrows == dt.nrows:
            result.internal.groupby = ctx.groupby
        return result

    raise RuntimeError("Unable to calculate the result")  # pragma: no cover
def test_dt_repeat_empty_frame():
    """Repeating a frame with no columns yields a frame with no columns."""
    empty = dt.Frame()
    repeated = dt.repeat(empty, 5)
    repeated.internal.check()
    assert repeated.to_list() == []
def test_dt_repeat2():
    """Repeating a single string column seven times repeats its values."""
    times = 7
    source = dt.Frame(["A", "B", "CDE"])
    repeated = dt.repeat(source, times)
    repeated.internal.check()

    first_column = source.to_list()[0]
    assert repeated.to_list() == [first_column * times]
def test_dt_repeat():
    """Repeating a ten-row range column three times repeats its values."""
    times = 3
    source = dt.Frame(range(10))
    repeated = dt.repeat(source, times)
    repeated.internal.check()

    expected_column = list(range(10)) * times
    assert repeated.to_list() == [expected_column]