Example #1
0
def test_repeat_empty_frame2():
    """Repeating a zero-row, three-column frame keeps names, stypes and data."""
    column_stypes = [dt.int32, dt.str32, dt.float32]
    source = dt.Frame(A=[], B=[], C=[], stypes=column_stypes)
    repeated = dt.repeat(source, 1000)
    frame_integrity_check(repeated)
    # The repeated frame must be indistinguishable from the empty original.
    assert repeated.names == source.names
    assert repeated.stypes == source.stypes
    assert repeated.to_list() == source.to_list()
Example #2
0
def test_issue1921():
    """Check CSV output of a range column cbind-ed with a repeated str64 column."""
    n = 1921
    numbers = dt.Frame(A=range(n))
    single_row = dt.Frame(B=["hey"], stype=dt.str64)
    DT = dt.cbind(numbers, dt.repeat(single_row, n))
    out = DT.to_csv()
    # Expected text: header, one "<i>,hey" line per row, and a trailing newline.
    expected_lines = ["A,B"]
    expected_lines.extend("%d,hey" % i for i in range(n))
    expected_lines.append("")
    assert out == "\n".join(expected_lines)
Example #3
0
def test_sort_consts():
    """Sorting by a column of identical values must leave the frame unchanged."""
    one_row = dt.Frame(A=[5], B=[7.9], C=["Hello"], D=[None])
    DT = dt.repeat(one_row, 1000)
    # Every column holds a single repeated value, so each sort is a no-op.
    for sort_key in (f.A, f.B, f.C, f.D):
        assert_equals(DT[:, :, sort(sort_key)], DT)
Example #4
0
def test_materialize():
    """`materialize()` must turn every virtual column into a non-virtual one."""
    sliced = dt.Frame(A=range(12))[::2, :]
    repeated = dt.repeat(dt.Frame(B=["red", "green", "blue"]), 2)
    plain = dt.Frame(C=[4, 2, 9.1, 12, 0])
    # `force=True` lets cbind combine frames with different row counts.
    DT = dt.cbind(sliced, repeated, plain, force=True)
    all_virtual = (True, True, True)
    none_virtual = (False, False, False)
    assert frame_columns_virtual(DT) == all_virtual
    DT.materialize()
    assert frame_columns_virtual(DT) == none_virtual
Example #5
0
def test_tonumpy_issue2050():
    """Sums of a repeated frame with missing values, via `sum()` and numpy."""
    n = 1234
    base = dt.Frame(A=[1, 2, None, 4, 5],
                    B=range(5),
                    C=[4, None, None, None, 4],
                    stype=int)
    selected = base[:, ["A", "B", "C"]]
    DT = dt.repeat(selected, n)
    # Per-repetition column sums are 12, 10 and 8 (missing values skipped).
    per_column = DT.sum().to_list()
    assert per_column == [[12 * n], [10 * n], [8 * n]]
    total = DT.to_numpy().sum()
    assert total == 30 * n
Example #6
0
def test_cast_huge_to_str():
    """
    Test that converting a huge column into str would properly overflow it
    into str64 type. See issue #1695.

    This test takes up to 2s to run (or up to 5s if doing an integrity check).
    """
    long_value = "ABCDEFGHIJ" * 100000
    one_row = dt.Frame(BIG=[long_value])
    DT = dt.repeat(one_row, 3000)
    assert DT.stypes == (dt.str32, )
    # Requesting str32 on the repeated column must yield str64 instead.
    converted = DT[:, dt.str32(f.BIG)]
    assert converted.stypes == (dt.str64, )
    assert converted[-1, 0] == DT[0, 0]
Example #7
0
def test_dt_repeat_multicol():
    """Repeat a four-column frame of mixed types and compare with list repetition."""
    reps = 4
    source = dt.Frame(A=[None, 1.4, -2.6, 3.9998],
                      B=["row", "row", "row", "your boat"],
                      C=[25, -9, 18, 2],
                      D=[True, None, True, False])
    repeated = dt.repeat(source, reps)
    frame_integrity_check(repeated)
    assert isview(repeated)
    assert repeated.names == source.names
    assert repeated.stypes == source.stypes
    # Each column must equal the original column's values concatenated `reps` times.
    expected = [column * reps for column in source.to_list()]
    assert repeated.to_list() == expected
Example #8
0
def test_materialize():
    """`materialize()` must drop the rowindex from every column of the frame."""
    sliced = dt.Frame(A=range(12))[::2, :]
    repeated = dt.repeat(dt.Frame(B=["red", "green", "blue"]), 2)
    plain = dt.Frame(C=[4, 2, 9.1, 12, 0])
    DT = dt.cbind(sliced, repeated, plain, force=True)
    # Before materializing: the sliced and repeated columns carry a rowindex
    # of the asserted type, while the plain column has none.
    assert frame_column_rowindex(DT, 0).type == "slice"
    assert frame_column_rowindex(DT, 1).type == "arr32"
    assert frame_column_rowindex(DT, 2) is None
    DT.materialize()
    # After materializing: no column has a rowindex.
    for column_index in range(3):
        assert frame_column_rowindex(DT, column_index) is None
Example #9
0
def test_dt_repeat_empty_frame():
    """Repeating a frame with no columns yields a frame with no columns."""
    empty = dt.Frame()
    repeated = dt.repeat(empty, 5)
    frame_integrity_check(repeated)
    columns = repeated.to_list()
    assert columns == []
Example #10
0
def test_dt_repeat_view():
    """Repeat an every-other-row selection of a frame and check the contents."""
    base = dt.Frame(A=[1, 3, 4, 5], B=[2, 6, 3, 1])
    every_other = base[::2, :]
    repeated = dt.repeat(every_other, 5)
    frame_integrity_check(repeated)
    # Rows 0 and 2 of `base`, repeated five times.
    expected = {"A": [1, 4] * 5, "B": [2, 3] * 5}
    assert repeated.to_dict() == expected
Example #11
0
def test_dt_repeat2():
    """Repeat a single string column seven times."""
    source = dt.Frame(["A", "B", "CDE"])
    repeated = dt.repeat(source, 7)
    frame_integrity_check(repeated)
    first_column = source.to_list()[0]
    assert repeated.to_list() == [first_column * 7]
Example #12
0
def test_dt_repeat():
    """Repeat a single integer column three times."""
    values = range(10)
    repeated = dt.repeat(dt.Frame(values), 3)
    frame_integrity_check(repeated)
    expected_column = list(values) * 3
    assert repeated.to_list() == [expected_column]
Example #13
0
def make_datatable(dt,
                   rows,
                   select,
                   groupby=None,
                   join=None,
                   sort=None,
                   engine=None,
                   mode=None,
                   replacement=None):
    """
    Implementation of the `Frame.__call__()` method.

    This is the "main" function in the module; it is responsible for
    evaluating various transformations when they are applied to a target
    Frame.

    Parameters
    ----------
    dt
        The target Frame. It is bound to the `f` namespace while the
        expression is being evaluated.
    rows
        Row selector; handed to `make_rowfilter()`.
    select
        Column selector; handed to `make_columnset()`.
    groupby
        Optional group-by specification; handed to `make_groupby()`. If a
        `datatable.join` instance is passed here, it is used as the `join`
        argument instead and no groupby is applied.
    join
        Optional join object. Its `.joinframe` is bound to the `g`
        namespace, and `join.execute()` is called with the engine.
    sort
        Optional sort specification; handed to `make_sort()`. Only supported
        when all rows are selected and there is no groupby.
    engine
        Engine selector; handed to `make_engine()`.
    mode
        `"update"` switches to the update path; `"delete"` is rejected by an
        assertion; any other value performs a regular selection.
    replacement
        Used in update mode only. May be a scalar (int, float, str or None),
        a `datatable.Frame`, a `BaseExpr`, or any other object accepted by
        the `datatable.Frame` constructor.

    Returns
    -------
    None in update mode (the update is applied through
    `colsnode.execute_update(dt, replacement)`); otherwise the frame built
    by `ee.columns.to_frame()` from the selected columns.

    Raises
    ------
    NotImplementedError
        If `sort` is given together with a row filter or a groupby.
    """
    # A join object passed in the `groupby` slot is re-routed to `join`.
    if isinstance(groupby, datatable.join):
        join = groupby
        groupby = None
    update_mode = mode == "update"
    delete_mode = mode == "delete"
    jframe = join.joinframe if join else None
    with f.bind_datatable(dt), g.bind_datatable(jframe):
        ee = make_engine(engine, dt, jframe)
        ee.rowindex = dt.internal.rowindex
        # Build all evaluation nodes up-front; they are executed further below.
        rowsnode = make_rowfilter(rows, ee)
        grbynode = make_groupby(groupby, ee)
        colsnode = make_columnset(select, ee, update_mode)
        sortnode = make_sort(sort, ee)

        if join:
            join.execute(ee)

        if sortnode:
            # Sorting is implemented as a row filter, so it can only replace
            # the "all rows" filter and cannot be combined with a groupby.
            if isinstance(rowsnode, AllRFNode) and not grbynode:
                rowsnode = SortedRFNode(sortnode)
            else:  # pragma: no cover
                raise NotImplementedError(
                    "Cannot yet apply sort argument to a view datatable or "
                    "combine with rows / groupby argument.")

        # Delete mode is not handled by this function.
        assert not delete_mode
        if update_mode:
            assert grbynode is None
            allrows = isinstance(rowsnode, AllRFNode)
            # Without `materialize`, when an update is applied to a view,
            # `rowsnode.execute()` will merge the rowindex implied by
            # `rowsnode` with its parent's rowindex. This will cause the
            # parent's data to be updated, which is wrong.
            dt.materialize()
            # Normalize `replacement` into a Frame, whatever form it came in.
            if isinstance(replacement, (int, float, str, type(None))):
                replacement = datatable.Frame([replacement])
                if allrows:
                    # No row filter: expand the scalar to `dt.nrows` rows.
                    replacement = datatable.repeat(replacement, dt.nrows)
            elif isinstance(replacement, datatable.Frame):
                pass
            elif isinstance(replacement, BaseExpr):
                # Evaluate the expression and wrap the resulting column
                # into a single-column frame.
                _col = replacement.evaluate_eager(ee)
                _colset = core.columns_from_columns([_col])
                replacement = _colset.to_frame(None)
            else:
                replacement = datatable.Frame(replacement)
            rowsnode.execute()
            colsnode.execute_update(dt, replacement)
            return

        # Regular (select) path: rows first, then groupby, then columns.
        rowsnode.execute()
        if grbynode:
            grbynode.execute(ee)

        colsnode.execute()
        res_dt = ee.columns.to_frame(colsnode.column_names)
        # Attach the groupby to the result only when the result has the same
        # number of rows as the source frame.
        if grbynode and res_dt.nrows == dt.nrows:
            res_dt.internal.groupby = ee.groupby
        return res_dt

    # Both paths above return from inside the `with` block, so this line is
    # not reached during normal execution.
    raise RuntimeError("Unable to calculate the result")  # pragma: no cover
Example #14
0
def test_dt_repeat_empty_frame():
    """Repeating a frame with no columns yields a frame with no columns."""
    empty = dt.Frame()
    repeated = dt.repeat(empty, 5)
    repeated.internal.check()
    columns = repeated.to_list()
    assert columns == []
Example #15
0
def test_dt_repeat2():
    """Repeat a single string column seven times."""
    source = dt.Frame(["A", "B", "CDE"])
    repeated = dt.repeat(source, 7)
    repeated.internal.check()
    first_column = source.to_list()[0]
    assert repeated.to_list() == [first_column * 7]
Example #16
0
def test_dt_repeat():
    """Repeat a single integer column three times."""
    values = range(10)
    repeated = dt.repeat(dt.Frame(values), 3)
    repeated.internal.check()
    expected_column = list(values) * 3
    assert repeated.to_list() == [expected_column]