def test_weight():
    """Check `ops.weight` on a three-row DataMatrix.

    Column `b` holds 1, 0, 2 and is passed to `ops.weight`. The result is
    expected to list 'a' once, 'b' not at all and 'c' twice, with the
    corresponding `b` values.
    """
    source = DataMatrix(length=3)
    source.a = 'a', 'b', 'c'
    source.b = 1, 0, 2
    weighted = ops.weight(source.b)
    expected_a = ['a', 'c', 'c']
    expected_b = [1, 2, 2]
    check_col(weighted.a, expected_a)
    check_col(weighted.b, expected_b)
def test_fullfactorial():
    """Check `ops.fullfactorial` on a two-column design.

    Column `a` is given 'a', 'b' and an empty string; column `b` is given
    0, 1, 2. The result is expected to have six rows in which `a` alternates
    between 'a' and 'b' while `b` steps through 0, 0, 1, 1, 2, 2 (the empty
    string does not appear in the expected output).
    """
    design = DataMatrix(length=3)
    design.a = 'a', 'b', ''
    design.b = 0, 1, 2
    crossed = ops.fullfactorial(design)
    expected_a = ['a', 'b', 'a', 'b', 'a', 'b']
    expected_b = [0, 0, 1, 1, 2, 2]
    check_col(crossed.a, expected_a)
    check_col(crossed.b, expected_b)
def test_keep_only():
    """Check that `ops.keep_only` leaves only the listed column.

    `ops.keep_only` is called for its effect on `dm` (its return value is
    ignored); afterwards `dm` must no longer list column 'a' and must still
    list column 'b'.
    """
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    columns_to_keep = ['b']
    ops.keep_only(dm, columns_to_keep)
    dropped_is_gone = 'a' not in dm.column_names
    ok_(dropped_is_gone)
    kept_is_present = 'b' in dm.column_names
    ok_(kept_is_present)
def test_group():
    """Check `ops.group` when grouping on two columns.

    Rows are grouped by columns `a` and `b`; the integer column `c` is then
    expected to be a series with the rows [3, nan], [2, nan] and [0, 1].
    The order in which the groups come back is not relied upon: the test
    passes if any ordering of those three rows matches.

    Raises:
        AssertionError: if no ordering of the expected rows matches `dm.c`.
    """
    # Function-scope import so the test does not depend on the module header.
    import itertools

    dm = DataMatrix(length=4)
    dm.a = 'b', 'b', 'a', 'a'
    dm.b = 'x', 'x', 'x', 'y'
    dm.c = IntColumn
    dm.c = 0, 1, 2, 3
    dm = ops.group(dm, [dm.a, dm.b])
    expected_rows = [[3, np.nan], [2, np.nan], [0, 1]]
    # Assumes check_series reports a mismatch via AssertionError - confirm.
    for ref in itertools.permutations(expected_rows):
        try:
            check_series(dm.c, list(ref))
        except AssertionError:
            continue
        break
    else:
        raise AssertionError(
            'grouped series does not match the expected rows in any order')
def test_group():
    """Check `ops.group` when grouping on two columns.

    After grouping by `a` and `b`, the integer column `c` must be a series
    made up of the rows [2, nan], [3, nan] and [0, 1]. Group order is treated
    as unspecified, so every ordering of those rows is tried and a single
    match is enough.

    Raises:
        AssertionError: if no ordering of the expected rows matches `dm.c`.
    """
    # Function-scope import so the test does not depend on the module header.
    import itertools

    dm = DataMatrix(length=4)
    dm.a = 'b', 'b', 'a', 'a'
    dm.b = 'x', 'x', 'x', 'y'
    dm.c = IntColumn
    dm.c = 0, 1, 2, 3
    dm = ops.group(dm, [dm.a, dm.b])
    expected_rows = [[2, np.nan], [3, np.nan], [0, 1]]
    matched = False
    # Assumes check_series reports a mismatch via AssertionError - confirm.
    for ref in itertools.permutations(expected_rows):
        try:
            check_series(dm.c, list(ref))
        except AssertionError:
            continue
        matched = True
        break
    if not matched:
        raise AssertionError(
            'grouped series does not match the expected rows in any order')
def test_auto_type():
    """Check the column types in the DataMatrix returned by `ops.auto_type`.

    A column mixing a string and an int must come back as MixedColumn, a
    column mixing a float and an int as FloatColumn, and an all-int column
    as IntColumn.
    """
    raw = DataMatrix(length=2)
    raw.a = 'a', 1
    raw.b = 0.1, 1
    raw.c = 0, 1
    typed = ops.auto_type(raw)
    col_a = typed.a
    assert isinstance(col_a, MixedColumn)
    col_b = typed.b
    assert isinstance(col_b, FloatColumn)
    col_c = typed.c
    assert isinstance(col_c, IntColumn)
def test_tuple_split():
    """Check `ops.tuple_split` with two explicit values.

    Splitting column `a` on the values 'a' and 'b' must return two
    DataMatrix objects, the first holding the 'a' rows and the second the
    'b' rows, each with the matching values of column `b`.
    """
    source = DataMatrix(length=4)
    source.a = 'a', 'a', 'b', 'b'
    source.b = 0, 1, 2, 3
    parts = ops.tuple_split(source.a, 'a', 'b')
    # Unpacking enforces that exactly two parts come back.
    first, second = parts
    check_col(first.a, ['a', 'a'])
    check_col(first.b, [0, 1])
    check_col(second.a, ['b', 'b'])
    check_col(second.b, [2, 3])
def test_auto_type():
    """Check the column types of `dm` after calling `ops.auto_type` on it.

    The return value of `ops.auto_type` is ignored; the checks are made on
    the DataMatrix that was passed in. A string/int column must be a
    MixedColumn, a float/int column a FloatColumn and an all-int column an
    IntColumn.
    """
    dm = DataMatrix(length=2)
    dm.a = 'a', 1
    dm.b = 0.1, 1
    dm.c = 0, 1
    ops.auto_type(dm)
    a_is_mixed = isinstance(dm.a, MixedColumn)
    ok_(a_is_mixed)
    b_is_float = isinstance(dm.b, FloatColumn)
    ok_(b_is_float)
    c_is_int = isinstance(dm.c, IntColumn)
    ok_(c_is_int)
def test_sort():
    """Check `ops.sort` on a single column and on a whole DataMatrix.

    First the sorted column `a` is assigned back to `dm.a`; `a` must then
    read 'a', 'b' while `b` keeps its values 1, 0. Then the whole DataMatrix
    is sorted by `b`; the rows must come back as ('b', 0), ('a', 1).
    """
    dm = DataMatrix(length=2)
    dm.a = 'b', 'a'
    dm.b = 1, 0

    # Sorting one column and assigning it back.
    sorted_a = ops.sort(dm.a)
    dm.a = sorted_a
    check_col(dm.a, ['a', 'b'])
    check_col(dm.b, [1, 0])

    # Sorting the whole DataMatrix by column b.
    by_b = ops.sort(dm, by=dm.b)
    check_col(by_b.a, ['b', 'a'])
    check_col(by_b.b, [0, 1])
def test_keep_only():
    """Check `ops.keep_only` with column names and with column objects.

    Columns 'b' and 'c' are selected twice: once by name and once by passing
    the column objects themselves. After each call, the returned DataMatrix
    must not list 'a' and must list 'b' and 'c'. Note that `dm` is rebound
    to the result on every pass, so the second pass operates on the result
    of the first.
    """
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    dm.c = 'y', 'z'
    by_name = ['b', 'c']
    by_column = [dm.b, dm.c]
    for cols in (by_name, by_column):
        dm = ops.keep_only(dm, *cols)
        assert 'a' not in dm.column_names
        for kept in ('b', 'c'):
            assert kept in dm.column_names
def test_split():
    """Check that `ops.split` yields one (value, DataMatrix) pair per value.

    Column `a` holds 'a', 'a', 'b', 'b'. The first pair must be 'a' together
    with the two 'a' rows, the second 'b' together with the two 'b' rows.
    """
    dm = DataMatrix(length=4)
    dm.a = 'a', 'a', 'b', 'b'
    dm.b = 0, 1, 2, 3
    g = ops.split(dm.a)
    # Use the builtin next(): the .next() method exists only on Python 2
    # iterators, whereas next() works on Python 2.6+ and Python 3 alike.
    val, dm = next(g)
    eq_(val, 'a')
    check_col(dm.a, ['a', 'a'])
    check_col(dm.b, [0, 1])
    val, dm = next(g)
    eq_(val, 'b')
    check_col(dm.a, ['b', 'b'])
    check_col(dm.b, [2, 3])
def test_shuffle():
    """Check `ops.shuffle` on a single column and on a whole DataMatrix.

    Shuffling is random, so each scenario is retried until the swapped order
    shows up. The retries are bounded so that a shuffle that never produces
    the swapped order fails the test instead of hanging it.

    Raises:
        AssertionError: if the swapped order is not seen within the allowed
            number of attempts, or if shuffling column `a` alters column `b`.
    """
    # With two rows each attempt has two possible outcomes, so running out
    # of attempts by chance is not a practical concern.
    max_attempts = 1000

    # Shuffling one column must leave the other column untouched.
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    for _ in range(max_attempts):
        dm.a = ops.shuffle(dm.a)
        check_col(dm.b, [0, 1])
        # Assumes check_col reports a mismatch via AssertionError - confirm.
        try:
            check_col(dm.a, ['b', 'a'])
        except AssertionError:
            continue
        break
    else:
        raise AssertionError(
            'shuffling column a never produced the swapped order')

    # Shuffling the whole DataMatrix must keep the rows intact.
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    for _ in range(max_attempts):
        dm = ops.shuffle(dm)
        try:
            check_col(dm.a, ['b', 'a'])
            check_col(dm.b, [1, 0])
        except AssertionError:
            continue
        break
    else:
        raise AssertionError(
            'shuffling the DataMatrix never produced the swapped rows')
def test_split():
    """Check `ops.split` both as a generator and with explicit values.

    Without values, `ops.split(dm.a)` is consumed with `next()` and must
    yield ('a', rows with a == 'a') followed by ('b', rows with a == 'b').
    With the values 'a' and 'b' passed in, it must return the two matching
    DataMatrix objects directly.
    """
    # Without values
    source = DataMatrix(length=4)
    source.a = 'a', 'a', 'b', 'b'
    source.b = 0, 1, 2, 3
    groups = ops.split(source.a)
    expected_groups = (
        ('a', ['a', 'a'], [0, 1]),
        ('b', ['b', 'b'], [2, 3]),
    )
    for want_val, want_a, want_b in expected_groups:
        # next() is called explicitly so a generator that runs out early
        # raises instead of silently ending the loop.
        val, part = next(groups)
        assert val == want_val
        check_col(part.a, want_a)
        check_col(part.b, want_b)

    # With values
    source = DataMatrix(length=4)
    source.a = 'a', 'a', 'b', 'b'
    source.b = 0, 1, 2, 3
    first, second = ops.split(source.a, 'a', 'b')
    check_col(first.a, ['a', 'a'])
    check_col(first.b, [0, 1])
    check_col(second.a, ['b', 'b'])
    check_col(second.b, [2, 3])
def test_group():
    """Check `ops.group` when grouping on two columns, ignoring group order.

    After grouping by `a` and `b`, the integer column `c` must be a series
    made up of the rows [3, nan], [2, nan] and [0, 1]. Every ordering of
    those rows is tried; the test fails only if none of them matches.
    """
    source = DataMatrix(length=4)
    source.a = 'b', 'b', 'a', 'a'
    source.b = 'x', 'x', 'x', 'y'
    source.c = IntColumn
    source.c = 0, 1, 2, 3
    grouped = ops.group(source, [source.a, source.b])
    expected_rows = [[3, np.nan], [2, np.nan], [0, 1]]
    # Assert that at least one of the permutations passes
    matched = False
    for candidate in itertools.permutations(expected_rows):
        try:
            check_series(grouped.c, candidate)
        except AssertionError:
            continue
        matched = True
        break
    assert matched
def test_shuffle_horiz():
    """Check `ops.shuffle_horiz` on all columns and on a column subset.

    Shuffling is random, so each scenario is retried until one specific
    outcome for the first row shows up. The retries are bounded so that a
    shuffle that never produces that outcome fails the test instead of
    hanging it.

    Raises:
        AssertionError: if an expected first row is not seen within the
            allowed number of attempts, or if `dm.c` changes.
    """
    # Three columns give six possible row orders, so running out of
    # attempts by chance is not a practical concern.
    max_attempts = 1000

    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    dm.c = '-', '-'

    # All columns: wait for the first row to read 0, '-', 'a'.
    for _ in range(max_attempts):
        dm2 = ops.shuffle_horiz(dm)
        # Assumes check_row reports a mismatch via AssertionError - confirm.
        try:
            check_row(dm2[0], [0, '-', 'a'])
        except AssertionError:
            continue
        break
    else:
        raise AssertionError(
            'shuffle_horiz(dm) never produced the expected first row')

    # Columns a and b only: wait for them to swap while c stays '-'.
    for _ in range(max_attempts):
        dm2 = ops.shuffle_horiz(dm.a, dm.b)
        try:
            check_row(dm2[0], [0, 'a', '-'])
        except AssertionError:
            continue
        break
    else:
        raise AssertionError(
            'shuffle_horiz(dm.a, dm.b) never produced the expected first row')

    # Repeated subset shuffles must leave column c of dm as it was.
    for _ in range(1000):
        dm2 = ops.shuffle_horiz(dm.a, dm.b)
        check_col(dm.c, ['-', '-'])
def test_shuffle_horiz():
    """Check `ops.shuffle_horiz` on all columns, a subset and one column.

    Shuffling is random, so the first two scenarios are retried until one
    specific outcome for the first row shows up. The retries are bounded so
    that a shuffle that never produces that outcome fails the test instead
    of hanging it. The final single-column call only has to run without
    raising.

    Raises:
        AssertionError: if an expected first row is not seen within the
            allowed number of attempts, or if `dm.c` changes.
    """
    # Three columns give six possible row orders, so running out of
    # attempts by chance is not a practical concern.
    max_attempts = 1000

    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    dm.c = '-', '-'

    # All columns: wait for the first row to read 0, '-', 'a'.
    found = False
    for _ in range(max_attempts):
        dm2 = ops.shuffle_horiz(dm)
        # Assumes check_row reports a mismatch via AssertionError - confirm.
        try:
            check_row(dm2[0], [0, '-', 'a'])
        except AssertionError:
            continue
        found = True
        break
    if not found:
        raise AssertionError(
            'shuffle_horiz(dm) never produced the expected first row')

    # Columns a and b only: wait for them to swap while c stays '-'.
    found = False
    for _ in range(max_attempts):
        dm2 = ops.shuffle_horiz(dm.a, dm.b)
        try:
            check_row(dm2[0], [0, 'a', '-'])
        except AssertionError:
            continue
        found = True
        break
    if not found:
        raise AssertionError(
            'shuffle_horiz(dm.a, dm.b) never produced the expected first row')

    # Repeated subset shuffles must leave column c of dm as it was.
    for _ in range(1000):
        dm2 = ops.shuffle_horiz(dm.a, dm.b)
        check_col(dm.c, ['-', '-'])

    # A single column is accepted as well; only the call itself is checked.
    ops.shuffle_horiz(dm.a)
def test_split():
    """Check `ops.split` on one column, with values, and on several columns.

    * Without values, the generator must yield ('a', ...) then ('b', ...)
      with the matching rows.
    * With the values 'a' and 'b', the two matching DataMatrix objects are
      returned directly.
    * With two or three columns, the generator must yield one entry per
      combination of values, in the order listed in the tables below, each
      followed by a DataMatrix of the expected length.
    """
    # Without values
    source = DataMatrix(length=4)
    source.a = 'a', 'a', 'b', 'b'
    source.b = 0, 1, 2, 3
    groups = ops.split(source.a)
    for want_val, want_a, want_b in (
        ('a', ['a', 'a'], [0, 1]),
        ('b', ['b', 'b'], [2, 3]),
    ):
        # next() is called explicitly throughout so a generator that runs
        # out early raises instead of silently ending the loop.
        val, part = next(groups)
        assert val == want_val
        check_col(part.a, want_a)
        check_col(part.b, want_b)

    # With values
    source = DataMatrix(length=4)
    source.a = 'a', 'a', 'b', 'b'
    source.b = 0, 1, 2, 3
    first, second = ops.split(source.a, 'a', 'b')
    check_col(first.a, ['a', 'a'])
    check_col(first.b, [0, 1])
    check_col(second.a, ['b', 'b'])
    check_col(second.b, [2, 3])

    # With multiple columns
    dm = DataMatrix(length=8)
    dm.A = 0, 0, 1, 1, 0, 0, 1, 1
    dm.B = 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b'
    dm.C = 'x', 'x', 'x', 'x', 'y', 'y', 'y', 'y'

    pairs = ops.split(dm.A, dm.B)
    expected_pairs = ((0, 'a'), (0, 'b'), (1, 'a'), (1, 'b'))
    for want1, want2 in expected_pairs:
        val1, val2, sdm = next(pairs)
        assert val1 == want1
        assert val2 == want2
        assert len(sdm) == 2

    triples = ops.split(dm.A, dm.B, dm.C)
    expected_triples = (
        (0, 'a', 'x'),
        (0, 'a', 'y'),
        (0, 'b', 'x'),
        (0, 'b', 'y'),
        (1, 'a', 'x'),
        (1, 'a', 'y'),
        (1, 'b', 'x'),
        (1, 'b', 'y'),
    )
    for want1, want2, want3 in expected_triples:
        val1, val2, val3, sdm = next(triples)
        assert val1 == want1
        assert val2 == want2
        assert val3 == want3
        assert len(sdm) == 1