def test_map_(): for coltype in (MixedColumn, FloatColumn, IntColumn): dm = DataMatrix(length=2, default_col_type=coltype) dm.a = 1, 2 dm.a = fnc.map_(lambda x: x * 2, dm.a) eq_(dm.a, [2, 4]) ok_(isinstance(dm.a, coltype)) dm = fnc.map_(lambda **d: {'a': 0}, dm) eq_(dm.a, [0, 0]) ok_(isinstance(dm.a, coltype))
def test_sort(): dm = DataMatrix(length=2) dm.a = 'b', 'a' dm.b = 1, 0 dm.a = ops.sort(dm.a) check_col(dm.a, ['a', 'b']) check_col(dm.b, [1, 0]) dm = ops.sort(dm, by=dm.b) check_col(dm.a, ['b', 'a']) check_col(dm.b, [0, 1])
def test_z(): dm = DataMatrix(length=5) dm.a = range(-2, 3) dm.z = ops.z(dm.a) for x, y in zip(dm.z, [-1.26, -0.63, 0, .63, 1.26]): assert (abs(x - y) < .1)
def test_z(): dm = DataMatrix(length=5) dm.a = range(-2,3) dm.z = ops.z(dm.a) for x, y in zip(dm.z, [-1.26, -0.63, 0, .63, 1.26]): assert(abs(x-y) < .1)
def test_weight(): dm = DataMatrix(length=3) dm.a = 'a', 'b', 'c' dm.b = 1, 0, 2 dm = ops.weight(dm.b) check_col(dm.a, ['a', 'c', 'c']) check_col(dm.b, [1, 2, 2])
def test_fullfactorial(): dm = DataMatrix(length=3) dm.a = 'a', 'b', '' dm.b = 0, 1, 2 dm = ops.fullfactorial(dm) check_col(dm.a, ['a', 'b', 'a', 'b', 'a', 'b']) check_col(dm.b, [0, 0, 1, 1, 2, 2])
def test_keep_only(): dm = DataMatrix(length=2) dm.a = 'a', 'b' dm.b = 0, 1 ops.keep_only(dm, ['b']) ok_('a' not in dm.column_names) ok_('b' in dm.column_names)
def test_z(): dm = DataMatrix(length=5) dm.a = range(-2, 3) dm.z = ops.z(dm.a) for test, ref in zip(dm.z, [-1.26, -0.63, 0, .63, 1.26]): assert (math.isclose(test, ref, abs_tol=.01)) # Add a non-numeric value, which should be ignored and its z value should # be NAN. dm.length = 6 dm.z = ops.z(dm.a) assert (dm.z[5] != dm.z[5]) for test, ref in zip(dm.z[:-1], [-1.26, -0.63, 0, .63, 1.26]): assert (math.isclose(test, ref, abs_tol=.01)) # If there is no variability, the z-scores should be NAN dm.a = 2 dm.z = ops.z(dm.a) assert (all(ref != ref for ref in dm.z))
def test_group(): dm = DataMatrix(length=4) dm.a = 'b', 'b', 'a', 'a' dm.b = 'x', 'x', 'x', 'y' dm.c = IntColumn dm.c = 0, 1, 2, 3 dm = ops.group(dm, [dm.a, dm.b]) check_series(dm.c, [[3, np.nan], [2, np.nan], [0, 1]]) # Order guaranteed?
def test_group(): dm = DataMatrix(length=4) dm.a = 'b', 'b', 'a', 'a' dm.b = 'x', 'x', 'x', 'y' dm.c = IntColumn dm.c = 0, 1, 2, 3 dm = ops.group(dm, [dm.a, dm.b]) check_series(dm.c, [[2, np.nan], [3, np.nan], [0, 1]])
def test_auto_type(): dm = DataMatrix(length=2) dm.a = 'a', 1 dm.b = 0.1, 1 dm.c = 0, 1 ops.auto_type(dm) ok_(isinstance(dm.a, MixedColumn)) ok_(isinstance(dm.b, FloatColumn)) ok_(isinstance(dm.c, IntColumn))
def test_auto_type(): dm = DataMatrix(length=2) dm.a = 'a', 1 dm.b = 0.1, 1 dm.c = 0, 1 dm = ops.auto_type(dm) assert isinstance(dm.a, MixedColumn) assert isinstance(dm.b, FloatColumn) assert isinstance(dm.c, IntColumn)
def test_filter_(): dm = DataMatrix(length=4) dm.a = range(4) odd = fnc.filter_(lambda x: x % 2, dm.a) ok_(all([x % 2 for x in odd])) print(type(dm._rowid)) dm = fnc.filter_(lambda **d: d['a'] % 2, dm) print(type(dm._rowid)) eq_(dm.a, [1, 3])
def test_tuple_split(): dm = DataMatrix(length=4) dm.a = 'a', 'a', 'b', 'b' dm.b = 0, 1, 2, 3 dma, dmb = ops.tuple_split(dm.a, 'a', 'b') check_col(dma.a, ['a', 'a']) check_col(dma.b, [0, 1]) check_col(dmb.a, ['b', 'b']) check_col(dmb.b, [2, 3])
def test_replace(): dm = DataMatrix(length=3) dm.a = 0, 1, 2 dm.c = FloatColumn dm.c = np.nan, 1, 2 dm.s = SeriesColumn(depth=3) dm.s[0] = 0, 1, 2 dm.s[1] = np.nan, 1, 2 dm.s[2] = np.nan, 1, 2 dm.a = ops.replace(dm.a, {0: 100, 2: 200}) dm.c = ops.replace(dm.c, {np.nan: 100, 2: np.nan}) dm.s = ops.replace(dm.s, {np.nan: 100, 2: np.nan}) check_col(dm.a, [100, 1, 200]) check_col(dm.c, [100, 1, np.nan]) check_series(dm.s, [ [0, 1, np.nan], [100, 1, np.nan], [100, 1, np.nan], ])
def test_keep_only(): dm = DataMatrix(length=2) dm.a = 'a', 'b' dm.b = 0, 1 dm.c = 'y', 'z' for cols in (['b', 'c'], [dm.b, dm.c]): dm = ops.keep_only(dm, *cols) assert 'a' not in dm.column_names assert 'b' in dm.column_names assert 'c' in dm.column_names
def test_split(): dm = DataMatrix(length=4) dm.a = 'a', 'a', 'b', 'b' dm.b = 0, 1, 2, 3 g = ops.split(dm.a) val, dm = g.next() eq_(val, 'a') check_col(dm.a, ['a', 'a']) check_col(dm.b, [0, 1]) val, dm = g.next() eq_(val, 'b') check_col(dm.a, ['b', 'b']) check_col(dm.b, [2, 3])
def test_random_sample(): dm = DataMatrix(length=3) dm.a = 0, 1, 2 options = [[0, 1], [0, 2], [1, 2], [1, 0], [2, 0], [2, 1]] o = options[:] while o: col = ops.random_sample(dm.a, k=2) if list(col) in o: o.remove(list(col)) o = options[:] while o: dm2 = ops.random_sample(dm, k=2) if list(dm2.a) in o: o.remove(list(dm2.a))
def test_shuffle(): dm = DataMatrix(length=2) dm.a = 'a', 'b' dm.b = 0, 1 while True: dm.a = ops.shuffle(dm.a) check_col(dm.b, [0, 1]) try: check_col(dm.a, ['b', 'a']) break except: pass dm = DataMatrix(length=2) dm.a = 'a', 'b' dm.b = 0, 1 while True: dm = ops.shuffle(dm) try: check_col(dm.a, ['b', 'a']) check_col(dm.b, [1, 0]) break except: pass
def test_split(): dm = DataMatrix(length=4) dm.a = 'a', 'a', 'b', 'b' dm.b = 0, 1, 2, 3 # Without values g = ops.split(dm.a) val, dm = next(g) assert val == 'a' check_col(dm.a, ['a', 'a']) check_col(dm.b, [0, 1]) val, dm = next(g) assert val == 'b' check_col(dm.a, ['b', 'b']) check_col(dm.b, [2, 3]) # With values dm = DataMatrix(length=4) dm.a = 'a', 'a', 'b', 'b' dm.b = 0, 1, 2, 3 dma, dmb = ops.split(dm.a, 'a', 'b') check_col(dma.a, ['a', 'a']) check_col(dma.b, [0, 1]) check_col(dmb.a, ['b', 'b']) check_col(dmb.b, [2, 3])
def test_group(): dm = DataMatrix(length=4) dm.a = 'b', 'b', 'a', 'a' dm.b = 'x', 'x', 'x', 'y' dm.c = IntColumn dm.c = 0, 1, 2, 3 dm = ops.group(dm, [dm.a, dm.b]) # Assert that at least one of the permutations passes for ref in itertools.permutations([[3, np.nan], [2, np.nan], [0, 1]]): try: check_series(dm.c, ref) break except AssertionError: pass else: assert (False)
def test_bin_split(): dm = DataMatrix(length=4) dm.a = range(4) dm = ops.shuffle(dm) dm1, dm2 = ops.bin_split(dm.a, 2) check_col(dm1.a, [0,1]) check_col(dm2.a, [2,3]) dm1, dm2, dm3 = ops.bin_split(dm.a, 3) check_col(dm1.a, [0]) check_col(dm2.a, [1]) check_col(dm3.a, [2,3]) dm1, = ops.bin_split(dm.a, 1) check_col(dm1.a, [0,1,2,3]) @raises(ValueError) def _(): x, = ops.bin_split(dm.a, 5) _()
def test_bin_split(): dm = DataMatrix(length=4) dm.a = range(4) dm = ops.shuffle(dm) dm1, dm2 = ops.bin_split(dm.a, 2) check_col(dm1.a, [0, 1]) check_col(dm2.a, [2, 3]) dm1, dm2, dm3 = ops.bin_split(dm.a, 3) check_col(dm1.a, [0]) check_col(dm2.a, [1]) check_col(dm3.a, [2, 3]) dm1, = ops.bin_split(dm.a, 1) check_col(dm1.a, [0, 1, 2, 3]) def _(): with pytest.raises(ValueError): x, = ops.bin_split(dm.a, 5) _()
def test_shuffle_horiz(): dm = DataMatrix(length=2) dm.a = 'a', 'b' dm.b = 0, 1 dm.c = '-', '-' while True: dm2 = ops.shuffle_horiz(dm) try: check_row(dm2[0], [0, '-', 'a']) break except: pass while True: dm2 = ops.shuffle_horiz(dm.a, dm.b) try: check_row(dm2[0], [0, 'a', '-']) break except: pass for i in range(1000): dm2 = ops.shuffle_horiz(dm.a, dm.b) check_col(dm.c, ['-', '-'])
def test_shuffle_horiz(): dm = DataMatrix(length=2) dm.a = 'a', 'b' dm.b = 0, 1 dm.c = '-', '-' while True: dm2 = ops.shuffle_horiz(dm) try: check_row(dm2[0], [0, '-', 'a']) break except: pass while True: dm2 = ops.shuffle_horiz(dm.a, dm.b) try: check_row(dm2[0], [0, 'a', '-']) break except: pass for i in range(1000): dm2 = ops.shuffle_horiz(dm.a, dm.b) check_col(dm.c, ['-', '-']) ops.shuffle_horiz(dm.a)
def test_split(): dm = DataMatrix(length=4) dm.a = 'a', 'a', 'b', 'b' dm.b = 0, 1, 2, 3 # Without values g = ops.split(dm.a) val, dm = next(g) assert val == 'a' check_col(dm.a, ['a', 'a']) check_col(dm.b, [0, 1]) val, dm = next(g) assert val == 'b' check_col(dm.a, ['b', 'b']) check_col(dm.b, [2, 3]) # With values dm = DataMatrix(length=4) dm.a = 'a', 'a', 'b', 'b' dm.b = 0, 1, 2, 3 dma, dmb = ops.split(dm.a, 'a', 'b') check_col(dma.a, ['a', 'a']) check_col(dma.b, [0, 1]) check_col(dmb.a, ['b', 'b']) check_col(dmb.b, [2, 3]) # With multiple columns dm = DataMatrix(length=8) dm.A = 0, 0, 1, 1, 0, 0, 1, 1 dm.B = 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b' dm.C = 'x', 'x', 'x', 'x', 'y', 'y', 'y', 'y' g = ops.split(dm.A, dm.B) val1, val2, sdm = next(g) assert val1 == 0 assert val2 == 'a' assert (len(sdm) == 2) val1, val2, sdm = next(g) assert val1 == 0 assert val2 == 'b' assert (len(sdm) == 2) val1, val2, sdm = next(g) assert val1 == 1 assert val2 == 'a' assert (len(sdm) == 2) val1, val2, sdm = next(g) assert val1 == 1 assert val2 == 'b' assert (len(sdm) == 2) g = ops.split(dm.A, dm.B, dm.C) val1, val2, val3, sdm = next(g) assert val1 == 0 assert val2 == 'a' assert val3 == 'x' assert (len(sdm) == 1) val1, val2, val3, sdm = next(g) assert val1 == 0 assert val2 == 'a' assert val3 == 'y' assert (len(sdm) == 1) val1, val2, val3, sdm = next(g) assert val1 == 0 assert val2 == 'b' assert val3 == 'x' assert (len(sdm) == 1) val1, val2, val3, sdm = next(g) assert val1 == 0 assert val2 == 'b' assert val3 == 'y' assert (len(sdm) == 1) val1, val2, val3, sdm = next(g) assert val1 == 1 assert val2 == 'a' assert val3 == 'x' assert (len(sdm) == 1) val1, val2, val3, sdm = next(g) assert val1 == 1 assert val2 == 'a' assert val3 == 'y' assert (len(sdm) == 1) val1, val2, val3, sdm = next(g) assert val1 == 1 assert val2 == 'b' assert val3 == 'x' assert (len(sdm) == 1) val1, val2, val3, sdm = next(g) assert val1 == 1 assert val2 == 'b' assert val3 == 'y' assert (len(sdm) == 1)