def test_intcolumn():
    """Exercise assignment, resizing, uniqueness and dtype of an IntColumn.

    String assignments are wrapped in ``@raises(TypeError)`` helpers, i.e.
    the test expects them to be rejected.
    """
    matrix = DataMatrix(length=2)

    # Assignment: column type, scalar, per-row sequence, slice
    matrix.col = IntColumn
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = (2, 3)
    check_col(matrix.col, [2, 3])
    matrix.col[:-1] = 4
    check_col(matrix.col, [4, 3])

    # A string written to a single cell is expected to raise
    @raises(TypeError)
    def _():
        matrix.col[0] = "test"

    _()

    # ... and likewise a string written to the whole column
    @raises(TypeError)
    def _():
        matrix.col[:] = "test"

    _()

    # Shrink to nothing, then grow again
    matrix.length = 0
    matrix.length = 4

    # Unique values
    matrix.col = (1, 2, 1, 2)
    unique_values = sorted(matrix.col.unique)
    ok_(unique_values == [1, 2])

    # The backing sequence should be 64-bit integers
    ok_(matrix.col._seq.dtype == np.int64)
    check_integrity(matrix)
def check_desc_stats(col_type, invalid, assert_invalid):
    """Check descriptive statistics of a column for several lengths.

    Args:
        col_type: Column class used as ``default_col_type`` of the matrix.
        invalid: Value written into a cell to act as an invalid entry
            (skipped when ``col_type`` is ``IntColumn``).
        assert_invalid: Callable that receives a statistic which is expected
            to be invalid and asserts on it.
    """
    matrix = DataMatrix(length=4, default_col_type=col_type)
    accepts_invalid = col_type is not IntColumn

    # Even number of valid values
    matrix.col = (1, 2, 3, 10)
    check_even(matrix)
    if accepts_invalid:
        matrix.length = 5
        matrix.col = (1, 2, 3, 10, invalid)
        check_even(matrix)

    # Odd number of valid values (also an even length with one invalid)
    matrix.length = 3
    matrix.col = (1, 2, 10)
    check_odd(matrix)
    if accepts_invalid:
        matrix.length = 4
        matrix.col[3] = invalid
        check_odd(matrix)

    # Length of one: everything equals the single value, std is invalid
    matrix.length = 1
    matrix.col = 1
    eq_(matrix.col.mean, 1)
    eq_(matrix.col.median, 1)
    assert_invalid(matrix.col.std)
    eq_(matrix.col.min, 1)
    eq_(matrix.col.max, 1)
    eq_(matrix.col.sum, 1)

    # Length of zero: every statistic is invalid
    matrix.length = 0
    for stat_name in ("mean", "median", "std", "min", "max", "sum"):
        assert_invalid(getattr(matrix.col, stat_name))
def test_seriescolumn():
    """Check in-place arithmetic on a depth-2 SeriesColumn.

    Covers scalar operands, per-row sequences and a full 2D numpy array.
    """
    # NOTE(review): test_seriescolumn appears to be defined again later in
    # this file; if so, only the last definition is collected -- confirm.
    matrix = DataMatrix(length=2)
    matrix.col = SeriesColumn(depth=2)
    matrix.col[0] = (1, 2)
    matrix.col[1] = (3, 4)

    # Addition
    matrix.col += 1
    check_series(matrix.col, [[2, 3], [4, 5]])
    matrix.col += (1, 2)
    check_series(matrix.col, [[3, 4], [6, 7]])

    # Subtraction
    matrix.col -= 1
    check_series(matrix.col, [[2, 3], [5, 6]])
    matrix.col -= (1, 2)
    check_series(matrix.col, [[1, 2], [3, 4]])

    # Multiplication
    matrix.col *= 2
    check_series(matrix.col, [[2, 4], [6, 8]])
    matrix.col *= (1.5, 3)
    check_series(matrix.col, [[3, 6], [18, 24]])

    # True division
    matrix.col /= 3
    check_series(matrix.col, [[1, 2], [6, 8]])
    matrix.col /= (1, 2)
    check_series(matrix.col, [[1, 2], [3, 4]])

    # Floor division
    matrix.col //= (1.5, 2.5)
    check_series(matrix.col, [[0, 1], [1, 1]])

    # Element-wise addition of a 2D array
    offsets = np.array([
        [0, 0],
        [10, 10],
    ])
    matrix.col += offsets
    check_series(matrix.col, [[0, 1], [11, 11]])
def check_str_operations():
    """Check that ``+=`` on a MixedColumn of strings concatenates per row."""
    matrix = DataMatrix(length=2, default_col_type=MixedColumn)
    matrix.col = ('a', 'b')
    check_col(matrix.col, ['a', 'b'])
    # Each row gets its own suffix
    matrix.col += ('c', 'd')
    check_col(matrix.col, ['ac', 'bd'])
    check_integrity(matrix)
def check_int_operations():
    """Check that an IntColumn holds integers after float input and math."""
    matrix = DataMatrix(length=2, default_col_type=IntColumn)
    # Float input is expected to come back as 1 and 2
    matrix.col = (1.5, 2.5)
    check_col(matrix.col, [1, 2])
    # Multiplying by a float is expected to yield integers again
    matrix.col *= 1.5
    check_col(matrix.col, [1, 3])
    check_integrity(matrix)
def test_mixedcolumn():
    """Exercise assignment, resizing and uniqueness of the default column.

    Unlike the numeric column tests, a string written to the whole column is
    expected to be stored as-is.
    """
    matrix = DataMatrix(length=2)

    # Assignment: scalar, per-row sequence, slice, string
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = (2, 3)
    check_col(matrix.col, [2, 3])
    matrix.col[:-1] = 4
    check_col(matrix.col, [4, 3])
    matrix.col[:] = "test"
    check_col(matrix.col, ["test", "test"])

    # Shrink to nothing, then grow again
    matrix.length = 0
    matrix.length = 4

    # Unique values
    matrix.col = (1, 2, 1, 2)
    unique_values = sorted(matrix.col.unique)
    ok_(unique_values == [1, 2])
    check_integrity(matrix)
def test_seriescolumn():
    """Check the assignment forms and resizing of a depth-3 SeriesColumn."""
    # NOTE(review): test_seriescolumn appears to be defined again later in
    # this file; if so, only the last definition is collected -- confirm.
    matrix = DataMatrix(length=2)
    matrix.col = SeriesColumn(depth=3)

    # One value for every cell
    matrix.col = 1
    check_series(matrix.col, [[1, 1, 1], [1, 1, 1]])

    # One value per row
    matrix.col = (2, 3)
    check_series(matrix.col, [[2, 2, 2], [3, 3, 3]])

    # One value for a single row
    matrix.col[0] = 4
    check_series(matrix.col, [[4, 4, 4], [3, 3, 3]])

    # A full series for a single row
    matrix.col[1] = (5, 6, 7)
    check_series(matrix.col, [[4, 4, 4], [5, 6, 7]])

    # The same series for every row
    matrix.col.setallrows([8, 9, 10])
    check_series(matrix.col, [[8, 9, 10], [8, 9, 10]])

    # First sample of every row
    matrix.col[:, 0] = 1
    check_series(matrix.col, [[1, 9, 10], [1, 9, 10]])

    # Every sample of the first row
    matrix.col[0, :] = 2
    check_series(matrix.col, [[2, 2, 2], [1, 9, 10]])

    # Every sample of every row
    matrix.col[:, :] = 3
    check_series(matrix.col, [[3, 3, 3], [3, 3, 3]])

    # Change the length, then the depth
    matrix.length = 0
    check_series(matrix.col, [])
    matrix.length = 3
    matrix.col = (1, 2, 3)
    matrix.col.depth = 1
    check_series(matrix.col, [[1], [2], [3]])
    # This variant expects newly added samples to be 0
    matrix.col.depth = 3
    check_series(matrix.col, [[1, 0, 0], [2, 0, 0], [3, 0, 0]])
    check_integrity(matrix)
def test_floatcolumn():
    """Exercise assignment, resizing, uniqueness and dtype of a FloatColumn.

    A string written to the column is expected to read back as NaN in
    every cell.
    """
    # NOTE(review): test_floatcolumn appears to be defined again later in
    # this file; if so, only the last definition is collected -- confirm.
    matrix = DataMatrix(length=2)

    # Assignment: column type, scalar, per-row sequence, slice
    matrix.col = FloatColumn
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = (2, 3)
    check_col(matrix.col, [2, 3])
    matrix.col[:-1] = 4
    check_col(matrix.col, [4, 3])

    # A string should leave only NaN cells behind
    matrix.col[:] = "test"
    for cell in matrix.col:
        ok_(np.isnan(cell))

    # Shrink to nothing, then grow again
    matrix.length = 0
    matrix.length = 4

    # Unique values
    matrix.col = (1, 2, 1, 2)
    unique_values = sorted(matrix.col.unique)
    ok_(unique_values == [1, 2])

    # The backing sequence should be 64-bit floats
    ok_(matrix.col._seq.dtype == np.float64)
    check_integrity(matrix)
def check_select(col_type):
    """Check row selection by comparison and by combined selections.

    Args:
        col_type: Column class used as ``default_col_type`` of the matrix.
    """
    matrix = DataMatrix(length=2, default_col_type=col_type)
    matrix.col = (1, 2)

    # Single comparisons
    selected = matrix.col < 2
    check_col(selected.col, [1])
    selected = matrix.col == 2
    check_col(selected.col, [2])

    # Union of two selections
    selected = (matrix.col == 1) | (matrix.col == 2)
    check_col(selected.col, [1, 2])

    # Intersection of two disjoint selections is empty
    selected = (matrix.col == 1) & (matrix.col == 2)
    check_col(selected.col, [])

    # Exclusive-or of two disjoint selections keeps both rows
    selected = (matrix.col == 1) ^ (matrix.col == 2)
    check_col(selected.col, [1, 2])

    check_integrity(matrix)
def test_seriescolumn():
    """Check per-sample descriptive statistics of a depth-3 SeriesColumn.

    The expected values are the statistics taken across rows for each of the
    three sample positions. Mean and standard deviation are floating-point
    results, so they are compared with a tolerance (``np.allclose``) instead
    of exact equality; the expected standard deviations are computed over the
    values in a different order than the column stores them, which can change
    the last bits of the result.
    """
    # NOTE(review): test_seriescolumn appears to be defined again later in
    # this file; if so, only the last definition is collected -- confirm.
    dm = DataMatrix(length=3)
    dm.col = SeriesColumn(depth=3)
    dm.col[0] = [1, 2, 3]
    dm.col[1] = [3, 3, 3]
    dm.col[2] = [4, 4, 4]

    # Floating-point statistic: tolerance-based comparison
    ok_(np.allclose(dm.col.mean, [8. / 3, 9. / 3, 10 / 3.]))

    # These are exactly representable values, so exact comparison is fine
    ok_(all(dm.col.median == [3, 3, 3]))
    ok_(all(dm.col.max == [4, 4, 4]))
    ok_(all(dm.col.min == [1, 2, 3]))

    # Sample standard deviation (ddof=1) per sample position
    expected_std = [
        np.std([4, 3, 1], ddof=1),
        np.std([4, 3, 2], ddof=1),
        np.std([4, 3, 3], ddof=1),
    ]
    ok_(np.allclose(dm.col.std, expected_std))
def check_operations(col_type):
    """Check in-place arithmetic on a column with scalars and sequences.

    Args:
        col_type: Column class used as ``default_col_type`` of the matrix.
    """
    # NOTE(review): check_operations appears to be defined again later in
    # this file; if so, the later definition replaces this one -- confirm.
    matrix = DataMatrix(length=2, default_col_type=col_type)
    matrix.col = (1, 2)

    # Addition
    matrix.col += 1
    check_col(matrix.col, [2, 3])
    matrix.col += (1, 2)
    check_col(matrix.col, [3, 5])

    # Subtraction
    matrix.col -= 1
    check_col(matrix.col, [2, 4])
    matrix.col -= (1, 2)
    check_col(matrix.col, [1, 2])

    # Multiplication
    matrix.col *= 2
    check_col(matrix.col, [2, 4])
    matrix.col *= (1.5, 3)
    check_col(matrix.col, [3, 12])

    # True division
    matrix.col /= 3
    check_col(matrix.col, [1, 4])
    matrix.col /= (1, 2)
    check_col(matrix.col, [1, 2])

    # Floor division
    matrix.col //= (1.5, 2.5)
    check_col(matrix.col, [0, 0])

    check_integrity(matrix)
def check_operations(col_type):
    """Check in-place and right-hand arithmetic on a column.

    Operands include scalars, per-row sequences and numeric strings such as
    ``'2'``, which the assertions expect to be treated as numbers.

    Args:
        col_type: Column class used as ``default_col_type`` of the matrix.
    """
    matrix = DataMatrix(length=2, default_col_type=col_type)
    matrix.col = (1, '2')

    # In-place addition
    matrix.col += 1
    check_col(matrix.col, [2, 3])
    matrix.col += (1, '2')
    check_col(matrix.col, [3, 5])

    # In-place subtraction
    matrix.col -= 1
    check_col(matrix.col, [2, 4])
    matrix.col -= (1, '2')
    check_col(matrix.col, [1, 2])

    # In-place multiplication
    matrix.col *= 2
    check_col(matrix.col, [2, 4])
    matrix.col *= (1, '2')
    check_col(matrix.col, [2, 8])

    # In-place division
    matrix.col /= 2
    check_col(matrix.col, [1, 4])
    matrix.col /= (1, '2')
    check_col(matrix.col, [1, 2])

    # Right-hand operations: the column is the second operand
    matrix.col = 1 + matrix.col
    check_col(matrix.col, [2, 3])
    matrix.col = (1, 2) + matrix.col
    check_col(matrix.col, [3, 5])

    matrix.col = 5 - matrix.col
    check_col(matrix.col, [2, 0])
    matrix.col = (3, 1) - matrix.col
    check_col(matrix.col, [1, 1])

    matrix.col = 2 * matrix.col
    check_col(matrix.col, [2, 2])
    matrix.col = (1, 2) * matrix.col
    check_col(matrix.col, [2, 4])

    matrix.col = 4 / matrix.col
    check_col(matrix.col, [2, 1])
    matrix.col = (4, 2) / matrix.col
    check_col(matrix.col, [2, 2])

    matrix.col = 2 ** matrix.col
    check_col(matrix.col, [4, 4])
    matrix.col = (2, 4) ** matrix.col
    check_col(matrix.col, [16, 256])

    matrix.col = 17 % matrix.col
    check_col(matrix.col, [1, 17])
    matrix.col = (2, 16) % matrix.col
    check_col(matrix.col, [0, 16])

    check_integrity(matrix)
def test_seriescolumn():
    """Check in-place and right-side arithmetic on a depth-2 SeriesColumn.

    Operands are scalars, per-row sequences and full 2D numpy arrays.
    """
    # NOTE(review): test_seriescolumn appears to be defined again later in
    # this file; if so, only the last definition is collected -- confirm.
    matrix = DataMatrix(length=2)
    matrix.col = SeriesColumn(depth=2)
    matrix.col[0] = (1, 2)
    matrix.col[1] = (3, 4)

    # In-place addition
    matrix.col += 1
    check_series(matrix.col, [[2, 3], [4, 5]])
    matrix.col += (1, 2)
    check_series(matrix.col, [[3, 4], [6, 7]])

    # In-place subtraction
    matrix.col -= 1
    check_series(matrix.col, [[2, 3], [5, 6]])
    matrix.col -= (1, 2)
    check_series(matrix.col, [[1, 2], [3, 4]])

    # In-place multiplication
    matrix.col *= 2
    check_series(matrix.col, [[2, 4], [6, 8]])
    matrix.col *= (1.5, 3)
    check_series(matrix.col, [[3, 6], [18, 24]])

    # In-place true division
    matrix.col /= 3
    check_series(matrix.col, [[1, 2], [6, 8]])
    matrix.col /= (1, 2)
    check_series(matrix.col, [[1, 2], [3, 4]])

    # In-place floor division
    matrix.col //= (1.5, 2.5)
    check_series(matrix.col, [[0, 1], [1, 1]])

    # In-place addition of a 2D array
    matrix.col += np.array([
        [0, 0],
        [10, 10],
    ])
    check_series(matrix.col, [[0, 1], [11, 11]])

    # Right-side operations: reset the rows, column is the second operand
    matrix.col[0] = (1, 2)
    matrix.col[1] = (3, 4)

    matrix.col = 1 + matrix.col
    check_series(matrix.col, [[2, 3], [4, 5]])
    matrix.col = (1, 2) + matrix.col
    check_series(matrix.col, [[3, 4], [6, 7]])

    matrix.col = 1 - matrix.col
    check_series(matrix.col, [[-2, -3], [-5, -6]])
    matrix.col = (1, 2) - matrix.col
    check_series(matrix.col, [[3, 4], [7, 8]])

    matrix.col = 2 * matrix.col
    check_series(matrix.col, [[6, 8], [14, 16]])
    matrix.col = (1.5, 3) * matrix.col
    check_series(matrix.col, [[9, 12], [42, 48]])

    matrix.col = 3 / matrix.col
    check_series(matrix.col, [[1. / 3, 1. / 4], [3. / 42, 1. / 16]])
    matrix.col = (1, 2) / matrix.col
    check_series(matrix.col, [[3, 4], [28, 32]])

    matrix.col = (1.5, 2.5) // matrix.col
    check_series(matrix.col, [[0, 0], [0, 0]])

    matrix.col = np.array([
        [0, 0],
        [10, 10],
    ]) + matrix.col
    check_series(matrix.col, [[0, 0], [10, 10]])
def test_seriescolumn():
    """Check assignment, resizing and indexing of a SeriesColumn.

    Three parts: the assignment forms on a depth-3 column (including length
    and depth changes), how sequences are spread over rows versus samples,
    and the type returned by each kind of index.
    """
    _test_copying(SeriesColumn(depth=1))

    matrix = DataMatrix(length=2)
    matrix.col = SeriesColumn(depth=3)

    # One value for every cell
    matrix.col = 1
    check_series(matrix.col, [[1, 1, 1], [1, 1, 1]])

    # One value per row
    matrix.col = (2, 3)
    check_series(matrix.col, [[2, 2, 2], [3, 3, 3]])

    # One value for a single row
    matrix.col[0] = 4
    check_series(matrix.col, [[4, 4, 4], [3, 3, 3]])

    # A full series for a single row
    matrix.col[1] = (5, 6, 7)
    check_series(matrix.col, [[4, 4, 4], [5, 6, 7]])

    # The same series for every row
    matrix.col.setallrows([8, 9, 10])
    check_series(matrix.col, [[8, 9, 10], [8, 9, 10]])

    # First sample of every row
    matrix.col[:, 0] = 1
    check_series(matrix.col, [[1, 9, 10], [1, 9, 10]])

    # Every sample of the first row
    matrix.col[0, :] = 2
    check_series(matrix.col, [[2, 2, 2], [1, 9, 10]])

    # Every sample of every row
    matrix.col[:, :] = 3
    check_series(matrix.col, [[3, 3, 3], [3, 3, 3]])

    # Change the length, then the depth; new samples are expected to be NAN
    matrix.length = 0
    check_series(matrix.col, [])
    matrix.length = 3
    matrix.col = (1, 2, 3)
    matrix.col.depth = 1
    check_series(matrix.col, [[1], [2], [3]])
    matrix.col.depth = 3
    check_series(
        matrix.col,
        [[1, NAN, NAN], [2, NAN, NAN], [3, NAN, NAN]],
    )
    check_integrity(matrix)

    # A sequence matching the length goes per row; one matching the depth
    # goes per sample
    matrix = DataMatrix(length=2)
    matrix.col = SeriesColumn(depth=3)
    matrix.col = (1, 2)
    check_series(matrix.col, [[1, 1, 1], [2, 2, 2]])
    matrix.col = (3, 4, 5)
    check_series(matrix.col, [[3, 4, 5]] * 2)
    # With length == depth, [:] goes per row and [:, :] per sample
    matrix.col.depth = 2
    matrix.col[:] = (1, 2)
    check_series(matrix.col, [[1, 1], [2, 2]])
    matrix.col[:, :] = (3, 4)
    check_series(matrix.col, [[3, 4], [3, 4]])

    # Return types of the different index forms
    matrix = DataMatrix(length=4)
    matrix.col = SeriesColumn(depth=5)
    matrix.col = [
        [1, 2, 3, 4, 5],
        [6, 7, 8, 9, 10],
        [11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20],
    ]

    # (int, int) -> float
    picked = matrix.col[2, 2]
    eq_(picked, 13)
    eq_(type(picked), float)

    # (int) -> array
    picked = matrix.col[2]
    ok_(all(picked == np.array([11, 12, 13, 14, 15])))
    eq_(type(picked), np.ndarray)

    # (int, slice) -> array
    picked = matrix.col[2, 1:-1]
    ok_(all(picked == np.array([12, 13, 14])))
    eq_(type(picked), np.ndarray)

    # (int, (int, int)) -> array
    picked = matrix.col[2, (1, 3)]
    ok_(all(picked == np.array([12, 14])))
    eq_(type(picked), np.ndarray)

    # (slice) -> SeriesColumn
    picked = matrix.col[1:-1]
    check_series(picked, [
        [6, 7, 8, 9, 10],
        [11, 12, 13, 14, 15],
    ])

    # (slice, int) -> FloatColumn
    picked = matrix.col[1:-1, 2]
    ok_(isinstance(picked, FloatColumn))
    check_col(picked, [8, 13])

    # ((int, int), int) -> FloatColumn
    picked = matrix.col[(1, 3), 2]
    ok_(isinstance(picked, FloatColumn))
    check_col(picked, [8, 18])

    # (slice, slice) -> SeriesColumn
    picked = matrix.col[1:-1, 1:-1]
    ok_(isinstance(picked, _SeriesColumn))
    check_series(picked, [
        [7, 8, 9],
        [12, 13, 14],
    ])

    # ((int, int), slice) -> SeriesColumn
    picked = matrix.col[(1, 3), 1:-1]
    ok_(isinstance(picked, _SeriesColumn))
    check_series(picked, [
        [7, 8, 9],
        [17, 18, 19],
    ])

    # ((int, int), (int, int)) -> SeriesColumn
    picked = matrix.col[(1, 3), (1, 3)]
    ok_(isinstance(picked, _SeriesColumn))
    check_series(picked, [
        [7, 9],
        [17, 19],
    ])
def test_floatcolumn():
    """Check FloatColumn: shared numeric checks, conversion, nan and inf.

    Runs the shared numeric-column and copying checks first, then checks how
    numeric strings, ``None``, non-numeric strings, nan and inf are stored.
    """
    _test_numericcolumn(FloatColumn)
    _test_copying(FloatColumn)

    # Numeric strings are expected to be converted to float
    matrix = DataMatrix(length=2)
    matrix.col = FloatColumn
    matrix.col = (1.9, '2.9')
    check_col(matrix.col, [1.9, 2.9])

    # Values that are expected to end up as nan
    matrix.col = 'nan'
    check_col(matrix.col, [np.nan, np.nan])
    matrix.col = None
    check_col(matrix.col, [np.nan, np.nan])
    matrix.col = np.nan
    check_col(matrix.col, [np.nan, np.nan])
    matrix.col = 'x'
    check_col(matrix.col, [np.nan, np.nan])

    # Values that are expected to end up as inf
    matrix.col = 'inf'
    check_col(matrix.col, [np.inf, np.inf])
    matrix.col = np.inf
    check_col(matrix.col, [np.inf, np.inf])

    # Per-row mixtures of nan and inf
    matrix.col = ('nan', 'inf')
    check_col(matrix.col, [np.nan, np.inf])
    matrix.col = (np.inf, np.nan)
    check_col(matrix.col, [np.inf, np.nan])
    matrix.col = ('x', None)
    check_col(matrix.col, [np.nan, np.nan])

    # The backing sequence should be 64-bit floats
    ok_(matrix.col._seq.dtype == np.float64)
    check_integrity(matrix)
def _test_numericcolumn(cls):
    """Run the checks shared by numeric column types.

    Covers initialisation and reassignment, slice assignment, assignment
    through a selected DataMatrix, resizing, uniqueness, and the types
    returned by the different index forms.

    Args:
        cls: Column class under test. The ``.array`` checks at the end are
            skipped when it is ``MixedColumn``.
    """
    # Initialise and overwrite with a single value
    matrix = DataMatrix(length=2)
    matrix.col = cls
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = 2
    check_col(matrix.col, [2, 2])

    # Initialise and overwrite with a sequence
    matrix = DataMatrix(length=2)
    matrix.col = cls
    matrix.col = (1, 2)
    check_col(matrix.col, [1, 2])
    matrix.col = (3, 4)
    check_col(matrix.col, [3, 4])

    # Assign through slices
    matrix = DataMatrix(length=3)
    matrix.col = cls
    matrix.col = 1
    matrix.col[1:] = 2
    check_col(matrix.col, [1, 2, 2])
    matrix.col[:-1] = (4, 3)
    check_col(matrix.col, [4, 3, 2])

    # Assign using a selected DataMatrix as the index
    matrix = DataMatrix(length=10)
    matrix.x = range(10)
    matrix.y = FloatColumn
    matrix = matrix.x != {3, 6}
    matrix.y[matrix.x > 3] = 10
    matrix.y[matrix.x >= 8] = 11
    check_col(matrix.y, [np.nan] * 3 + [10] * 3 + [11] * 2)

    # Shrink to nothing, then grow again
    matrix = DataMatrix(length=4)
    matrix.length = 0
    matrix.length = 4

    # Unique values, then overwrite the matching rows
    matrix.col = (1, 2, 1, 2)
    assert sorted(matrix.col.unique) == [1, 2]
    matrix.col[matrix.col == 2] = (0, 0)
    check_col(matrix.col, [1, 0, 1, 0])
    check_integrity(matrix)

    # Return types of the different index forms
    matrix = DataMatrix(length=5)
    matrix.col = cls
    matrix.col = (1, 2, 3, 4, 5)

    # int -> plain number (int or float)
    picked = matrix.col[2]
    assert isinstance(picked, (int, float))
    assert picked == 3

    # (int, int) -> column of type cls
    picked = matrix.col[1, 3]
    assert isinstance(picked, cls)
    check_col(picked, [2, 4])

    # slice -> column of type cls
    picked = matrix.col[1:-1]
    assert isinstance(picked, cls)
    check_col(picked, [2, 3, 4])

    # The .array property should expose the values as a 1D numpy array
    if cls != MixedColumn:
        values = matrix.col.array
        assert isinstance(values, np.ndarray)
        assert values.shape == (5,)
        assert all(values == [1, 2, 3, 4, 5])
def test_where():
    """Check indexing a DataMatrix with a selection built from a set.

    The expected result ``[1, 3]`` matches the row positions whose value is
    2 or 4 -- presumably indices are returned; confirm against DataMatrix.
    """
    matrix = DataMatrix(length=4)
    matrix.col = (1, 2, 3, 4)
    selection = matrix.col == {2, 4}
    assert matrix[selection] == [1, 3]