def test_intcolumn():
    """Run the IntColumn checks.

    Covers the shared numeric-column and copying checks, conversion of
    float/string input to int, rejection of non-numeric strings, and the
    dtype of the backing sequence.
    """
    _test_numericcolumn(IntColumn)
    _test_copying(IntColumn)

    # A float and a numeric string are both expected to be stored as ints.
    matrix = DataMatrix(length=2)
    matrix.col = IntColumn
    matrix.col = 1.9, '2.9'
    check_col(matrix.col, [1, 2])

    # Assigning a non-numeric string must raise TypeError, whether the
    # target is a single cell, the whole column, or a slice.
    @raises(TypeError)
    def assign_invalid_cell():
        matrix.col[0] = 'x'

    assign_invalid_cell()

    @raises(TypeError)
    def assign_invalid_column():
        matrix.col = 'x'

    assign_invalid_column()

    @raises(TypeError)
    def assign_invalid_slice():
        matrix.col[:-1] = 'x'

    assign_invalid_slice()

    # The backing sequence is expected to use 64-bit integers.
    ok_(matrix.col._seq.dtype == np.int64)
    check_integrity(matrix)
def test_floatcolumn():
    """Run the FloatColumn checks.

    Covers the shared numeric-column and copying checks, conversion of
    input to float, the handling of nan/inf (including values that are
    expected to become nan), and the dtype of the backing sequence.
    """
    _test_numericcolumn(FloatColumn)
    _test_copying(FloatColumn)

    matrix = DataMatrix(length=2)
    matrix.col = FloatColumn

    nan, inf = np.nan, np.inf
    # (value assigned to the column, expected column contents), applied in
    # order on the same column.
    cases = [
        # Automatic conversion to float
        ((1.9, '2.9'), [1.9, 2.9]),
        # Values that are expected to end up as nan
        ('nan', [nan, nan]),
        (None, [nan, nan]),
        (nan, [nan, nan]),
        ('x', [nan, nan]),
        # Infinity, as a string and as a float
        ('inf', [inf, inf]),
        (inf, [inf, inf]),
        # Per-row mixtures of nan and inf
        (('nan', 'inf'), [nan, inf]),
        ((inf, nan), [inf, nan]),
        (('x', None), [nan, nan]),
    ]
    for assigned, expected in cases:
        matrix.col = assigned
        check_col(matrix.col, expected)

    # The backing sequence is expected to use 64-bit floats.
    ok_(matrix.col._seq.dtype == np.float64)
    check_integrity(matrix)
def check_select(col_type):
    """Check row selection on a two-row matrix whose column holds 1 and 2.

    col_type is passed to DataMatrix as the default column type.
    """
    source = DataMatrix(length=2, default_col_type=col_type)
    source.col = 1, 2

    # Simple comparisons against a scalar
    check_col((source.col < 2).col, [1])
    check_col((source.col == 2).col, [2])

    # Combining two selections with or, and, xor
    either = (source.col == 1) | (source.col == 2)
    check_col(either.col, [1, 2])
    both = (source.col == 1) & (source.col == 2)
    check_col(both.col, [])
    exactly_one = (source.col == 1) ^ (source.col == 2)
    check_col(exactly_one.col, [1, 2])

    # A sequence of matching length is compared pair-wise, row by row
    check_col((source.col == (1, 3)).col, [1])

    # A set matches any of its members
    check_col((source.col == {2, 3, 4}).col, [2])
    check_col((source.col != {1, 3, 4}).col, [2])

    # A callable is used as the comparison predicate
    def is_two(x):
        return x == 2

    check_col((source.col == is_two).col, [2])
    check_col((source.col != is_two).col, [1])

    check_integrity(source)
def test_intcolumn():
    """Check assignment, invalid input, resizing, uniqueness and dtype of an
    IntColumn."""
    matrix = DataMatrix(length=2)
    matrix.col = IntColumn

    # Assignment by scalar, by sequence, and by slice
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = 2, 3
    check_col(matrix.col, [2, 3])
    matrix.col[:-1] = 4
    check_col(matrix.col, [4, 3])

    # Non-numeric strings must be rejected with TypeError
    @raises(TypeError)
    def assign_text_to_cell():
        matrix.col[0] = "test"

    assign_text_to_cell()

    @raises(TypeError)
    def assign_text_to_all():
        matrix.col[:] = "test"

    assign_text_to_all()

    # Shrink the matrix to zero rows and grow it again
    matrix.length = 0
    matrix.length = 4

    # Unique values
    matrix.col = 1, 2, 1, 2
    ok_(sorted(matrix.col.unique) == [1, 2])

    # The backing sequence is expected to use 64-bit integers
    ok_(matrix.col._seq.dtype == np.int64)
    check_integrity(matrix)
def test_seriescolumn():
    """Check merging (`<<`) of two matrices with series columns.

    Columns that exist in only one of the operands are expected to be
    padded with nan in the rows contributed by the other operand.
    """
    nan = np.nan

    left = DataMatrix(length=2)
    left.col1 = SeriesColumn(2)
    left.col1 = 1, 2
    left.col_shared = SeriesColumn(2)
    left.col_shared = 3, 4

    right = DataMatrix(length=2)
    right.col2 = SeriesColumn(2)
    right.col2 = 5, 6
    right.col_shared = SeriesColumn(2)
    right.col_shared = 7, 8

    merged = left << right
    check_series(merged.col1, [[1, 1], [2, 2], [nan, nan], [nan, nan]])
    check_series(merged.col_shared, [[3, 3], [4, 4], [7, 7], [8, 8]])
    check_series(merged.col2, [[nan, nan], [nan, nan], [5, 5], [6, 6]])

    # Selections on a regular column of the merged matrix; only their
    # integrity is checked below.
    merged.i = [4, 0, 2, 1]
    low = merged.i <= 2
    low_or_high = (merged.i <= 2) | (merged.i >= 3)

    for candidate in (left, right, merged, low, low_or_high):
        check_integrity(candidate)
def check_str_operations():
    """Check in-place addition of strings on a MixedColumn.

    Adding a sequence is expected to concatenate row by row.
    """
    matrix = DataMatrix(length=2, default_col_type=MixedColumn)
    matrix.col = 'a', 'b'
    check_col(matrix.col, ['a', 'b'])

    matrix.col += 'c', 'd'
    check_col(matrix.col, ['ac', 'bd'])

    check_integrity(matrix)
def check_int_operations():
    """Check that an IntColumn keeps integer values after float input.

    Both the initial float assignment and the result of multiplying by a
    float are expected to be stored as ints.
    """
    matrix = DataMatrix(length=2, default_col_type=IntColumn)

    matrix.col = 1.5, 2.5
    check_col(matrix.col, [1, 2])

    matrix.col *= 1.5
    check_col(matrix.col, [1, 3])

    check_integrity(matrix)
def test_reduce_():
    """Check series.reduce_ with its default arguments.

    Rows (1, 2, 3) and (2, 3, 4) of a depth-3 series column are expected to
    reduce to 2 and 3.
    """
    matrix = DataMatrix(length=2)
    matrix.series = SeriesColumn(depth=3)
    matrix.series[0] = 1, 2, 3
    matrix.series[1] = 2, 3, 4

    matrix.col = series.reduce_(matrix.series)
    check_col(matrix.col, [2, 3])

    check_integrity(matrix)
def test_window():
    """Check series.window on a depth-4 series column.

    Called with arguments 1 and 3, the result is expected to hold the two
    middle samples of every row.
    """
    matrix = DataMatrix(length=2)
    matrix.series = SeriesColumn(depth=4)
    matrix.series[0] = 0, 1, 1, 0
    matrix.series[1] = 0, 2, 2, 0

    matrix.window = series.window(matrix.series, 1, 3)
    check_series(matrix.window, [[1, 1], [2, 2]])

    check_integrity(matrix)
def _test_numericcolumn(cls):
    """Shared checks for a column class.

    cls is the column class under test. The checks cover assignment by
    scalar, sequence and slice, unique values, assignment through a
    selection, the types returned by indexing, and the `array` property
    (skipped when cls is MixedColumn).
    """
    # Whole-column assignment of a single value, twice in a row
    matrix = DataMatrix(length=2)
    matrix.col = cls
    for scalar in (1, 2):
        matrix.col = scalar
        check_col(matrix.col, [scalar, scalar])

    # Whole-column assignment of a sequence, twice in a row
    matrix = DataMatrix(length=2)
    matrix.col = cls
    matrix.col = 1, 2
    check_col(matrix.col, [1, 2])
    matrix.col = 3, 4
    check_col(matrix.col, [3, 4])

    # Slice assignment, with a scalar and with a sequence
    matrix = DataMatrix(length=3)
    matrix.col = cls
    matrix.col = 1
    matrix.col[1:] = 2
    check_col(matrix.col, [1, 2, 2])
    matrix.col[:-1] = 4, 3
    check_col(matrix.col, [4, 3, 2])

    # Shrink to zero rows and grow again.
    # NOTE(review): this matrix is created without `col = cls`, so the
    # column assigned below uses the matrix's default column type rather
    # than `cls` -- confirm whether that is intended.
    matrix = DataMatrix(length=4)
    matrix.length = 0
    matrix.length = 4

    # Unique values, then assignment through a selection
    matrix.col = 1, 2, 1, 2
    ok_(sorted(matrix.col.unique) == [1, 2])
    matrix.col[matrix.col == 2] = 0, 0
    check_col(matrix.col, [1, 0, 1, 0])
    check_integrity(matrix)

    # Types returned by the different kinds of index
    matrix = DataMatrix(length=5)
    matrix.col = cls
    matrix.col = 1, 2, 3, 4, 5

    # A single int index gives a plain number
    cell = matrix.col[2]
    ok_(isinstance(cell, (int, float)))
    eq_(cell, 3)

    # A tuple of ints gives a column of the same class
    picked = matrix.col[1, 3]
    ok_(isinstance(picked, cls))
    check_col(picked, [2, 4])

    # A slice gives a column of the same class
    sliced = matrix.col[1:-1]
    ok_(isinstance(sliced, cls))
    check_col(sliced, [2, 3, 4])

    # The `array` property is only checked for non-mixed columns
    if cls != MixedColumn:
        values = matrix.col.array
        ok_(isinstance(values, np.ndarray))
        eq_(values.shape, (5, ))
        ok_(all(values == [1, 2, 3, 4, 5]))
def check_float_operations():
    """Check in-place multiplication on a FloatColumn, including
    multiplication by inf and nan."""
    matrix = DataMatrix(length=2, default_col_type=FloatColumn)

    matrix.col = 1, 2
    check_col(matrix.col, [1, 2])

    # Scalar float factor
    matrix.col *= 2.5
    check_col(matrix.col, [2.5, 5])

    # Per-row factors inf and nan
    matrix.col *= np.inf, np.nan
    check_col(matrix.col, [np.inf, np.nan])

    check_integrity(matrix)
def check_shuffle(col_type):
    """Check operations.shuffle on a matrix and on individual columns.

    col_type is passed to DataMatrix as the default column type. After
    shuffling the whole matrix, every row must still satisfy
    col1 == col2 + 10, i.e. rows are kept together.
    """
    matrix = DataMatrix(length=3, default_col_type=col_type)
    matrix.col1 = 11, 12, 13
    matrix.col2 = 1, 2, 3

    matrix = operations.shuffle(matrix)
    for record in matrix:
        ok_(record.col1 == record.col2 + 10)

    # Shuffling single columns; only the matrix integrity is checked.
    matrix.col1 = operations.shuffle(matrix.col1)
    matrix.col2 = operations.shuffle(matrix.col2)
    check_integrity(matrix)
def test_downsample():
    """Check series.downsample on a depth-10 series column with factors
    3 and 5.

    The expected values are the means of consecutive groups of 3
    (respectively 5) samples; with factor 3 the trailing tenth sample is
    not represented in the expected output.
    """
    matrix = DataMatrix(length=2)
    matrix.series = SeriesColumn(depth=10)
    matrix.series[0] = range(10)
    matrix.series[1] = [0, 1] * 5

    matrix.d3 = series.downsample(matrix.series, 3)
    matrix.d5 = series.downsample(matrix.series, 5)

    check_series(matrix.d3, [[1, 4, 7], [1. / 3, 2. / 3, 1. / 3]])
    check_series(matrix.d5, [[2, 7], [.4, .6]])
    check_integrity(matrix)
def check_select(col_type):
    """Check scalar comparisons and their or/and/xor combinations on a
    two-row matrix whose column holds 1 and 2.

    col_type is passed to DataMatrix as the default column type.
    """
    source = DataMatrix(length=2, default_col_type=col_type)
    source.col = 1, 2

    # Simple comparisons against a scalar
    check_col((source.col < 2).col, [1])
    check_col((source.col == 2).col, [2])

    # Combined selections
    either = (source.col == 1) | (source.col == 2)
    check_col(either.col, [1, 2])
    both = (source.col == 1) & (source.col == 2)
    check_col(both.col, [])
    exactly_one = (source.col == 1) ^ (source.col == 2)
    check_col(exactly_one.col, [1, 2])

    check_integrity(source)
def check_nan_sort():
    """Check operations.sort on FloatColumns that contain nan.

    In every expected result nan is placed last.
    """
    nan = np.nan
    matrix = DataMatrix(length=3, default_col_type=FloatColumn)
    matrix.col1 = 2, nan, 1
    matrix.col2 = 1, 2, nan

    # Sort the whole matrix by the first column
    matrix = operations.sort(matrix, by=matrix.col1)
    check_col(matrix.col1, [1, 2, nan])
    check_col(matrix.col2, [nan, 1, 2])

    # Sort the whole matrix by the second column
    matrix = operations.sort(matrix, by=matrix.col2)
    check_col(matrix.col1, [2, nan, 1])
    check_col(matrix.col2, [1, 2, nan])

    # Sort each column independently
    matrix.col1 = operations.sort(matrix.col1)
    matrix.col2 = operations.sort(matrix.col2)
    check_col(matrix.col1, [1, 2, nan])
    check_col(matrix.col2, [1, 2, nan])

    check_integrity(matrix)
def check_sort(col_type):
    """Check operations.sort on a matrix and on individual columns.

    col_type is passed to DataMatrix as the default column type.
    """
    matrix = DataMatrix(length=3, default_col_type=col_type)
    matrix.col1 = 3, 2, 1
    matrix.col2 = 1, 2, 3

    # Sort the whole matrix by the first column
    matrix = operations.sort(matrix, by=matrix.col1)
    check_col(matrix.col1, [1, 2, 3])
    check_col(matrix.col2, [3, 2, 1])

    # Sort the whole matrix by the second column
    matrix = operations.sort(matrix, by=matrix.col2)
    check_col(matrix.col1, [3, 2, 1])
    check_col(matrix.col2, [1, 2, 3])

    # Sort one column by the values of another
    matrix.col2 = operations.sort(matrix.col2, by=matrix.col1)
    check_col(matrix.col2, [3, 2, 1])

    # Sort each column by its own values
    matrix.col1 = operations.sort(matrix.col1)
    matrix.col2 = operations.sort(matrix.col2)
    check_col(matrix.col1, [1, 2, 3])
    check_col(matrix.col2, [1, 2, 3])

    check_integrity(matrix)
def test_mixedcolumn():
    """Check assignment, resizing and uniqueness on a column created with
    the matrix's default column type.

    Unlike the numeric variants of this test, assigning a string to the
    column is expected to succeed and store the string.
    """
    matrix = DataMatrix(length=2)

    # Assignment by scalar, by sequence, and by slice
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = 2, 3
    check_col(matrix.col, [2, 3])
    matrix.col[:-1] = 4
    check_col(matrix.col, [4, 3])
    matrix.col[:] = "test"
    check_col(matrix.col, ["test", "test"])

    # Shrink the matrix to zero rows and grow it again
    matrix.length = 0
    matrix.length = 4

    # Unique values
    matrix.col = 1, 2, 1, 2
    ok_(sorted(matrix.col.unique) == [1, 2])

    check_integrity(matrix)
def test_seriescolumn():
    """Check the assignment forms of a SeriesColumn and changes of matrix
    length and column depth."""
    matrix = DataMatrix(length=2)
    matrix.col = SeriesColumn(depth=3)

    # One value for every cell
    matrix.col = 1
    check_series(matrix.col, [[1, 1, 1], [1, 1, 1]])

    # One value per row
    matrix.col = 2, 3
    check_series(matrix.col, [[2, 2, 2], [3, 3, 3]])

    # One value for a single row
    matrix.col[0] = 4
    check_series(matrix.col, [[4, 4, 4], [3, 3, 3]])

    # A full series for a single row
    matrix.col[1] = 5, 6, 7
    check_series(matrix.col, [[4, 4, 4], [5, 6, 7]])

    # The same series for every row
    matrix.col.setallrows([8, 9, 10])
    check_series(matrix.col, [[8, 9, 10], [8, 9, 10]])

    # Two-dimensional indexing: first sample of all rows, ...
    matrix.col[:, 0] = 1
    check_series(matrix.col, [[1, 9, 10], [1, 9, 10]])

    # ... all samples of the first row, ...
    matrix.col[0, :] = 2
    check_series(matrix.col, [[2, 2, 2], [1, 9, 10]])

    # ... and all samples of all rows
    matrix.col[:, :] = 3
    check_series(matrix.col, [[3, 3, 3], [3, 3, 3]])

    # Shrinking to zero rows leaves an empty series column
    matrix.length = 0
    check_series(matrix.col, [])

    # Grow again, then reduce and increase the depth; samples added by
    # increasing the depth are expected to be 0
    matrix.length = 3
    matrix.col = 1, 2, 3
    matrix.col.depth = 1
    check_series(matrix.col, [[1], [2], [3]])
    matrix.col.depth = 3
    check_series(matrix.col, [[1, 0, 0], [2, 0, 0], [3, 0, 0]])

    check_integrity(matrix)
def test_floatcolumn():
    """Check assignment, non-numeric input, resizing, uniqueness and dtype
    of a FloatColumn."""
    matrix = DataMatrix(length=2)
    matrix.col = FloatColumn

    # Assignment by scalar, by sequence, and by slice
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = 2, 3
    check_col(matrix.col, [2, 3])
    matrix.col[:-1] = 4
    check_col(matrix.col, [4, 3])

    # A non-numeric string is expected to be stored as nan in every cell
    matrix.col[:] = "test"
    for cell in matrix.col:
        ok_(np.isnan(cell))

    # Shrink the matrix to zero rows and grow it again
    matrix.length = 0
    matrix.length = 4

    # Unique values
    matrix.col = 1, 2, 1, 2
    ok_(sorted(matrix.col.unique) == [1, 2])

    # The backing sequence is expected to use 64-bit floats
    ok_(matrix.col._seq.dtype == np.float64)
    check_integrity(matrix)
def check_operations(col_type):
    """Check the in-place arithmetic operators on a column, each with a
    scalar operand and with a per-row sequence operand.

    col_type is passed to DataMatrix as the default column type. The steps
    build on each other, so their order matters.
    """
    matrix = DataMatrix(length=2, default_col_type=col_type)
    matrix.col = 1, 2

    # Addition
    matrix.col += 1
    check_col(matrix.col, [2, 3])
    matrix.col += 1, 2
    check_col(matrix.col, [3, 5])

    # Subtraction
    matrix.col -= 1
    check_col(matrix.col, [2, 4])
    matrix.col -= 1, 2
    check_col(matrix.col, [1, 2])

    # Multiplication
    matrix.col *= 2
    check_col(matrix.col, [2, 4])
    matrix.col *= 1.5, 3
    check_col(matrix.col, [3, 12])

    # True division
    matrix.col /= 3
    check_col(matrix.col, [1, 4])
    matrix.col /= 1, 2
    check_col(matrix.col, [1, 2])

    # Floor division
    matrix.col //= 1.5, 2.5
    check_col(matrix.col, [0, 0])

    check_integrity(matrix)
def test_seriescolumn():
    """Check merging (`<<`) of two matrices with series columns.

    Columns that exist in only one of the operands are expected to be
    padded with 0 in the rows contributed by the other operand.
    """
    left = DataMatrix(length=2)
    left.col1 = SeriesColumn(2)
    left.col1 = 1, 2
    left.col_shared = SeriesColumn(2)
    left.col_shared = 3, 4

    right = DataMatrix(length=2)
    right.col2 = SeriesColumn(2)
    right.col2 = 5, 6
    right.col_shared = SeriesColumn(2)
    right.col_shared = 7, 8

    merged = left << right
    check_series(merged.col1, [[1, 1], [2, 2], [0, 0], [0, 0]])
    check_series(merged.col_shared, [[3, 3], [4, 4], [7, 7], [8, 8]])
    check_series(merged.col2, [[0, 0], [0, 0], [5, 5], [6, 6]])

    # Selections on a regular column of the merged matrix; only their
    # integrity is checked below.
    merged.i = [4, 0, 2, 1]
    low = merged.i <= 2
    low_or_high = (merged.i <= 2) | (merged.i >= 3)

    for candidate in (left, right, merged, low, low_or_high):
        check_integrity(candidate)