def check_mixedcolumn_sorting():
    """Shuffle and sort a default-typed column holding mixed values.

    The expected result lists the infinities and finite numbers in ascending
    order, followed by the strings, then the None values, then the NAN values.
    """
    unsorted_values = [
        1, '1', 2, '2',
        1.1, '1.1', 2.1, '2.1',
        INF, -INF, 'inf', '-inf',
        NAN, NAN, 'nan', 'nan',
        None, None, None, None,
        'alpha', 'beta', 'None', ''
    ]
    expected_order = [
        -INF, -INF, 1, 1, 1.1, 1.1, 2, 2, 2.1, 2.1, INF, INF,
        '', 'None', 'alpha', 'beta',
        None, None, None, None,
        NAN, NAN, NAN, NAN,
    ]
    matrix = DataMatrix(length=24)
    matrix.c = unsorted_values
    # Shuffle first so that the sort cannot pass by accident.
    matrix.c = ops.shuffle(matrix.c)
    matrix = ops.sort(matrix, by=matrix.c)
    check_col(matrix.c, expected_order)
def test_group():
    """Group a four-row matrix by columns `a` and `b` and check column `c`.

    The grouped `c` column is expected to hold three series: [2, nan],
    [3, nan] and [0, 1]. The order in which the groups come back is not
    relied upon, so the check accepts any ordering of those three rows.
    """
    # Local import keeps this block self-contained.
    import itertools

    dm = DataMatrix(length=4)
    dm.a = 'b', 'b', 'a', 'a'
    dm.b = 'x', 'x', 'x', 'y'
    dm.c = IntColumn
    dm.c = 0, 1, 2, 3
    dm = ops.group(dm, [dm.a, dm.b])
    expected_rows = [[2, np.nan], [3, np.nan], [0, 1]]
    # Pass as soon as one ordering of the expected rows matches.
    for candidate in itertools.permutations(expected_rows):
        try:
            check_series(dm.c, candidate)
        except AssertionError:
            continue
        break
    else:
        raise AssertionError(
            'grouped series does not match the expected rows in any order'
        )
def test_group():
    """Group a four-row matrix by columns `a` and `b` and check column `c`.

    The grouped `c` column is expected to hold three series: [3, nan],
    [2, nan] and [0, 1]. Group order is not guaranteed, so the check accepts
    any ordering of those three rows instead of one fixed sequence.
    """
    # Local import keeps this block self-contained.
    import itertools

    dm = DataMatrix(length=4)
    dm.a = 'b', 'b', 'a', 'a'
    dm.b = 'x', 'x', 'x', 'y'
    dm.c = IntColumn
    dm.c = 0, 1, 2, 3
    dm = ops.group(dm, [dm.a, dm.b])
    expected_rows = [[3, np.nan], [2, np.nan], [0, 1]]
    # Pass as soon as one ordering of the expected rows matches.
    for candidate in itertools.permutations(expected_rows):
        try:
            check_series(dm.c, candidate)
        except AssertionError:
            continue
        break
    else:
        raise AssertionError(
            'grouped series does not match the expected rows in any order'
        )
def check_intcolumn_sorting():
    """Shuffle and sort an IntColumn filled from ints, floats and strings.

    The expected sorted column holds only the integers 1 and 2 (four of
    each), so the fractional inputs, including the string '2.8', are expected
    to appear as their integer part.
    """
    raw_values = [
        1, '1', 2, '2',
        1.1, '1.1', 2.1, '2.8',
    ]
    expected_order = [1, 1, 1, 1, 2, 2, 2, 2]
    matrix = DataMatrix(length=8, default_col_type=IntColumn)
    matrix.c = raw_values
    matrix.c = ops.shuffle(matrix.c)
    matrix = ops.sort(matrix, by=matrix.c)
    check_col(matrix.c, expected_order)
def test_properties():
    """Check the `empty` property and `len()` of a DataMatrix."""
    # Zero rows: reported as empty even with a column.
    zero_rows = DataMatrix(length=0)
    zero_rows.c = -1
    assert zero_rows.empty

    # One row but no columns: still reported as empty.
    no_columns = DataMatrix(length=1)
    assert no_columns.empty

    # One row and one column: no longer empty.
    one_cell = DataMatrix(length=1)
    one_cell.c = -1
    assert not one_cell.empty

    # The length is the number of rows, whatever the number of columns.
    three_rows = DataMatrix(length=3)
    three_rows.c = -1
    three_rows.d = -1
    assert len(three_rows) == 3
def generatedata(effectsize=EFFECTSIZE, blinksinbaseline=BLINKSINBASELINE, **kwargs):
    """Build a DataMatrix of synthetic traces with simulated blinks.

    The matrix has TRACES rows, an integer column `c` (1 for the first half
    of the rows, 2 for the second half) and a series column `y` of depth
    TRACELEN.

    Parameters
    ----------
    effectsize
        End value of the linear ramp (starting at 0) that is added to `y`
        for the second half of the rows.
    blinksinbaseline
        Rows with an index below this value get a blink that starts at
        sample 1 instead of at a random position.
    **kwargs
        Accepted but not used in this function.

    Returns
    -------
    The generated DataMatrix.
    """
    dm = DataMatrix(length=TRACES)
    dm.c = IntColumn
    dm.c[:TRACES // 2] = 1
    dm.c[TRACES // 2:] = 2
    dm.y = SeriesColumn(depth=TRACELEN)
    # `a` is not defined in this function; presumably a module-level base
    # trace -- TODO confirm.
    dm.y.setallrows(a)
    # TRACES random integers are added to the series column; presumably one
    # offset per row -- verify against SeriesColumn broadcasting.
    dm.y += np.random.randint(NOISERANGE[0], NOISERANGE[1], TRACES)
    dm.y[TRACES // 2:] += np.linspace(0, effectsize, TRACELEN)
    # Introduce blinks
    for i, row in enumerate(dm):
        blinklen = np.random.randint(BLINKLEN[0], BLINKLEN[1], BLINKS)
        if i < blinksinbaseline:
            # Fixed start at sample 1; adding `blinklen` below broadcasts
            # this single start against all BLINKS lengths.
            blinkstart = np.array([1])
        else:
            blinkstart = np.random.randint(BASELINE[1], TRACELEN, BLINKS)
        blinkend = blinkstart + blinklen
        for start, end in zip(blinkstart, blinkend):
            # Keep the blink end inside the trace.
            end = min(TRACELEN - 1, end)
            # Skip blinks that are too short to hold both margins.
            if end - start < 2 * BLINKMARGIN:
                continue
            # Ramp from the sample before the blink down to 0 ...
            row.y[start:start+BLINKMARGIN] = \
                np.linspace(row.y[start-1], 0, BLINKMARGIN)
            # ... and from 0 back up to the sample at the blink end.
            row.y[end-BLINKMARGIN:end] = \
                np.linspace(0, row.y[end], BLINKMARGIN)
            # NOTE(review): this slice spans start:end and therefore also
            # covers the margin samples written just above -- confirm that
            # overwriting the ramps is intended.
            row.y[start:end] = np.random.randint(0, 100, end - start)
    return dm
def _test_numeric_properties(coltype, nan):
    """Check the summary statistics of a column of the given type.

    Column `c` holds 1, 1, `nan`, 4 and must report statistics that ignore
    the `nan` entry. Column `d` holds only `nan` and every statistic is
    passed to `all_nan`. For IntColumn and FloatColumn the all-nan
    statistics are additionally expected to emit a RuntimeWarning.
    """
    matrix = DataMatrix(length=4, default_col_type=coltype)
    matrix.c = 1, 1, nan, 4
    matrix.d = [nan] * 4

    expected_stats = (
        ('mean', 2),
        ('median', 1),
        ('std', np.std([1, 1, 4], ddof=1)),
        ('max', 4),
        ('min', 1),
        ('sum', 6),
    )
    for stat, expected in expected_stats:
        assert getattr(matrix.c, stat) == expected

    def _check_all_nan_column():
        # Each property is read and checked in turn.
        for stat, _ in expected_stats:
            all_nan(getattr(matrix.d, stat), nan)

    if coltype in (IntColumn, FloatColumn):
        with pytest.warns(RuntimeWarning):
            _check_all_nan_column()
    else:
        _check_all_nan_column()
def test_group():
    """Group a four-row matrix by columns `a` and `b` and check column `c`.

    The grouped `c` column must match the rows [3, nan], [2, nan] and [0, 1]
    in at least one ordering.
    """
    dm = DataMatrix(length=4)
    dm.a = 'b', 'b', 'a', 'a'
    dm.b = 'x', 'x', 'x', 'y'
    dm.c = IntColumn
    dm.c = 0, 1, 2, 3
    dm = ops.group(dm, [dm.a, dm.b])

    def _matches(ordering):
        # Turn the assertion-based check into a boolean.
        try:
            check_series(dm.c, ordering)
        except AssertionError:
            return False
        return True

    orderings = itertools.permutations([[3, np.nan], [2, np.nan], [0, 1]])
    # any() stops at the first ordering that passes.
    found = any(_matches(ordering) for ordering in orderings)
    assert found
def test_auto_type():
    """Check the column types that `ops.auto_type` assigns.

    A column mixing a string and a number is expected to be a MixedColumn, a
    column with a float a FloatColumn, and an all-integer column an
    IntColumn.
    """
    matrix = DataMatrix(length=2)
    matrix.a = 'a', 1
    matrix.b = 0.1, 1
    matrix.c = 0, 1
    matrix = ops.auto_type(matrix)
    expected_types = (
        ('a', MixedColumn),
        ('b', FloatColumn),
        ('c', IntColumn),
    )
    for column_name, column_type in expected_types:
        assert isinstance(getattr(matrix, column_name), column_type)
def test_auto_type():
    """Check the column types after calling `ops.auto_type`.

    A column mixing a string and a number is expected to be a MixedColumn, a
    column with a float a FloatColumn, and an all-integer column an
    IntColumn.
    """
    matrix = DataMatrix(length=2)
    matrix.a = 'a', 1
    matrix.b = 0.1, 1
    matrix.c = 0, 1
    # The return value is discarded here; the checks below inspect the
    # matrix that was passed in.
    ops.auto_type(matrix)
    expected_types = (
        ('a', MixedColumn),
        ('b', FloatColumn),
        ('c', IntColumn),
    )
    for column_name, column_type in expected_types:
        ok_(isinstance(getattr(matrix, column_name), column_type))
def test_keep_only():
    """Check that `ops.keep_only` drops every column that is not listed.

    The columns to keep are given once by name and once as column objects.
    Each form is applied to the full three-column matrix, so both are shown
    to remove column `a` rather than the second form running on a matrix
    that the first form already filtered.
    """
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    dm.c = 'y', 'z'
    for cols in (['b', 'c'], [dm.b, dm.c]):
        # Bind the result to a new name so `dm` stays the unfiltered input
        # for the next iteration.
        kept = ops.keep_only(dm, *cols)
        assert 'a' not in kept.column_names
        assert 'b' in kept.column_names
        assert 'c' in kept.column_names
def test_replace():
    """Check `ops.replace` on a plain, a float and a series column.

    In the float and series columns nan is mapped to 100 and 2 is mapped to
    nan; in the plain column 0 and 2 are mapped to 100 and 200.
    """
    matrix = DataMatrix(length=3)
    matrix.a = 0, 1, 2
    matrix.c = FloatColumn
    matrix.c = np.nan, 1, 2
    matrix.s = SeriesColumn(depth=3)
    series_rows = (
        (0, 1, 2),
        (np.nan, 1, 2),
        (np.nan, 1, 2),
    )
    for row_index, samples in enumerate(series_rows):
        matrix.s[row_index] = samples

    matrix.a = ops.replace(matrix.a, {0: 100, 2: 200})
    matrix.c = ops.replace(matrix.c, {np.nan: 100, 2: np.nan})
    matrix.s = ops.replace(matrix.s, {np.nan: 100, 2: np.nan})

    check_col(matrix.a, [100, 1, 200])
    check_col(matrix.c, [100, 1, np.nan])
    expected_series = [
        [0, 1, np.nan],
        [100, 1, np.nan],
        [100, 1, np.nan],
    ]
    check_series(matrix.s, expected_series)
def _test_basic_properties(coltype):
    """Check `name`, `unique` and `count` for a column of the given type.

    Columns `c` and `d` are the same column under two names, so both report
    the name list ['c', 'd']; column `e` is separate and reports 'e'.
    """
    matrix = DataMatrix(length=4, default_col_type=coltype)
    values = 3, 1, 2, 3
    matrix.c = values
    matrix.d = matrix.c
    matrix.e = values
    for alias in ('c', 'd'):
        eq_(getattr(matrix, alias).name, ['c', 'd'])
    eq_(matrix.e.name, 'e')
    eq_(list(matrix.c.unique), [1, 2, 3])
    eq_(matrix.c.count, 3)
def _test_basic_properties(coltype):
    """Check `name`, `unique` and `count` for a column of the given type.

    Columns `c` and `d` are the same column under two names, so both report
    the name list ['c', 'd']; column `e` is separate and reports 'e'.
    """
    matrix = DataMatrix(length=4, default_col_type=coltype)
    values = 3, 1, 2, 3
    matrix.c = values
    matrix.d = matrix.c
    matrix.e = values
    for alias in ('c', 'd'):
        assert getattr(matrix, alias).name == ['c', 'd']
    assert matrix.e.name == 'e'
    assert list(matrix.c.unique) == [1, 2, 3]
    assert matrix.c.count == 3
def check_floatcolumn_sorting():
    """Shuffle and sort a FloatColumn filled with mixed values.

    Assigning the mixed values is expected to emit a UserWarning. After
    sorting, the infinities and finite numbers are expected in ascending
    order, and the twelve remaining entries are all expected to be NAN.
    """
    raw_values = [
        1, '1', 2, '2',
        1.1, '1.1', 2.1, '2.1',
        INF, -INF, 'inf', '-inf',
        NAN, NAN, 'nan', 'nan',
        None, None, None, None,
        'alpha', 'beta', 'None', ''
    ]
    expected_order = [
        -INF, -INF, 1, 1, 1.1, 1.1, 2, 2, 2.1, 2.1, INF, INF,
        NAN, NAN, NAN, NAN,
        NAN, NAN, NAN, NAN,
        NAN, NAN, NAN, NAN,
    ]
    matrix = DataMatrix(length=24, default_col_type=FloatColumn)
    with pytest.warns(UserWarning):
        matrix.c = raw_values
    matrix.c = ops.shuffle(matrix.c)
    matrix = ops.sort(matrix, by=matrix.c)
    check_col(matrix.c, expected_order)
def _test_numeric_properties(coltype, nan):
    """Check the summary statistics of a column of the given type.

    Column `c` holds 1, 1, `nan`, 4 and must report statistics that ignore
    the `nan` entry. Column `d` holds only `nan` and every statistic is
    passed to `all_nan`.
    """
    matrix = DataMatrix(length=4, default_col_type=coltype)
    matrix.c = 1, 1, nan, 4
    matrix.d = [nan] * 4

    expected_stats = (
        ('mean', 2),
        ('median', 1),
        ('std', np.std([1, 1, 4], ddof=1)),
        ('max', 4),
        ('min', 1),
        ('sum', 6),
    )
    for stat, expected in expected_stats:
        eq_(getattr(matrix.c, stat), expected)
    # The same statistics, in the same order, on the all-nan column.
    for stat, _ in expected_stats:
        all_nan(getattr(matrix.d, stat), nan)
def _test_numeric_properties(coltype, nan):
    """Check the summary statistics of a column of the given type.

    Column `c` holds 1, 1, `nan`, 4 and must report statistics that ignore
    the `nan` entry. Column `d` holds only `nan` and every statistic is
    passed to `all_nan`.
    """
    matrix = DataMatrix(length=4, default_col_type=coltype)
    matrix.c = 1, 1, nan, 4
    matrix.d = [nan] * 4

    expected_stats = (
        ('mean', 2),
        ('median', 1),
        ('std', np.std([1, 1, 4], ddof=1)),
        ('max', 4),
        ('min', 1),
        ('sum', 6),
    )
    for stat, expected in expected_stats:
        assert getattr(matrix.c, stat) == expected
    # The same statistics, in the same order, on the all-nan column.
    for stat, _ in expected_stats:
        all_nan(getattr(matrix.d, stat), nan)
def _test_copying(cls):
    """Check identity of columns and their `_seq` after slicing and assigning.

    `cls` is the column type assigned to column `d` of the source matrix.
    """
    source = DataMatrix(length=5)
    source.d = cls

    # A full slice gives a new matrix with its own columns.
    clone = source[:]
    clone.e = source.d
    clone.f = clone.d
    ok_(clone is not source)
    ok_(clone.d is not source.d)
    # A column assigned from another matrix is not the same object ...
    ok_(clone.e is not source.d)
    # ... while a column assigned within the same matrix is.
    ok_(clone.f is clone.d)
    ok_(clone.d._seq is not source.d._seq)

    # Assignment within one matrix shares both the column and its `_seq`.
    source.c = source.d
    ok_(source.c is source.d)
    ok_(source.c._seq is source.d._seq)

    # Assigning a sliced column gives a separate column and `_seq`.
    source.e = source.d[:]
    ok_(source.e is not source.d)
    ok_(source.e._seq is not source.d._seq)

    check_integrity(source)
    check_integrity(clone)
def test_shuffle_horiz():
    """Check `ops.shuffle_horiz` on a whole matrix and on selected columns.

    The shuffle is random, so each expectation is retried until the first row
    matches. Retries are bounded so that a regression fails the test instead
    of hanging it, and only `Exception` is swallowed so that
    KeyboardInterrupt and SystemExit still stop the run.
    """
    max_attempts = 1000

    def _shuffle_until(shuffle, expected_row):
        # Re-shuffle until the first row equals `expected_row`.
        for _ in range(max_attempts):
            shuffled = shuffle()
            try:
                check_row(shuffled[0], expected_row)
            except Exception:
                # A mismatch just means this shuffle gave another order.
                continue
            return
        raise AssertionError(
            'shuffle_horiz never produced first row %r in %d attempts'
            % (expected_row, max_attempts)
        )

    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    dm.c = '-', '-'
    # Shuffling the whole matrix can move values across all three columns.
    _shuffle_until(lambda: ops.shuffle_horiz(dm), [0, '-', 'a'])
    # Shuffling only `a` and `b` must leave `c` in place.
    _shuffle_until(lambda: ops.shuffle_horiz(dm.a, dm.b), [0, 'a', '-'])
    for _ in range(1000):
        ops.shuffle_horiz(dm.a, dm.b)
        check_col(dm.c, ['-', '-'])
def test_shuffle_horiz():
    """Check `ops.shuffle_horiz` on a whole matrix and on selected columns.

    The shuffle is random, so each expectation is retried until the first row
    matches. Retries are bounded so that a regression fails the test instead
    of hanging it, and only `Exception` is swallowed so that
    KeyboardInterrupt and SystemExit still stop the run.
    """
    max_attempts = 1000

    def _shuffle_until(shuffle, expected_row):
        # Re-shuffle until the first row equals `expected_row`.
        for _ in range(max_attempts):
            shuffled = shuffle()
            try:
                check_row(shuffled[0], expected_row)
            except Exception:
                # A mismatch just means this shuffle gave another order.
                continue
            return
        raise AssertionError(
            'shuffle_horiz never produced first row %r in %d attempts'
            % (expected_row, max_attempts)
        )

    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    dm.c = '-', '-'
    # Shuffling the whole matrix can move values across all three columns.
    _shuffle_until(lambda: ops.shuffle_horiz(dm), [0, '-', 'a'])
    # Shuffling only `a` and `b` must leave `c` in place.
    _shuffle_until(lambda: ops.shuffle_horiz(dm.a, dm.b), [0, 'a', '-'])
    for _ in range(1000):
        ops.shuffle_horiz(dm.a, dm.b)
        check_col(dm.c, ['-', '-'])
    # A single column must be accepted without raising.
    ops.shuffle_horiz(dm.a)