Example #1
0
def check_mixedcolumn_sorting():
    """Sort a shuffled mixed-type column and verify the resulting order.

    The expected column lists the numeric values (including infinities and
    numeric-looking strings) in ascending order, followed by the remaining
    strings, then the ``None`` values, and finally the NAN values.
    """
    raw_values = [
        1, '1', 2, '2', 1.1, '1.1', 2.1, '2.1', INF, -INF, 'inf', '-inf', NAN,
        NAN, 'nan', 'nan', None, None, None, None, 'alpha', 'beta', 'None', ''
    ]
    # Each numeric value appears twice: once as a number, once as a string.
    expected_order = (
        [-INF] * 2
        + [1, 1, 1.1, 1.1, 2, 2, 2.1, 2.1]
        + [INF] * 2
        + ['', 'None', 'alpha', 'beta']
        + [None] * 4
        + [NAN] * 4
    )
    dm = DataMatrix(length=24)
    dm.c = raw_values
    # Randomize the row order so that the sort has real work to do.
    dm.c = ops.shuffle(dm.c)
    dm = ops.sort(dm, by=dm.c)
    check_col(dm.c, expected_order)
def test_group():
	"""Check that ops.group() collapses rows that share the same (a, b) key.

	Rows 0 and 1 share the key ('b', 'x') and hold c = 0, 1; rows 2 and 3
	have unique keys and hold c = 2 and c = 3. After grouping, `c` is
	checked as a series with one row per group, where single-member groups
	are padded with nan.

	The order in which the groups come out is not treated as part of the
	contract, so the test passes if any ordering of the expected rows
	matches.
	"""
	import itertools

	dm = DataMatrix(length=4)
	dm.a = 'b', 'b', 'a', 'a'
	dm.b = 'x', 'x', 'x', 'y'
	dm.c = IntColumn
	dm.c = 0, 1, 2, 3
	dm = ops.group(dm, [dm.a, dm.b])
	expected_rows = [[2, np.nan], [3, np.nan], [0, 1]]
	for ref in itertools.permutations(expected_rows):
		try:
			check_series(dm.c, list(ref))
		except AssertionError:
			# This ordering does not match; try the next one.
			continue
		break
	else:
		raise AssertionError(
			'grouped series does not match any ordering of the expected rows'
		)
Example #3
0
def test_group():
    """Check that ops.group() collapses rows that share the same (a, b) key.

    Rows 0 and 1 share the key ('b', 'x') and hold c = 0, 1; rows 2 and 3
    have unique keys and hold c = 2 and c = 3. After grouping, `c` is
    checked as a series with one row per group, where single-member groups
    are padded with nan.

    The order in which the groups come out is not treated as part of the
    contract, so the test passes if any ordering of the expected rows
    matches.
    """
    import itertools

    dm = DataMatrix(length=4)
    dm.a = 'b', 'b', 'a', 'a'
    dm.b = 'x', 'x', 'x', 'y'
    dm.c = IntColumn
    dm.c = 0, 1, 2, 3
    dm = ops.group(dm, [dm.a, dm.b])
    expected_rows = [[3, np.nan], [2, np.nan], [0, 1]]
    for ref in itertools.permutations(expected_rows):
        try:
            check_series(dm.c, list(ref))
        except AssertionError:
            # This ordering does not match; try the next one.
            continue
        break
    else:
        raise AssertionError(
            'grouped series does not match any ordering of the expected rows'
        )
Example #4
0
def check_intcolumn_sorting():
	"""Sort a shuffled IntColumn built from ints, floats and numeric strings.

	The expected result contains only the integers 1 and 2 (four of each),
	i.e. inputs such as 1.1 and '2.8' are expected to show up as 1 and 2.
	"""
	raw_values = [1, '1', 2, '2', 1.1, '1.1', 2.1, '2.8']
	expected_order = [1] * 4 + [2] * 4
	dm = DataMatrix(length=8, default_col_type=IntColumn)
	dm.c = raw_values
	# Randomize the row order so that the sort has real work to do.
	dm.c = ops.shuffle(dm.c)
	dm = ops.sort(dm, by=dm.c)
	check_col(dm.c, expected_order)
def test_properties():
    """Check the `empty` property and `len()` of a DataMatrix."""
    # Zero rows: reported as empty even though a column has been added.
    without_rows = DataMatrix(length=0)
    without_rows.c = -1
    assert without_rows.empty

    # One row but no columns: also reported as empty.
    without_columns = DataMatrix(length=1)
    assert without_columns.empty

    # One row and one column: not empty.
    single_cell = DataMatrix(length=1)
    single_cell.c = -1
    assert not single_cell.empty

    # The length is the number of rows, regardless of the column count.
    three_rows = DataMatrix(length=3)
    three_rows.c = -1
    three_rows.d = -1
    assert len(three_rows) == 3
def generatedata(effectsize=EFFECTSIZE,
                 blinksinbaseline=BLINKSINBASELINE,
                 **kwargs):
    """Generate a DataMatrix of noisy traces with artificial blinks.

    The matrix has TRACES rows, a condition column `c` (1 for the first half
    of the rows, 2 for the second half) and a series column `y` of depth
    TRACELEN.

    Parameters
    ----------
    effectsize
        End value of the linear ramp (starting at 0) that is added to the
        traces of the second half of the rows.
    blinksinbaseline
        Rows with an index below this value get a single blink that starts
        at sample 1 instead of randomly placed blinks.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    The generated DataMatrix.
    """

    dm = DataMatrix(length=TRACES)
    dm.c = IntColumn
    dm.c[:TRACES // 2] = 1
    dm.c[TRACES // 2:] = 2
    dm.y = SeriesColumn(depth=TRACELEN)
    # NOTE(review): `a` is not defined in this function; presumably a
    # module-level template trace -- confirm.
    dm.y.setallrows(a)
    # Add noise: an array of TRACES random integers drawn from NOISERANGE
    # (presumably applied as one offset per row -- confirm).
    dm.y += np.random.randint(NOISERANGE[0], NOISERANGE[1], TRACES)
    # Add the effect to the second half of the rows as a linear ramp.
    dm.y[TRACES // 2:] += np.linspace(0, effectsize, TRACELEN)
    # Introduce blinks
    for i, row in enumerate(dm):
        blinklen = np.random.randint(BLINKLEN[0], BLINKLEN[1], BLINKS)
        if i < blinksinbaseline:
            # A single blink at sample 1; zip() below then uses only the
            # first blink length.
            blinkstart = np.array([1])
        else:
            blinkstart = np.random.randint(BASELINE[1], TRACELEN, BLINKS)
        blinkend = blinkstart + blinklen
        for start, end in zip(blinkstart, blinkend):
            # Keep the end of the blink inside the trace.
            end = min(TRACELEN - 1, end)
            # Skip blinks that are too short to hold both margins.
            if end - start < 2 * BLINKMARGIN:
                continue
            # Ramp down to 0 from the sample just before the blink.
            row.y[start:start+BLINKMARGIN] = \
             np.linspace(row.y[start-1], 0, BLINKMARGIN)
            # Ramp back up from 0 to the sample at the end of the blink.
            row.y[end-BLINKMARGIN:end] = \
             np.linspace(0, row.y[end], BLINKMARGIN)
            # NOTE(review): this slice covers both ramp regions assigned
            # just above, so they are overwritten by random integers in
            # [0, 100) -- confirm that this is intended.
            row.y[start:end] = np.random.randint(0, 100, end - start)
    return dm
Example #7
0
def _test_numeric_properties(coltype, nan):
    """Check the summary statistics of a column of type `coltype`.

    Column `c` holds 1, 1, `nan`, 4 and is expected to produce statistics
    that match the values 1, 1, 4. Column `d` holds only `nan`, and each of
    its statistics is passed to `all_nan()` together with `nan`.

    For IntColumn and FloatColumn the all-`nan` checks are executed inside
    a `pytest.warns(RuntimeWarning)` block.
    """
    dm = DataMatrix(length=4, default_col_type=coltype)
    dm.c = 1, 1, nan, 4
    dm.d = [nan] * 4

    def check_all_nan_statistics():
        # Every statistic of the all-nan column is handed to all_nan().
        for statistic in ('mean', 'median', 'std', 'max', 'min', 'sum'):
            all_nan(getattr(dm.d, statistic), nan)

    expected_statistics = (
        ('mean', 2),
        ('median', 1),
        ('std', np.std([1, 1, 4], ddof=1)),
        ('max', 4),
        ('min', 1),
        ('sum', 6),
    )
    for statistic, expected in expected_statistics:
        assert getattr(dm.c, statistic) == expected
    if coltype not in (IntColumn, FloatColumn):
        check_all_nan_statistics()
    else:
        with pytest.warns(RuntimeWarning):
            check_all_nan_statistics()
def test_group():
    """Check that ops.group() collapses rows that share the same (a, b) key.

    The grouped `c` column is compared against every ordering of the
    expected series rows; the test passes as soon as one ordering matches
    and fails if none does.
    """
    dm = DataMatrix(length=4)
    dm.a = 'b', 'b', 'a', 'a'
    dm.b = 'x', 'x', 'x', 'y'
    dm.c = IntColumn
    dm.c = 0, 1, 2, 3
    dm = ops.group(dm, [dm.a, dm.b])

    def matches(candidate):
        # True if the grouped column equals this ordering of the rows.
        try:
            check_series(dm.c, candidate)
        except AssertionError:
            return False
        return True

    expected_rows = [[3, np.nan], [2, np.nan], [0, 1]]
    # any() stops at the first ordering that matches.
    found = any(map(matches, itertools.permutations(expected_rows)))
    assert found
def test_auto_type():
    """Check which column types ops.auto_type() selects.

    A column with a string and an int is expected to be a MixedColumn, a
    column with a float and an int a FloatColumn, and a column with only
    ints an IntColumn.
    """
    dm = DataMatrix(length=2)
    dm.a = 'a', 1
    dm.b = 0.1, 1
    dm.c = 0, 1
    dm = ops.auto_type(dm)
    expected_types = (
        ('a', MixedColumn),
        ('b', FloatColumn),
        ('c', IntColumn),
    )
    for column_name, column_type in expected_types:
        assert isinstance(getattr(dm, column_name), column_type)
def test_auto_type():
	"""Check which column types result from calling ops.auto_type().

	A column with a string and an int is expected to be a MixedColumn, a
	column with a float and an int a FloatColumn, and a column with only
	ints an IntColumn.
	"""
	dm = DataMatrix(length=2)
	dm.a = 'a', 1
	dm.b = 0.1, 1
	dm.c = 0, 1
	# NOTE(review): the return value is discarded, so the checks below
	# inspect the matrix that was passed in; this presumably relies on
	# auto_type() converting in place -- confirm for the ops version in use.
	ops.auto_type(dm)
	expected_types = (
		('a', MixedColumn),
		('b', FloatColumn),
		('c', IntColumn),
	)
	for column_name, column_type in expected_types:
		ok_(isinstance(getattr(dm, column_name), column_type))
Example #11
0
def test_keep_only():
    """Check that ops.keep_only() drops every column that is not listed.

    The columns to keep are specified in two ways: by name and as column
    objects. Each variant is applied to the full three-column matrix and
    the result is stored under a separate name, so that the second variant
    is not run on a matrix from which 'a' has already been removed.
    """
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    dm.c = 'y', 'z'
    for cols in (['b', 'c'], [dm.b, dm.c]):
        kept = ops.keep_only(dm, *cols)
        assert 'a' not in kept.column_names
        assert 'b' in kept.column_names
        assert 'c' in kept.column_names
def test_replace():
    """Check ops.replace() on a regular, a float and a series column.

    The mappings replace ordinary values as well as nan, and also map a
    value onto nan.
    """
    dm = DataMatrix(length=3)
    dm.a = 0, 1, 2
    dm.c = FloatColumn
    dm.c = np.nan, 1, 2
    dm.s = SeriesColumn(depth=3)
    series_rows = (
        (0, 1, 2),
        (np.nan, 1, 2),
        (np.nan, 1, 2),
    )
    for row_index, row_values in enumerate(series_rows):
        dm.s[row_index] = row_values

    # The float and series columns use the same mapping: nan -> 100, 2 -> nan.
    dm.a = ops.replace(dm.a, {0: 100, 2: 200})
    dm.c = ops.replace(dm.c, {np.nan: 100, 2: np.nan})
    dm.s = ops.replace(dm.s, {np.nan: 100, 2: np.nan})

    check_col(dm.a, [100, 1, 200])
    check_col(dm.c, [100, 1, np.nan])
    expected_series = [
        [0, 1, np.nan],
        [100, 1, np.nan],
        [100, 1, np.nan],
    ]
    check_series(dm.s, expected_series)
Example #13
0
def _test_basic_properties(coltype):
    """Check the `name`, `unique` and `count` properties for `coltype`.

    Column `d` is assigned from column `c`, and both are expected to report
    the names ['c', 'd']. Column `e` is assigned separately and is expected
    to report just 'e'.
    """
    dm = DataMatrix(length=4, default_col_type=coltype)
    dm.c = 3, 1, 2, 3
    dm.d = dm.c
    dm.e = 3, 1, 2, 3
    # `c` and `d` refer to one column, which is known under both names.
    for shared_column in (dm.c, dm.d):
        eq_(shared_column.name, ['c', 'd'])
    eq_(dm.e.name, 'e')
    # The values 3, 1, 2, 3 contain three distinct values.
    eq_(list(dm.c.unique), [1, 2, 3])
    eq_(dm.c.count, 3)
Example #14
0
def _test_basic_properties(coltype):
    """Check the `name`, `unique` and `count` properties for `coltype`.

    Column `d` is assigned from column `c`, and both are expected to report
    the names ['c', 'd']. Column `e` is assigned separately and is expected
    to report just 'e'.
    """
    dm = DataMatrix(length=4, default_col_type=coltype)
    dm.c = 3, 1, 2, 3
    dm.d = dm.c
    dm.e = 3, 1, 2, 3
    # `c` and `d` refer to one column, which is known under both names.
    for shared_column in (dm.c, dm.d):
        assert shared_column.name == ['c', 'd']
    assert dm.e.name == 'e'
    # The values 3, 1, 2, 3 contain three distinct values.
    assert list(dm.c.unique) == [1, 2, 3]
    assert dm.c.count == 3
Example #15
0
def check_floatcolumn_sorting():
    """Sort a shuffled FloatColumn and verify the resulting order.

    Assigning the mixed input values is expected to raise a UserWarning.
    The expected column lists the numeric values (including infinities) in
    ascending order, followed by twelve NAN values.
    """
    raw_values = [
        1, '1', 2, '2', 1.1, '1.1', 2.1, '2.1', INF, -INF, 'inf', '-inf',
        NAN, NAN, 'nan', 'nan', None, None, None, None, 'alpha', 'beta',
        'None', ''
    ]
    expected_order = (
        [-INF] * 2
        + [1, 1, 1.1, 1.1, 2, 2, 2.1, 2.1]
        + [INF] * 2
        + [NAN] * 12
    )
    dm = DataMatrix(length=24, default_col_type=FloatColumn)
    with pytest.warns(UserWarning):
        dm.c = raw_values
    # Randomize the row order so that the sort has real work to do.
    dm.c = ops.shuffle(dm.c)
    dm = ops.sort(dm, by=dm.c)
    check_col(dm.c, expected_order)
Example #16
0
def _test_numeric_properties(coltype, nan):
    """Check the summary statistics of a column of type `coltype`.

    Column `c` holds 1, 1, `nan`, 4 and is expected to produce statistics
    that match the values 1, 1, 4. Column `d` holds only `nan`, and each of
    its statistics is passed to `all_nan()` together with `nan`.
    """
    dm = DataMatrix(length=4, default_col_type=coltype)
    dm.c = 1, 1, nan, 4
    dm.d = [nan] * 4
    expected_statistics = (
        ('mean', 2),
        ('median', 1),
        ('std', np.std([1, 1, 4], ddof=1)),
        ('max', 4),
        ('min', 1),
        ('sum', 6),
    )
    for statistic, expected in expected_statistics:
        eq_(getattr(dm.c, statistic), expected)
    # The same statistics, in the same order, for the all-nan column.
    for statistic, _ in expected_statistics:
        all_nan(getattr(dm.d, statistic), nan)
Example #17
0
def _test_numeric_properties(coltype, nan):
    """Check the summary statistics of a column of type `coltype`.

    Column `c` holds 1, 1, `nan`, 4 and is expected to produce statistics
    that match the values 1, 1, 4. Column `d` holds only `nan`, and each of
    its statistics is passed to `all_nan()` together with `nan`.
    """
    dm = DataMatrix(length=4, default_col_type=coltype)
    dm.c = 1, 1, nan, 4
    dm.d = [nan] * 4
    expected_statistics = (
        ('mean', 2),
        ('median', 1),
        ('std', np.std([1, 1, 4], ddof=1)),
        ('max', 4),
        ('min', 1),
        ('sum', 6),
    )
    for statistic, expected in expected_statistics:
        assert getattr(dm.c, statistic) == expected
    # The same statistics, in the same order, for the all-nan column.
    for statistic, _ in expected_statistics:
        all_nan(getattr(dm.d, statistic), nan)
Example #18
0
def _test_copying(cls):
    """Check when columns of type `cls` are shared and when they are copied.

    The assertions cover three cases: slicing a whole DataMatrix, assigning
    an existing column to a new name, and assigning a slice of a column.
    """
    original = DataMatrix(length=5)
    original.d = cls

    # Slice the whole matrix and add columns from both matrices to the copy.
    clone = original[:]
    clone.e = original.d
    clone.f = clone.d
    ok_(clone is not original)
    ok_(clone.d is not original.d)
    ok_(clone.e is not original.d)
    # Assigning a matrix's own column under a new name gives the same object.
    ok_(clone.f is clone.d)
    ok_(clone.d._seq is not original.d._seq)

    # Plain assignment within one matrix shares the column and its data.
    original.c = original.d
    ok_(original.c is original.d)
    ok_(original.c._seq is original.d._seq)

    # Assigning a slice of a column gives a distinct column and data.
    original.e = original.d[:]
    ok_(original.e is not original.d)
    ok_(original.e._seq is not original.d._seq)

    for matrix in (original, clone):
        check_integrity(matrix)
def test_shuffle_horiz():
	"""Check ops.shuffle_horiz() on a whole matrix and on selected columns.

	The outcome of a shuffle is random, so the first two checks repeat the
	shuffle until the first row matches one specific arrangement. Only a
	failed check (AssertionError) triggers a retry; any other exception,
	including KeyboardInterrupt, propagates instead of being swallowed.
	"""
	dm = DataMatrix(length=2)
	dm.a = 'a', 'b'
	dm.b = 0, 1
	dm.c = '-', '-'
	# Shuffle all columns until the first row reads 0, '-', 'a'.
	while True:
		dm2 = ops.shuffle_horiz(dm)
		try:
			check_row(dm2[0], [0, '-', 'a'])
		except AssertionError:
			continue
		break
	# Shuffle only `a` and `b` until their values in the first row are swapped.
	while True:
		dm2 = ops.shuffle_horiz(dm.a, dm.b)
		try:
			check_row(dm2[0], [0, 'a', '-'])
		except AssertionError:
			continue
		break
	# Repeatedly shuffling `a` and `b` must leave `dm.c` as it was.
	for _ in range(1000):
		dm2 = ops.shuffle_horiz(dm.a, dm.b)
		check_col(dm.c, ['-', '-'])
def test_shuffle_horiz():
    """Check ops.shuffle_horiz() on a whole matrix and on selected columns.

    The outcome of a shuffle is random, so the first two checks repeat the
    shuffle until the first row matches one specific arrangement. Only a
    failed check (AssertionError) triggers a retry; any other exception,
    including KeyboardInterrupt, propagates instead of being swallowed.
    """
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    dm.c = '-', '-'
    # Shuffle all columns until the first row reads 0, '-', 'a'.
    while True:
        dm2 = ops.shuffle_horiz(dm)
        try:
            check_row(dm2[0], [0, '-', 'a'])
        except AssertionError:
            continue
        break
    # Shuffle only `a` and `b` until their values in the first row are swapped.
    while True:
        dm2 = ops.shuffle_horiz(dm.a, dm.b)
        try:
            check_row(dm2[0], [0, 'a', '-'])
        except AssertionError:
            continue
        break
    # Repeatedly shuffling `a` and `b` must leave `dm.c` as it was.
    for _ in range(1000):
        dm2 = ops.shuffle_horiz(dm.a, dm.b)
        check_col(dm.c, ['-', '-'])
    # Shuffling a single column must be accepted without raising.
    ops.shuffle_horiz(dm.a)