예제 #1
0
def check_desc_stats(col_type, invalid, assert_invalid):
    """Check the descriptive statistics of a column at several lengths.

    Args:
        col_type: Column class used as the default column type of the
            DataMatrix under test.
        invalid: Value that is inserted as an extra, invalid cell. It is
            not inserted when ``col_type`` is ``IntColumn``.
        assert_invalid: Callable that receives a statistic and asserts that
            it is the "invalid" result for this column type.
    """
    matrix = DataMatrix(length=4, default_col_type=col_type)
    # NOTE(review): presumably IntColumn cannot store `invalid`, which is
    # why the extra-cell variants are skipped for it -- confirm.
    accepts_invalid = col_type is not IntColumn
    # Even number of valid values, optionally followed by one invalid cell
    matrix.col = 1, 2, 3, 10
    check_even(matrix)
    if accepts_invalid:
        matrix.length = 5
        matrix.col = 1, 2, 3, 10, invalid
        check_even(matrix)
    # Odd number of valid values, optionally followed by one invalid cell
    matrix.length = 3
    matrix.col = 1, 2, 10
    check_odd(matrix)
    if accepts_invalid:
        matrix.length = 4
        matrix.col[3] = invalid
        check_odd(matrix)
    # A single value: every statistic equals that value, except for the
    # standard deviation, which must be invalid
    matrix.length = 1
    matrix.col = 1
    for stat in ('mean', 'median'):
        assert getattr(matrix.col, stat) == 1
    assert_invalid(matrix.col.std)
    for stat in ('min', 'max', 'sum'):
        assert getattr(matrix.col, stat) == 1
    # No values at all: every statistic must be invalid
    matrix.length = 0
    for stat in ('mean', 'median', 'std', 'min', 'max', 'sum'):
        assert_invalid(getattr(matrix.col, stat))
예제 #2
0
def check_desc_stats(col_type, invalid, assert_invalid):
	"""Check the descriptive statistics of a column at several lengths.

	Args:
		col_type: Column class used as the default column type of the
			DataMatrix under test.
		invalid: Value that is inserted as an extra, invalid cell. It is
			not inserted when ``col_type`` is ``IntColumn``.
		assert_invalid: Callable that receives a statistic and asserts that
			it is the "invalid" result for this column type.
	"""
	matrix = DataMatrix(length=4, default_col_type=col_type)
	# NOTE(review): presumably IntColumn cannot store `invalid`, which is
	# why the extra-cell variants are skipped for it -- confirm.
	accepts_invalid = col_type is not IntColumn
	# Even number of valid values, optionally followed by one invalid cell
	matrix.col = 1, 2, 3, 10
	check_even(matrix)
	if accepts_invalid:
		matrix.length = 5
		matrix.col = 1, 2, 3, 10, invalid
		check_even(matrix)
	# Odd number of valid values, optionally followed by one invalid cell
	matrix.length = 3
	matrix.col = 1, 2, 10
	check_odd(matrix)
	if accepts_invalid:
		matrix.length = 4
		matrix.col[3] = invalid
		check_odd(matrix)
	# A single value: every statistic equals that value, except for the
	# standard deviation, which must be invalid
	matrix.length = 1
	matrix.col = 1
	for stat in ('mean', 'median'):
		eq_(getattr(matrix.col, stat), 1)
	assert_invalid(matrix.col.std)
	for stat in ('min', 'max', 'sum'):
		eq_(getattr(matrix.col, stat), 1)
	# No values at all: every statistic must be invalid
	matrix.length = 0
	for stat in ('mean', 'median', 'std', 'min', 'max', 'sum'):
		assert_invalid(getattr(matrix.col, stat))
예제 #3
0
def test_intcolumn():
    """Exercise assignment, resizing, uniqueness and dtype of an IntColumn."""
    matrix = DataMatrix(length=2)
    # Scalar, sequence and slice assignment
    matrix.col = IntColumn
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = 2, 3
    check_col(matrix.col, [2, 3])
    matrix.col[:-1] = 4
    check_col(matrix.col, [4, 3])

    # Writing a string into a single cell must raise a TypeError
    @raises(TypeError)
    def _string_into_cell():
        matrix.col[0] = "test"

    _string_into_cell()

    # Writing a string into a full slice must raise a TypeError as well
    @raises(TypeError)
    def _string_into_slice():
        matrix.col[:] = "test"

    _string_into_slice()
    # Shrink to nothing and grow again
    matrix.length = 0
    matrix.length = 4
    # Duplicates collapse in `unique`
    matrix.col = 1, 2, 1, 2
    ok_(sorted(matrix.col.unique) == [1, 2])
    # The underlying sequence is stored as 64-bit integers
    ok_(matrix.col._seq.dtype == np.int64)
    check_integrity(matrix)
예제 #4
0
def _test_numericcolumn(cls):
    """Run the shared assignment and indexing checks for a column class.

    Args:
        cls: Column class under test. The array checks at the end are
            skipped when it is ``MixedColumn``.
    """
    # Create the column, then overwrite it with a scalar
    matrix = DataMatrix(length=2)
    matrix.col = cls
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = 2
    check_col(matrix.col, [2, 2])
    # Create the column, then overwrite it with a sequence
    matrix = DataMatrix(length=2)
    matrix.col = cls
    matrix.col = 1, 2
    check_col(matrix.col, [1, 2])
    matrix.col = 3, 4
    check_col(matrix.col, [3, 4])
    # Assign to slices, with a scalar and with a sequence
    matrix = DataMatrix(length=3)
    matrix.col = cls
    matrix.col = 1
    matrix.col[1:] = 2
    check_col(matrix.col, [1, 2, 2])
    matrix.col[:-1] = 4, 3
    check_col(matrix.col, [4, 3, 2])
    # Shrink to nothing and grow again
    matrix = DataMatrix(length=4)
    matrix.length = 0
    matrix.length = 4
    # Duplicates collapse in `unique`; a comparison result can be used as
    # an index for assignment
    matrix.col = 1, 2, 1, 2
    ok_(sorted(matrix.col.unique) == [1, 2])
    matrix.col[matrix.col == 2] = 0, 0
    check_col(matrix.col, [1, 0, 1, 0])
    check_integrity(matrix)
    # Indexing returns a scalar or a column of the same class
    matrix = DataMatrix(length=5)
    matrix.col = cls
    matrix.col = 1, 2, 3, 4, 5
    # A single int index gives a plain number
    single = matrix.col[2]
    ok_(isinstance(single, (int, float)))
    eq_(single, 3)
    # A tuple of ints gives a column of type `cls`
    picked = matrix.col[1, 3]
    ok_(isinstance(picked, cls))
    check_col(picked, [2, 4])
    # A slice gives a column of type `cls`
    middle = matrix.col[1:-1]
    ok_(isinstance(middle, cls))
    check_col(middle, [2, 3, 4])
    # The `array` property exposes the values as a one-dimensional ndarray
    if cls != MixedColumn:
        values = matrix.col.array
        ok_(isinstance(values, np.ndarray))
        eq_(values.shape, (5, ))
        ok_(all(values == [1, 2, 3, 4, 5]))
예제 #5
0
def test_seriescolumn():
    """Exercise assignment, resizing and depth changes of a SeriesColumn."""
    matrix = DataMatrix(length=2)
    matrix.col = SeriesColumn(depth=3)
    # A scalar fills every sample of every row
    matrix.col = 1
    check_series(matrix.col, [[1, 1, 1], [1, 1, 1]])
    # One scalar per row
    matrix.col = 2, 3
    check_series(matrix.col, [[2, 2, 2], [3, 3, 3]])
    # A scalar written to one row fills that row
    matrix.col[0] = 4
    check_series(matrix.col, [[4, 4, 4], [3, 3, 3]])
    # A sequence written to one row sets its individual samples
    matrix.col[1] = 5, 6, 7
    check_series(matrix.col, [[4, 4, 4], [5, 6, 7]])
    # The same sequence for all rows
    matrix.col.setallrows([8, 9, 10])
    check_series(matrix.col, [[8, 9, 10], [8, 9, 10]])
    # First sample of every row
    matrix.col[:, 0] = 1
    check_series(matrix.col, [[1, 9, 10], [1, 9, 10]])
    # Every sample of the first row
    matrix.col[0, :] = 2
    check_series(matrix.col, [[2, 2, 2], [1, 9, 10]])
    # Every sample of every row
    matrix.col[:, :] = 3
    check_series(matrix.col, [[3, 3, 3], [3, 3, 3]])
    # Shrink to nothing, grow again, then change the depth; samples added
    # by increasing the depth are expected to be 0
    matrix.length = 0
    check_series(matrix.col, [])
    matrix.length = 3
    matrix.col = 1, 2, 3
    matrix.col.depth = 1
    check_series(matrix.col, [[1], [2], [3]])
    matrix.col.depth = 3
    check_series(matrix.col, [[1, 0, 0], [2, 0, 0], [3, 0, 0]])
    check_integrity(matrix)
def test_z():
    """Check that ops.z yields the expected z-scores and NAN where needed."""
    # Reference z-scores for the values -2 .. 2, compared with a tolerance
    expected = [-1.26, -0.63, 0, .63, 1.26]
    matrix = DataMatrix(length=5)
    matrix.a = range(-2, 3)
    matrix.z = ops.z(matrix.a)
    for observed, target in zip(matrix.z, expected):
        assert math.isclose(observed, target, abs_tol=.01)
    # Grow the matrix by one row without assigning a number to the new cell
    # of `a`. That cell should be ignored: its own z-score is NAN (detected
    # through NAN != NAN) while the other z-scores stay the same.
    matrix.length = 6
    matrix.z = ops.z(matrix.a)
    assert matrix.z[5] != matrix.z[5]
    for observed, target in zip(matrix.z[:-1], expected):
        assert math.isclose(observed, target, abs_tol=.01)
    # Without any variability, all z-scores should be NAN
    matrix.a = 2
    matrix.z = ops.z(matrix.a)
    assert all(score != score for score in matrix.z)
예제 #7
0
def test_mixedcolumn():
    """Exercise assignment, resizing and uniqueness of the default column.

    No column type is set explicitly, so the checks apply to whatever
    column type a new DataMatrix uses by default.
    """
    matrix = DataMatrix(length=2)
    # Scalar, sequence and slice assignment; strings are accepted too
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = 2, 3
    check_col(matrix.col, [2, 3])
    matrix.col[:-1] = 4
    check_col(matrix.col, [4, 3])
    matrix.col[:] = "test"
    check_col(matrix.col, ["test", "test"])
    # Shrink to nothing and grow again
    matrix.length = 0
    matrix.length = 4
    # Duplicates collapse in `unique`
    matrix.col = 1, 2, 1, 2
    ok_(sorted(matrix.col.unique) == [1, 2])
    check_integrity(matrix)
def parse_jatos_results(jatos_path):
    """Parse a JATOS results file into a DataMatrix.

    Every line of the file is treated as one JSON object. The keys of each
    object become columns and each object fills one row. A leading ``[``
    and a trailing ``,`` or ``]`` around an object are stripped first, so
    that the lines of a JSON array of objects can be parsed one by one.

    Blank lines are skipped and are not reported as parse failures. Lines
    that are not valid JSON are skipped too, and a warning with the number
    of such lines is issued at the end.

    Args:
        jatos_path: Path of the results file.

    Returns:
        A DataMatrix with one row per successfully parsed, non-empty JSON
        object.
    """
    # JSONDecodeError does not exist on older Python versions, where
    # json.loads() raises a plain ValueError instead
    jsonerror = getattr(json.decoder, 'JSONDecodeError', ValueError)
    # The matrix grows in chunks of STEPS rows and is trimmed at the end
    dm = DataMatrix(length=STEPS)
    invalid_lines = 0
    total_lines = 0
    row = 0
    with open(jatos_path) as fd:
        for line in fd:
            line = line.strip()
            if line.startswith('[{'):
                line = line[1:]
            if line.endswith(('},', '}]')):
                line = line[:-1]
            total_lines += 1
            # A blank line holds no data; passing it to json.loads() would
            # raise and wrongly count it as an invalid line
            if not line:
                continue
            try:
                d = json.loads(line)
            except jsonerror:
                invalid_lines += 1
                continue
            # Valid but empty JSON (such as {}) does not produce a row
            if not d:
                continue
            if row >= len(dm):
                dm.length += STEPS
            for key, val in d.items():
                # Create the column the first time that a key is seen
                if key not in dm:
                    dm[key] = u''
                dm[key][row] = safe_decode(val)
            row += 1
    # Drop the unused rows that were allocated in advance
    dm.length = row
    if invalid_lines:
        warn('Failed to parse {} of {} lines'.format(invalid_lines,
                                                     total_lines))
    return dm
예제 #9
0
def test_floatcolumn():
    """Exercise assignment, resizing, uniqueness and dtype of a FloatColumn."""
    matrix = DataMatrix(length=2)
    # Scalar, sequence and slice assignment
    matrix.col = FloatColumn
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = 2, 3
    check_col(matrix.col, [2, 3])
    matrix.col[:-1] = 4
    check_col(matrix.col, [4, 3])
    # A string that is written to the column should end up as NAN
    matrix.col[:] = "test"
    for cell in matrix.col:
        ok_(np.isnan(cell))
    # Shrink to nothing and grow again
    matrix.length = 0
    matrix.length = 4
    # Duplicates collapse in `unique`
    matrix.col = 1, 2, 1, 2
    ok_(sorted(matrix.col.unique) == [1, 2])
    # The underlying sequence is stored as 64-bit floats
    ok_(matrix.col._seq.dtype == np.float64)
    check_integrity(matrix)
예제 #10
0
def parse_jatos_results(jatos_path, include_context=False):
    """Read a results file as exported by JATOS and return a DataMatrix.

    Each non-blank line is parsed as JSON. A line that holds an object with
    exactly a ``data`` and a ``context`` field is treated as a complete
    session, in which ``data`` is a list of trials. Any other line is
    treated as a single trial from an unfinished session.

    Args:
        jatos_path: Path of the results file.
        include_context: If True, the flattened context variables of a
            complete session are added as columns and are filled in for
            all rows of that session.

    Returns:
        A DataMatrix with one row per trial.
    """
    # Older Python versions raise ValueError rather than JSONDecodeError
    decode_error = getattr(json.decoder, 'JSONDecodeError', ValueError)
    # Rows are allocated in chunks of STEPS and trimmed at the end
    matrix = DataMatrix(length=STEPS)
    n_invalid = 0
    n_incomplete = 0
    n_total = 0
    next_row = 0
    with safe_open(jatos_path) as stream:
        for raw in stream:
            # Remove array brackets and separators, so that what is left
            # is a JSON value by itself
            text = raw.strip()
            if text.startswith('[{'):
                text = text[1:]
            if text.endswith(('},', '}]')):
                text = text[:-1]
            n_total += 1
            # Blank lines are counted but otherwise ignored
            if not text:
                continue
            try:
                record = json.loads(text)
            except decode_error:
                n_invalid += 1
                continue
            if not record:
                continue
            # A finished experiment is stored as one object with a data
            # field (a list with one dict per logger call) and a context
            # field. Anything else is a single logger call from an
            # experiment that did not finish.
            is_complete = (
                len(record) == 2 and 'data' in record and 'context' in record
            )
            if is_complete:
                trials = record['data']
                context = record['context']
            else:
                trials = [record]
                context = {}
                n_incomplete += 1
            session_start = next_row
            for trial in trials:
                if next_row >= len(matrix):
                    matrix.length += STEPS
                for name, cell in trial.items():
                    # Create the column the first time that a key is seen
                    if name not in matrix:
                        matrix[name] = u''
                    matrix[name][next_row] = safe_decode(cell)
                next_row += 1
            if not include_context:
                continue
            # The context applies to all rows that this line contributed
            for name, cell in _flatten_dict(context).items():
                if name not in matrix:
                    matrix[name] = u''
                matrix[name][session_start:next_row] = safe_decode(cell)
    # Drop the unused rows that were allocated in advance
    matrix.length = next_row
    if n_invalid:
        warn('Failed to parse {} of {} lines'.format(n_invalid, n_total))
    if n_incomplete:
        warn('Incomplete (unfinished) data in {} of {} lines'.format(
            n_incomplete, n_total))
    return matrix
예제 #11
0
def test_seriescolumn():
    """Exercise assignment, resizing, depth changes and indexing of a
    SeriesColumn.

    The statements build on each other: every check depends on the state
    that the preceding assignments left behind.
    """

    _test_copying(SeriesColumn(depth=1))
    dm = DataMatrix(length=2)
    dm.col = SeriesColumn(depth=3)
    # Set all rows to a single value
    dm.col = 1
    check_series(dm.col, [[1, 1, 1], [1, 1, 1]])
    # Set rows to different single values
    dm.col = 2, 3
    check_series(dm.col, [[2, 2, 2], [3, 3, 3]])
    # Set one row to a single value
    dm.col[0] = 4
    check_series(dm.col, [[4, 4, 4], [3, 3, 3]])
    # Set one row to different single values
    dm.col[1] = 5, 6, 7
    check_series(dm.col, [[4, 4, 4], [5, 6, 7]])
    # Set all rows to different single values
    dm.col.setallrows([8, 9, 10])
    check_series(dm.col, [[8, 9, 10], [8, 9, 10]])
    # Set the first value in all rows
    dm.col[:, 0] = 1
    check_series(dm.col, [[1, 9, 10], [1, 9, 10]])
    # Set all values in the first row
    dm.col[0, :] = 2
    check_series(dm.col, [[2, 2, 2], [1, 9, 10]])
    # Set all values
    dm.col[:, :] = 3
    check_series(dm.col, [[3, 3, 3], [3, 3, 3]])
    # Test shortening and lengthening. Samples that are added by increasing
    # the depth are expected to be NAN.
    dm.length = 0
    check_series(dm.col, [])
    dm.length = 3
    dm.col = 1, 2, 3
    dm.col.depth = 1
    check_series(dm.col, [[1], [2], [3]])
    dm.col.depth = 3
    check_series(dm.col, [[1, NAN, NAN], [2, NAN, NAN], [3, NAN, NAN]])
    check_integrity(dm)
    # Test assigning sequences: a sequence as long as the number of rows
    # sets one value per row, a sequence as long as the depth sets the
    # samples of every row. When length and depth are equal (both 2 below),
    # [:] assigns per row and [:, :] assigns per sample.
    dm = DataMatrix(length=2)
    dm.col = SeriesColumn(depth=3)
    dm.col = 1, 2
    check_series(dm.col, [[1, 1, 1], [2, 2, 2]])
    dm.col = 3, 4, 5
    check_series(dm.col, [[3, 4, 5]] * 2)
    dm.col.depth = 2
    dm.col[:] = 1, 2
    check_series(dm.col, [[1, 1], [2, 2]])
    dm.col[:, :] = 3, 4
    check_series(dm.col, [[3, 4], [3, 4]])
    # Check if series return right type
    dm = DataMatrix(length=4)
    dm.col = SeriesColumn(depth=5)
    dm.col = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20]]
    # (int, int) -> float
    val = dm.col[2, 2]
    eq_(val, 13)
    eq_(type(val), float)
    # (int) -> array
    val = dm.col[2]
    ok_(all(val == np.array([11, 12, 13, 14, 15])))
    eq_(type(val), np.ndarray)
    # (int, slice) -> array
    val = dm.col[2, 1:-1]
    ok_(all(val == np.array([12, 13, 14])))
    eq_(type(val), np.ndarray)
    # (int, (int, int)) -> array
    val = dm.col[2, (1, 3)]
    ok_(all(val == np.array([12, 14])))
    eq_(type(val), np.ndarray)
    # (slice) -> SeriesColumn (only the values are checked here, not the
    # type)
    val = dm.col[1:-1]
    check_series(val, [
        [6, 7, 8, 9, 10],
        [11, 12, 13, 14, 15],
    ])
    # (slice, int) -> FloatColumn
    val = dm.col[1:-1, 2]
    ok_(isinstance(val, FloatColumn))
    check_col(val, [8, 13])
    # ((int, int), int) -> FloatColumn
    val = dm.col[(1, 3), 2]
    ok_(isinstance(val, FloatColumn))
    check_col(val, [8, 18])
    # (slice, slice) -> SeriesColumn
    val = dm.col[1:-1, 1:-1]
    ok_(isinstance(val, _SeriesColumn))
    check_series(val, [
        [7, 8, 9],
        [12, 13, 14],
    ])
    # ((int, int), slice) -> SeriesColumn
    val = dm.col[(1, 3), 1:-1]
    ok_(isinstance(val, _SeriesColumn))
    check_series(val, [
        [7, 8, 9],
        [17, 18, 19],
    ])
    # ((int, int), (int, int)) -> SeriesColumn
    val = dm.col[(1, 3), (1, 3)]
    ok_(isinstance(val, _SeriesColumn))
    check_series(val, [
        [7, 9],
        [17, 19],
    ])
예제 #12
0
def _test_numericcolumn(cls):
	"""Run the shared assignment and indexing checks for a column class.

	Args:
		cls: Column class under test. The array checks at the end are
			skipped when it is ``MixedColumn``.
	"""

	# Test init and change by single value
	dm = DataMatrix(length=2)
	dm.col = cls
	dm.col = 1
	check_col(dm.col, [1, 1])
	dm.col = 2
	check_col(dm.col, [2, 2])
	# Test init and change by sequence
	dm = DataMatrix(length=2)
	dm.col = cls
	dm.col = 1, 2
	check_col(dm.col, [1, 2])
	dm.col = 3, 4
	check_col(dm.col, [3, 4])
	# Test setting by slice
	dm = DataMatrix(length=3)
	dm.col = cls
	dm.col = 1
	dm.col[1:] = 2
	check_col(dm.col, [1, 2, 2])
	dm.col[:-1] = 4, 3
	check_col(dm.col, [4, 3, 2])
	# Test setting by DataMatrix: the result of a column comparison is used
	# as the index. Rows 3 and 6 are removed first, which leaves the x
	# values 0, 1, 2, 4, 5, 7, 8, 9. This part always uses a FloatColumn,
	# regardless of cls; the expected values contain NAN for the rows that
	# were never assigned.
	dm = DataMatrix(length=10)
	dm.x = range(10)
	dm.y = FloatColumn
	dm = dm.x != {3, 6}
	dm.y[dm.x > 3] = 10
	dm.y[dm.x >= 8] = 11
	check_col(dm.y, [np.nan] * 3 + [10] * 3 + [11] * 2)
	# Test shortening and lengthening
	dm = DataMatrix(length=4)
	dm.length = 0
	dm.length = 4
	# Check uniqueness
	dm.col = 1, 2, 1, 2
	assert sorted(dm.col.unique) == [1,2]
	dm.col[dm.col == 2] = 0, 0
	check_col(dm.col, [1, 0, 1, 0])
	check_integrity(dm)
	# Check if numericcolumns return right type
	dm = DataMatrix(length=5)
	dm.col = cls
	dm.col = 1, 2, 3, 4, 5
	# int index -> scalar (int or float)
	val = dm.col[2]
	assert isinstance(val, (int, float))
	assert val == 3
	# (int, int) -> column of type cls
	val = dm.col[1, 3]
	assert isinstance(val, cls)
	check_col(val, [2, 4])
	# slice -> column of type cls
	val = dm.col[1:-1]
	assert isinstance(val, cls)
	check_col(val, [2, 3, 4])
	# Check array setting and getting
	if cls != MixedColumn:
		a = dm.col.array
		assert isinstance(a, np.ndarray)
		assert a.shape == (5,)
		assert all(a == [1, 2, 3, 4, 5])