def check_desc_stats(col_type, invalid, assert_invalid): dm = DataMatrix(length=4, default_col_type=col_type) # Even lengths dm.col = 1, 2, 3, 10 check_even(dm) if col_type is not IntColumn: dm.length = 5 dm.col = 1, 2, 3, 10, invalid check_even(dm) # Odd lengths (and even with one invalid) dm.length = 3 dm.col = 1, 2, 10 check_odd(dm) if col_type is not IntColumn: dm.length = 4 dm.col[3] = invalid check_odd(dm) # One lengths dm.length = 1 dm.col = 1 assert dm.col.mean == 1 assert dm.col.median == 1 assert_invalid(dm.col.std) assert dm.col.min == 1 assert dm.col.max == 1 assert dm.col.sum == 1 # Zero lengths dm.length = 0 assert_invalid(dm.col.mean) assert_invalid(dm.col.median) assert_invalid(dm.col.std) assert_invalid(dm.col.min) assert_invalid(dm.col.max) assert_invalid(dm.col.sum)
def check_desc_stats(col_type, invalid, assert_invalid): dm = DataMatrix(length=4, default_col_type=col_type) # Even lengths dm.col = 1, 2, 3, 10 check_even(dm) if col_type is not IntColumn: dm.length = 5 dm.col = 1, 2, 3, 10, invalid check_even(dm) # Odd lengths (and even with one invalid) dm.length = 3 dm.col = 1, 2, 10 check_odd(dm) if col_type is not IntColumn: dm.length = 4 dm.col[3] = invalid check_odd(dm) # One lengths dm.length = 1 dm.col = 1 eq_(dm.col.mean, 1) eq_(dm.col.median, 1) assert_invalid(dm.col.std) eq_(dm.col.min, 1) eq_(dm.col.max, 1) eq_(dm.col.sum, 1) # Zero lengths dm.length = 0 assert_invalid(dm.col.mean) assert_invalid(dm.col.median) assert_invalid(dm.col.std) assert_invalid(dm.col.min) assert_invalid(dm.col.max) assert_invalid(dm.col.sum)
def test_intcolumn():
    """Checks assignment, type enforcement, resizing, uniqueness, and the
    underlying dtype of IntColumn.
    """
    dm = DataMatrix(length=2)
    # Assignment: single value, sequence, and slice
    dm.col = IntColumn
    dm.col = 1
    check_col(dm.col, [1, 1])
    dm.col = 2, 3
    check_col(dm.col, [2, 3])
    dm.col[:-1] = 4
    check_col(dm.col, [4, 3])
    # Assigning a non-numeric value should raise a TypeError, both for a
    # single cell and for a slice
    @raises(TypeError)
    def assign_str_to_cell():
        dm.col[0] = "test"
    assign_str_to_cell()
    @raises(TypeError)
    def assign_str_to_slice():
        dm.col[:] = "test"
    assign_str_to_slice()
    # Shortening and lengthening
    dm.length = 0
    dm.length = 4
    # Uniqueness
    dm.col = 1, 2, 1, 2
    ok_([1, 2] == sorted(dm.col.unique))
    # The underlying sequence should be an int64 array
    ok_(np.int64 == dm.col._seq.dtype)
    check_integrity(dm)
def _test_numericcolumn(cls):
    """Checks the shared behavior of a numeric column type `cls`
    (FloatColumn, IntColumn, or MixedColumn): assignment, slicing,
    resizing, uniqueness, and the types returned by indexing.
    """
    # Test init and change by single value
    dm = DataMatrix(length=2)
    dm.col = cls
    dm.col = 1
    check_col(dm.col, [1, 1])
    dm.col = 2
    check_col(dm.col, [2, 2])
    # Test init and change by sequence
    dm = DataMatrix(length=2)
    dm.col = cls
    dm.col = 1, 2
    check_col(dm.col, [1, 2])
    dm.col = 3, 4
    check_col(dm.col, [3, 4])
    # Test setting by slice
    dm = DataMatrix(length=3)
    dm.col = cls
    dm.col = 1
    dm.col[1:] = 2
    check_col(dm.col, [1, 2, 2])
    dm.col[:-1] = 4, 3
    check_col(dm.col, [4, 3, 2])
    # Test shortening and lengthening
    dm = DataMatrix(length=4)
    dm.length = 0
    dm.length = 4
    # Check uniqueness
    dm.col = 1, 2, 1, 2
    ok_(sorted(dm.col.unique) == [1, 2])
    # Setting by a comparison selects the matching rows
    dm.col[dm.col == 2] = 0, 0
    check_col(dm.col, [1, 0, 1, 0])
    check_integrity(dm)
    # Check if numericcolumns return right type
    dm = DataMatrix(length=5)
    dm.col = cls
    dm.col = 1, 2, 3, 4, 5
    # int -> float
    val = dm.col[2]
    ok_(isinstance(val, (int, float)))
    eq_(val, 3)
    # (int, int) -> FloatColumn
    val = dm.col[1, 3]
    ok_(isinstance(val, cls))
    check_col(val, [2, 4])
    # slice -> FloatColumn
    val = dm.col[1:-1]
    ok_(isinstance(val, cls))
    check_col(val, [2, 3, 4])
    # Check array setting and getting (MixedColumn has no array property)
    if cls != MixedColumn:
        a = dm.col.array
        ok_(isinstance(a, np.ndarray))
        eq_(a.shape, (5, ))
        ok_(all(a == [1, 2, 3, 4, 5]))
def test_seriescolumn():
    """Checks assignment, row/cell indexing, resizing, and depth changes of
    SeriesColumn.
    """
    dm = DataMatrix(length=2)
    dm.col = SeriesColumn(depth=3)
    # Set all rows to a single value
    dm.col = 1
    check_series(dm.col, [[1, 1, 1], [1, 1, 1]])
    # Set rows to different single values
    dm.col = 2, 3
    check_series(dm.col, [[2, 2, 2], [3, 3, 3]])
    # Set one row to a single value
    dm.col[0] = 4
    check_series(dm.col, [[4, 4, 4], [3, 3, 3]])
    # Set one row to different single values
    dm.col[1] = 5, 6, 7
    check_series(dm.col, [[4, 4, 4], [5, 6, 7]])
    # Set all rows to different single values
    dm.col.setallrows([8, 9, 10])
    check_series(dm.col, [[8, 9, 10], [8, 9, 10]])
    # Set the first value in all rows
    dm.col[:, 0] = 1
    check_series(dm.col, [[1, 9, 10], [1, 9, 10]])
    # Set all values in the first row
    dm.col[0, :] = 2
    check_series(dm.col, [[2, 2, 2], [1, 9, 10]])
    # Set all values
    dm.col[:, :] = 3
    check_series(dm.col, [[3, 3, 3], [3, 3, 3]])
    # Test shortening and lengthening
    dm.length = 0
    check_series(dm.col, [])
    dm.length = 3
    dm.col = 1, 2, 3
    # Reducing the depth truncates each row; growing it back pads with 0
    # (this version of the test expects 0-padding)
    dm.col.depth = 1
    check_series(dm.col, [[1], [2], [3]])
    dm.col.depth = 3
    check_series(dm.col, [[1, 0, 0], [2, 0, 0], [3, 0, 0]])
    check_integrity(dm)
def test_z():
    """Checks the z-score transformation, including the handling of
    non-numeric values and of columns without variability.
    """
    expected = [-1.26, -0.63, 0, .63, 1.26]
    dm = DataMatrix(length=5)
    dm.a = range(-2, 3)
    dm.z = ops.z(dm.a)
    assert all(
        math.isclose(obs, ref, abs_tol=.01)
        for obs, ref in zip(dm.z, expected)
    )
    # Add a non-numeric value, which should be ignored and its z value should
    # be NAN. (NAN is the only value that is unequal to itself.)
    dm.length = 6
    dm.z = ops.z(dm.a)
    assert dm.z[5] != dm.z[5]
    assert all(
        math.isclose(obs, ref, abs_tol=.01)
        for obs, ref in zip(dm.z[:-1], expected)
    )
    # If there is no variability, the z-scores should be NAN
    dm.a = 2
    dm.z = ops.z(dm.a)
    assert all(z != z for z in dm.z)
def test_mixedcolumn():
    """Checks assignment, resizing, and uniqueness of the default
    MixedColumn.
    """
    dm = DataMatrix(length=2)
    # Assignment: single value, sequence, slice, and string
    dm.col = 1
    check_col(dm.col, [1, 1])
    dm.col = 2, 3
    check_col(dm.col, [2, 3])
    dm.col[:-1] = 4
    check_col(dm.col, [4, 3])
    dm.col[:] = "test"
    check_col(dm.col, ["test"] * 2)
    # Shortening and lengthening
    dm.length = 0
    dm.length = 4
    # Uniqueness
    dm.col = 1, 2, 1, 2
    ok_([1, 2] == sorted(dm.col.unique))
    check_integrity(dm)
def parse_jatos_results(jatos_path):
    """Converts a results file, as exported by JATOS, and returns it as a
    DataMatrix. Each non-empty line is expected to be a json object
    corresponding to one logger call; lines that cannot be parsed are
    counted and reported with a warning.

    Parameters
    ----------
    jatos_path: str
        The path to the JATOS results file.

    Returns
    -------
    DataMatrix
    """
    # json.decoder.JSONDecodeError doesn't exist on older Python versions,
    # where the json module raises a plain ValueError instead
    if hasattr(json.decoder, 'JSONDecodeError'):
        jsonerror = json.decoder.JSONDecodeError
    else:
        jsonerror = ValueError
    dm = DataMatrix(length=STEPS)
    invalid_lines = 0
    total_lines = 0
    row = 0
    with open(jatos_path) as fd:
        for line in fd:
            # Strip list markers so that each line is valid json by itself
            line = line.strip()
            if line.startswith('[{'):
                line = line[1:]
            if line.endswith('},') or line.endswith('}]'):
                line = line[:-1]
            total_lines += 1
            # Fix: skip empty lines instead of feeding them to json.loads(),
            # which would miscount them as parse failures
            if not line:
                continue
            try:
                d = json.loads(line)
            except jsonerror:
                invalid_lines += 1
                continue
            else:
                if not d:
                    continue
            # Grow the DataMatrix in chunks of STEPS for efficiency
            if row >= len(dm):
                dm.length += STEPS
            for key, val in d.items():
                if key not in dm:
                    dm[key] = u''
                dm[key][row] = safe_decode(val)
            row += 1
    # Trim the DataMatrix to the rows that were actually filled
    dm.length = row
    if invalid_lines:
        warn('Failed to parse {} of {} lines'.format(invalid_lines,
                                                     total_lines))
    return dm
def test_floatcolumn():
    """Checks assignment, NAN coercion, resizing, uniqueness, and the
    underlying dtype of FloatColumn.
    """
    dm = DataMatrix(length=2)
    # Assignment: single value, sequence, and slice
    dm.col = FloatColumn
    dm.col = 1
    check_col(dm.col, [1, 1])
    dm.col = 2, 3
    check_col(dm.col, [2, 3])
    dm.col[:-1] = 4
    check_col(dm.col, [4, 3])
    # Non-numeric values become NAN
    dm.col[:] = "test"
    ok_(all(np.isnan(cell) for cell in dm.col))
    # Shortening and lengthening
    dm.length = 0
    dm.length = 4
    # Uniqueness
    dm.col = 1, 2, 1, 2
    ok_([1, 2] == sorted(dm.col.unique))
    # The underlying sequence should be a float64 array
    ok_(np.float64 == dm.col._seq.dtype)
    check_integrity(dm)
def parse_jatos_results(jatos_path, include_context=False):
    """Converts a results file, as exported by JATOS, and returns it as a
    DataMatrix. If the context is included, columns are created for each of
    the context variables.

    Parameters
    ----------
    jatos_path:
        The path to the JATOS results file.
    include_context: bool
        Indicates whether the (flattened) context variables should be added
        as columns to all rows that belong to the same line.

    Returns
    -------
    DataMatrix
    """
    # json.decoder.JSONDecodeError doesn't exist on older Python versions,
    # where the json module raises a plain ValueError instead
    if hasattr(json.decoder, 'JSONDecodeError'):
        jsonerror = json.decoder.JSONDecodeError
    else:
        jsonerror = ValueError
    dm = DataMatrix(length=STEPS)
    invalid_lines = 0
    incomplete_lines = 0
    total_lines = 0
    row = 0
    with safe_open(jatos_path) as fd:
        for line in fd:
            # Strip the lines so that they are valid json
            line = line.strip()
            if line.startswith('[{'):
                line = line[1:]
            if line.endswith('},') or line.endswith('}]'):
                line = line[:-1]
            total_lines += 1
            # Ignore empty lines
            if not line:
                continue
            try:
                d = json.loads(line)
            except jsonerror:
                invalid_lines += 1
                continue
            else:
                if not d:
                    continue
            # Complete data is stored as a single-json object with a data and
            # context field. We're only interested in the data field here,
            # which in turn is a list of dicts where each dict corresponds to
            # a logger call. This data is stored when an experiment is
            # successfully finished.
            if len(d) == 2 and 'data' in d and 'context' in d:
                trials = d['data']
                context = d['context']
            # Incomplete data is stored as a single json line corresponding to
            # a single logger call. This kind of data should only happen when
            # the experiment is not finished, so that data is only partially
            # logged, one trial at a time.
            else:
                trials = [d]
                context = {}
                incomplete_lines += 1
            # Remember where this line's trials start so that context
            # variables can later be applied to the same row range
            first_row = row
            for trial in trials:
                # Grow the DataMatrix in chunks of STEPS for efficiency
                if row >= len(dm):
                    dm.length += STEPS
                for key, val in trial.items():
                    if key not in dm:
                        dm[key] = u''
                    dm[key][row] = safe_decode(val)
                row += 1
            if include_context:
                # Assign each flattened context variable to all rows that
                # came from this line
                for key, value in _flatten_dict(context).items():
                    if key not in dm:
                        dm[key] = u''
                    dm[key][first_row:row] = safe_decode(value)
    # Trim the DataMatrix to the rows that were actually filled
    dm.length = row
    if invalid_lines:
        warn('Failed to parse {} of {} lines'.format(invalid_lines,
                                                     total_lines))
    if incomplete_lines:
        warn('Incomplete (unfinished) data in {} of {} lines'.format(
            incomplete_lines, total_lines))
    return dm
def test_seriescolumn():
    """Checks assignment, row/cell indexing, resizing, depth changes, and
    the types returned by the various indexing forms of SeriesColumn.
    """
    _test_copying(SeriesColumn(depth=1))
    dm = DataMatrix(length=2)
    dm.col = SeriesColumn(depth=3)
    # Set all rows to a single value
    dm.col = 1
    check_series(dm.col, [[1, 1, 1], [1, 1, 1]])
    # Set rows to different single values
    dm.col = 2, 3
    check_series(dm.col, [[2, 2, 2], [3, 3, 3]])
    # Set one row to a single value
    dm.col[0] = 4
    check_series(dm.col, [[4, 4, 4], [3, 3, 3]])
    # Set one row to different single values
    dm.col[1] = 5, 6, 7
    check_series(dm.col, [[4, 4, 4], [5, 6, 7]])
    # Set all rows to different single values
    dm.col.setallrows([8, 9, 10])
    check_series(dm.col, [[8, 9, 10], [8, 9, 10]])
    # Set the first value in all rows
    dm.col[:, 0] = 1
    check_series(dm.col, [[1, 9, 10], [1, 9, 10]])
    # Set all values in the first row
    dm.col[0, :] = 2
    check_series(dm.col, [[2, 2, 2], [1, 9, 10]])
    # Set all values
    dm.col[:, :] = 3
    check_series(dm.col, [[3, 3, 3], [3, 3, 3]])
    # Test shortening and lengthening
    dm.length = 0
    check_series(dm.col, [])
    dm.length = 3
    dm.col = 1, 2, 3
    # Reducing the depth truncates each row; growing it back pads with NAN
    dm.col.depth = 1
    check_series(dm.col, [[1], [2], [3]])
    dm.col.depth = 3
    check_series(dm.col, [[1, NAN, NAN], [2, NAN, NAN], [3, NAN, NAN]])
    check_integrity(dm)
    # Test assignment of sequences whose length matches either the number of
    # rows or the depth
    dm = DataMatrix(length=2)
    dm.col = SeriesColumn(depth=3)
    dm.col = 1, 2
    check_series(dm.col, [[1, 1, 1], [2, 2, 2]])
    dm.col = 3, 4, 5
    check_series(dm.col, [[3, 4, 5]] * 2)
    dm.col.depth = 2
    dm.col[:] = 1, 2
    check_series(dm.col, [[1, 1], [2, 2]])
    dm.col[:, :] = 3, 4
    check_series(dm.col, [[3, 4], [3, 4]])
    # Check if series return right type
    dm = DataMatrix(length=4)
    dm.col = SeriesColumn(depth=5)
    dm.col = [
        [1, 2, 3, 4, 5],
        [6, 7, 8, 9, 10],
        [11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20]
    ]
    # (int, int) -> float
    val = dm.col[2, 2]
    eq_(val, 13)
    eq_(type(val), float)
    # (int) -> array
    val = dm.col[2]
    ok_(all(val == np.array([11, 12, 13, 14, 15])))
    eq_(type(val), np.ndarray)
    # (int, slice) -> array
    val = dm.col[2, 1:-1]
    ok_(all(val == np.array([12, 13, 14])))
    eq_(type(val), np.ndarray)
    # (int, (int, int)) -> array
    val = dm.col[2, (1, 3)]
    ok_(all(val == np.array([12, 14])))
    eq_(type(val), np.ndarray)
    # (slice) -> SeriesColumn
    val = dm.col[1:-1]
    check_series(val, [
        [6, 7, 8, 9, 10],
        [11, 12, 13, 14, 15],
    ])
    # (slice, int) -> FloatColumn
    val = dm.col[1:-1, 2]
    ok_(isinstance(val, FloatColumn))
    check_col(val, [8, 13])
    # ((int, int), int) -> FloatColumn
    val = dm.col[(1, 3), 2]
    ok_(isinstance(val, FloatColumn))
    check_col(val, [8, 18])
    # (slice, slice) -> SeriesColumn
    val = dm.col[1:-1, 1:-1]
    ok_(isinstance(val, _SeriesColumn))
    check_series(val, [
        [7, 8, 9],
        [12, 13, 14],
    ])
    # ((int, int), slice) -> SeriesColumn
    val = dm.col[(1, 3), 1:-1]
    ok_(isinstance(val, _SeriesColumn))
    check_series(val, [
        [7, 8, 9],
        [17, 18, 19],
    ])
    # ((int, int), (int int)) -> SeriesColumn
    val = dm.col[(1, 3), (1, 3)]
    ok_(isinstance(val, _SeriesColumn))
    check_series(val, [
        [7, 9],
        [17, 19],
    ])
def _test_numericcolumn(cls):
    """Checks the shared behavior of a numeric column type `cls`
    (FloatColumn, IntColumn, or MixedColumn): assignment, slicing,
    selection, resizing, uniqueness, and the types returned by indexing.
    """
    # Test init and change by single value
    dm = DataMatrix(length=2)
    dm.col = cls
    dm.col = 1
    check_col(dm.col, [1, 1])
    dm.col = 2
    check_col(dm.col, [2, 2])
    # Test init and change by sequence
    dm = DataMatrix(length=2)
    dm.col = cls
    dm.col = 1, 2
    check_col(dm.col, [1, 2])
    dm.col = 3, 4
    check_col(dm.col, [3, 4])
    # Test setting by slice
    dm = DataMatrix(length=3)
    dm.col = cls
    dm.col = 1
    dm.col[1:] = 2
    check_col(dm.col, [1, 2, 2])
    dm.col[:-1] = 4, 3
    check_col(dm.col, [4, 3, 2])
    # Test setting by DataMatrix: after filtering out x == 3 and x == 6,
    # the remaining rows are x = 0, 1, 2, 4, 5, 7, 8, 9, so the two
    # comparison-based assignments leave three NANs, three 10s, and two 11s
    dm = DataMatrix(length=10)
    dm.x = range(10)
    dm.y = FloatColumn
    dm = dm.x != {3, 6}
    dm.y[dm.x > 3] = 10
    dm.y[dm.x >= 8] = 11
    check_col(dm.y, [np.nan] * 3 + [10] * 3 + [11] * 2)
    # Test shortening and lengthening
    dm = DataMatrix(length=4)
    dm.length = 0
    dm.length = 4
    # Check uniqueness
    dm.col = 1, 2, 1, 2
    assert sorted(dm.col.unique) == [1,2]
    # Setting by a comparison selects the matching rows
    dm.col[dm.col == 2] = 0, 0
    check_col(dm.col, [1, 0, 1, 0])
    check_integrity(dm)
    # Check if numericcolumns return right type
    dm = DataMatrix(length=5)
    dm.col = cls
    dm.col = 1, 2, 3, 4, 5
    # int -> float
    val = dm.col[2]
    assert isinstance(val, (int, float))
    assert val == 3
    # (int, int) -> FloatColumn
    val = dm.col[1, 3]
    assert isinstance(val, cls)
    check_col(val, [2, 4])
    # slice -> FloatColumn
    val = dm.col[1:-1]
    assert isinstance(val, cls)
    check_col(val, [2, 3, 4])
    # Check array setting and getting (MixedColumn has no array property)
    if cls != MixedColumn:
        a = dm.col.array
        assert isinstance(a, np.ndarray)
        assert a.shape == (5,)
        assert all(a == [1, 2, 3, 4, 5])