def glmer_series(dm, formula, family, winlen=1):

    """
    Fits a mixed-effects model (through glmer) to each window of samples of
    a series column, and collects the fixed-effect statistics in
    SeriesColumns of the returned DataMatrix.
    """

    col = formula.split()[0]
    depth = dm[col].depth
    rm = None
    for i in range(0, depth, winlen):
        wm = dm[:]
        # Reduce the current window of the series to a single value, so that
        # a regular (non-series) model can be fitted to it
        wm[col] = series.reduce_(
            series.window(wm[col], start=i, end=i + winlen))
        lm = glmer(wm, formula, family=family)
        print('Sample %d' % i)
        print(lm)
        if rm is None:
            # Initialize the results DataMatrix with one series per statistic
            rm = DataMatrix(length=len(lm))
            rm.effect = list(lm.effect)
            rm.p = SeriesColumn(depth=depth)
            rm.z = SeriesColumn(depth=depth)
            rm.est = SeriesColumn(depth=depth)
            rm.se = SeriesColumn(depth=depth)
        for lmrow, rmrow in zip(lm, rm):
            rmrow.p[i:i + winlen] = lmrow.p
            rmrow.z[i:i + winlen] = lmrow.z
            rmrow.est[i:i + winlen] = lmrow.est
            rmrow.se[i:i + winlen] = lmrow.se
    return rm
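# A usage sketch for glmer_series(). The DataMatrix `dm` and the column
# names in the formula are hypothetical; the formula follows lme4-style
# mixed-effects notation, as expected by the glmer() wrapper used above:
#
#     lm = glmer_series(
#         dm, 'pupil ~ condition + (1|subject)', family='gaussian',
#         winlen=10)
#     # lm.est, lm.se, lm.z and lm.p are now SeriesColumns with one
#     # sample-by-sample trace per fixed effect listed in lm.effect.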
def from_json(s):

    """
    desc: |
        *Requires json_tricks*

        Creates a DataMatrix from a `json` string.

    arguments:
        s:
            desc: A json string.
            type: str

    returns:
        desc: A DataMatrix.
        type: DataMatrix
    """

    import json_tricks

    d = json_tricks.loads(s)
    dm = DataMatrix(length=len(d['rowid']))
    for name, (coltype, seq) in d['columns'].items():
        if coltype == '_SeriesColumn':
            dm[name] = SeriesColumn(depth=seq.shape[1])
            dm[name]._seq = seq
        else:
            dm[name] = globals()[coltype]
            dm[name]._seq = seq
    return dm
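# A minimal round-trip sketch. This assumes a complementary to_json()
# serializer (hypothetical here; only from_json() is shown above):
#
#     dm = DataMatrix(length=2)
#     dm.s = SeriesColumn(depth=3)
#     dm.s = 1, 2
#     dm2 = from_json(to_json(dm))  # to_json() is assumed, not shown
#     assert dm2.s.depth == 3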
def test_seriescolumn():

    dm1 = DataMatrix(length=2)
    dm1.col1 = SeriesColumn(2)
    dm1.col1 = 1, 2
    dm1.col_shared = SeriesColumn(2)
    dm1.col_shared = 3, 4
    dm2 = DataMatrix(length=2)
    dm2.col2 = SeriesColumn(2)
    dm2.col2 = 5, 6
    dm2.col_shared = SeriesColumn(2)
    dm2.col_shared = 7, 8
    dm3 = dm1 << dm2
    check_series(dm3.col1, [[1, 1], [2, 2],
                            [np.nan, np.nan], [np.nan, np.nan]])
    check_series(dm3.col_shared, [[3, 3], [4, 4], [7, 7], [8, 8]])
    check_series(dm3.col2, [[np.nan, np.nan], [np.nan, np.nan],
                            [5, 5], [6, 6]])
    dm3.i = [4, 0, 2, 1]
    dm4 = dm3.i <= 2
    dm5 = (dm3.i <= 2) | (dm3.i >= 3)
    check_integrity(dm1)
    check_integrity(dm2)
    check_integrity(dm3)
    check_integrity(dm4)
    check_integrity(dm5)
def test_concatenate():

    dm = DataMatrix(length=1)
    dm.s1 = SeriesColumn(depth=3)
    dm.s1[:] = 1, 2, 3
    dm.s2 = SeriesColumn(depth=3)
    dm.s2[:] = 3, 2, 1
    dm.s = series.concatenate(dm.s1, dm.s2)
    check_series(dm.s, [[1, 2, 3, 3, 2, 1]])
def test_normalize_time():

    dm = DataMatrix(length=2)
    dm.s = SeriesColumn(depth=2)
    dm.s[0] = 1, 2
    dm.s[1] = np.nan, 3
    dm.t = SeriesColumn(depth=2)
    dm.t[0] = 0, 3
    dm.t[1] = 1, 2
    # normalize_time() re-bins the samples onto a single time axis (here
    # 0, 1, 2, 3), so that equal timestamps line up across rows
    dm.n = series.normalize_time(dm.s, dm.t)
    check_series(dm.n, [[1, np.nan, np.nan, 2], [np.nan, np.nan, 3, np.nan]])
def test_baseline():

    dm = DataMatrix(length=2)
    dm.series = SeriesColumn(depth=3)
    dm.series[0] = range(3)
    dm.series[1] = range(1, 4)
    dm.baseline = SeriesColumn(depth=3)
    dm.baseline[0] = range(1, 4)
    dm.baseline[1] = range(3)
    dm.norm = series.baseline(dm.series, dm.baseline)
    check_series(dm.norm, [[-2, -1, 0], [0, 1, 2]])
    check_integrity(dm)
def generatedata(effectsize=EFFECTSIZE, blinksinbaseline=BLINKSINBASELINE,
                 **kwargs):

    dm = DataMatrix(length=TRACES)
    dm.c = IntColumn
    dm.c[:TRACES // 2] = 1
    dm.c[TRACES // 2:] = 2
    dm.y = SeriesColumn(depth=TRACELEN)
    # `a` is assumed to be a module-level baseline value defined elsewhere
    # in this script, like the other uppercase constants
    dm.y.setallrows(a)
    # Add a random offset to each trace, and the effect to the second half
    dm.y += np.random.randint(NOISERANGE[0], NOISERANGE[1], TRACES)
    dm.y[TRACES // 2:] += np.linspace(0, effectsize, TRACELEN)
    # Introduce blinks
    for i, row in enumerate(dm):
        blinklen = np.random.randint(BLINKLEN[0], BLINKLEN[1], BLINKS)
        if i < blinksinbaseline:
            blinkstart = np.array([1])
        else:
            blinkstart = np.random.randint(BASELINE[1], TRACELEN, BLINKS)
        blinkend = blinkstart + blinklen
        for start, end in zip(blinkstart, blinkend):
            end = min(TRACELEN - 1, end)
            if end - start < 2 * BLINKMARGIN:
                continue
            # Fill the blink with noise, then ramp the signal down into the
            # blink and back up out of it (the ramps must come last, so that
            # the noise doesn't overwrite them)
            row.y[start:end] = np.random.randint(0, 100, end - start)
            row.y[start:start + BLINKMARGIN] = \
                np.linspace(row.y[start - 1], 0, BLINKMARGIN)
            row.y[end - BLINKMARGIN:end] = \
                np.linspace(0, row.y[end], BLINKMARGIN)
    return dm
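# Example invocation (a sketch; EFFECTSIZE, BLINKSINBASELINE, and the other
# uppercase names are module-level constants assumed to be defined elsewhere
# in this script):
#
#     dm = generatedata()
#     # dm.y now holds TRACES simulated traces with artificial blinks; dm.c
#     # codes the two halves (1 and 2), where the second half additionally
#     # contains the effect.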
def test_reduce_():

    dm = DataMatrix(length=2)
    dm.series = SeriesColumn(depth=3)
    dm.series[0] = 1, 2, 3
    dm.series[1] = 2, 3, 4
    dm.col = series.reduce_(dm.series)
    check_col(dm.col, [2, 3])
    check_integrity(dm)
def test_window():

    dm = DataMatrix(length=2)
    dm.series = SeriesColumn(depth=4)
    dm.series[0] = 0, 1, 1, 0
    dm.series[1] = 0, 2, 2, 0
    dm.window = series.window(dm.series, 1, 3)
    check_series(dm.window, [[1, 1], [2, 2]])
    check_integrity(dm)
def test_interpolate():

    dm = DataMatrix(length=3)
    dm.s = SeriesColumn(depth=4)
    dm.s = 1, 2, 3, 4
    dm.s[0] = np.nan
    dm.s[1, 0] = np.nan
    dm.s[1, 2] = np.nan
    dm.i = series.interpolate(dm.s)
    check_series(dm.i, [[np.nan] * 4, [2, 2, 3, 4], [1, 2, 3, 4]])
def test_lock():

    dm = DataMatrix(length=2)
    dm.s = SeriesColumn(depth=3)
    dm.s[0] = 1, 2, 3
    dm.s[1] = -1, -2, -3
    # lock() shifts each row so that its lock sample (here -1 for row 0 and
    # 1 for row 1) falls in the same column for every row; that shared
    # column index is returned as zero_point
    dm.l, zero_point = series.lock(dm.s, [-1, 1])
    assert zero_point == 1
    check_series(dm.l, [[np.nan, np.nan, 1, 2, 3],
                        [-1, -2, -3, np.nan, np.nan]])
def test_seriescolumn():

    dm = DataMatrix(length=2)
    dm.col = SeriesColumn(depth=2)
    dm.col[0] = 1, 2
    dm.col[1] = 3, 4
    # In-place operations
    dm.col += 1
    check_series(dm.col, [[2, 3], [4, 5]])
    dm.col += 1, 2
    check_series(dm.col, [[3, 4], [6, 7]])
    dm.col -= 1
    check_series(dm.col, [[2, 3], [5, 6]])
    dm.col -= 1, 2
    check_series(dm.col, [[1, 2], [3, 4]])
    dm.col *= 2
    check_series(dm.col, [[2, 4], [6, 8]])
    dm.col *= 1.5, 3
    check_series(dm.col, [[3, 6], [18, 24]])
    dm.col /= 3
    check_series(dm.col, [[1, 2], [6, 8]])
    dm.col /= 1, 2
    check_series(dm.col, [[1, 2], [3, 4]])
    dm.col //= 1.5, 2.5
    check_series(dm.col, [[0, 1], [1, 1]])
    dm.col += np.array([
        [0, 0],
        [10, 10]
    ])
    check_series(dm.col, [[0, 1], [11, 11]])
    # Right-side operations
    dm.col[0] = 1, 2
    dm.col[1] = 3, 4
    dm.col = 1 + dm.col
    check_series(dm.col, [[2, 3], [4, 5]])
    dm.col = (1, 2) + dm.col
    check_series(dm.col, [[3, 4], [6, 7]])
    dm.col = 1 - dm.col
    check_series(dm.col, [[-2, -3], [-5, -6]])
    dm.col = (1, 2) - dm.col
    check_series(dm.col, [[3, 4], [7, 8]])
    dm.col = 2 * dm.col
    check_series(dm.col, [[6, 8], [14, 16]])
    dm.col = (1.5, 3) * dm.col
    check_series(dm.col, [[9, 12], [42, 48]])
    dm.col = 3 / dm.col
    check_series(dm.col, [[1. / 3, 1. / 4], [3. / 42, 1. / 16]])
    dm.col = (1, 2) / dm.col
    check_series(dm.col, [[3, 4], [28, 32]])
    dm.col = (1.5, 2.5) // dm.col
    check_series(dm.col, [[0, 0], [0, 0]])
    dm.col = np.array([
        [0, 0],
        [10, 10]
    ]) + dm.col
    check_series(dm.col, [[0, 0], [10, 10]])
def test_downsample():

    dm = DataMatrix(length=2)
    dm.series = SeriesColumn(depth=10)
    dm.series[0] = range(10)
    dm.series[1] = [0, 1] * 5
    dm.d3 = series.downsample(dm.series, 3)
    dm.d5 = series.downsample(dm.series, 5)
    check_series(dm.d3, [[1, 4, 7], [1. / 3, 2. / 3, 1. / 3]])
    check_series(dm.d5, [[2, 7], [.4, .6]])
    check_integrity(dm)
def test_smooth():

    dm = DataMatrix(length=2)
    dm.series = SeriesColumn(depth=6)
    dm.series[0] = range(6)
    dm.series[1] = [0, 1, 2] * 2
    dm.s = series.smooth(dm.series, winlen=3, wintype='flat')
    check_series(dm.s, [
        [2. / 3, 1, 2, 3, 4, 4 + 1. / 3],
        [2. / 3, 1, 1, 1, 1, 1 + 1. / 3]
    ])
    check_integrity(dm)
def test_threshold():

    dm = DataMatrix(length=2)
    dm.series = SeriesColumn(depth=4)
    dm.series[0] = range(4)
    dm.series[1] = range(1, 5)
    # threshold() marks with 1 the samples for which the function holds;
    # min_length only keeps runs of at least that many consecutive samples
    dm.t1 = series.threshold(dm.series, lambda v: v > 1)
    dm.t2 = series.threshold(dm.series, lambda v: v > 1 and v < 3)
    dm.t3 = series.threshold(dm.series, lambda v: v < 3, min_length=3)
    check_series(dm.t1, [[0, 0, 1, 1], [0, 1, 1, 1]])
    check_series(dm.t2, [[0, 0, 1, 0], [0, 1, 0, 0]])
    check_series(dm.t3, [[1, 1, 1, 0], [0, 0, 0, 0]])
    check_integrity(dm)
def test_endlock():

    dm = DataMatrix(length=4)
    dm.series = SeriesColumn(depth=3)
    dm.series[0] = 1, 2, 3
    dm.series[1] = 1, np.nan, 3
    dm.series[2] = 1, 2, np.nan
    dm.series[3] = np.nan, 2, np.nan
    # endlock() shifts each row to the right so that its last non-nan
    # sample ends up at the end of the series
    dm.series = series.endlock(dm.series)
    check_series(dm.series, [
        [1, 2, 3],
        [1, np.nan, 3],
        [np.nan, 1, 2],
        [np.nan, np.nan, 2],
    ])
def test_seriescolumn():

    dm = DataMatrix(length=3)
    dm.col = SeriesColumn(depth=3)
    dm.col[0] = [1, 2, 3]
    dm.col[1] = [3, 3, 3]
    dm.col[2] = [4, 4, 4]
    assert all(dm.col.mean == [8. / 3, 9. / 3, 10. / 3])
    assert all(dm.col.median == [3, 3, 3])
    assert all(dm.col.max == [4, 4, 4])
    assert all(dm.col.min == [1, 2, 3])
    assert all(dm.col.std == [
        np.std([4, 3, 1], ddof=1),
        np.std([4, 3, 2], ddof=1),
        np.std([4, 3, 3], ddof=1)
    ])
def _best_fitting_col_type(col):

    """
    visible: False

    desc:
        Determines the best fitting type for a column.
    """

    from fastnumbers import isreal, isintlike

    if isinstance(col, _SeriesColumn):
        return SeriesColumn(depth=col.depth)
    if isinstance(col, (FloatColumn, IntColumn)):
        return type(col)
    if not all(isreal(val, allow_inf=True, allow_nan=True) for val in col):
        return MixedColumn
    if not all(isintlike(val) for val in col):
        return FloatColumn
    return IntColumn
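# A rough illustration of the inference rules above (a sketch; `dm` and its
# columns are hypothetical):
#
#     dm = DataMatrix(length=2)
#     dm.a = 'x', 1    # not all values real      -> MixedColumn
#     dm.b = 1.5, 2    # all real, not all ints   -> FloatColumn
#     dm.c = 1, 2      # all int-like             -> IntColumn
#     # A SeriesColumn maps to a SeriesColumn of the same depth, and
#     # Float/Int columns keep their own type.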
def test_replace():

    dm = DataMatrix(length=3)
    dm.a = 0, 1, 2
    dm.c = FloatColumn
    dm.c = np.nan, 1, 2
    dm.s = SeriesColumn(depth=3)
    dm.s[0] = 0, 1, 2
    dm.s[1] = np.nan, 1, 2
    dm.s[2] = np.nan, 1, 2
    dm.a = ops.replace(dm.a, {0: 100, 2: 200})
    dm.c = ops.replace(dm.c, {np.nan: 100, 2: np.nan})
    dm.s = ops.replace(dm.s, {np.nan: 100, 2: np.nan})
    check_col(dm.a, [100, 1, 200])
    check_col(dm.c, [100, 1, np.nan])
    check_series(dm.s, [
        [0, 1, np.nan],
        [100, 1, np.nan],
        [100, 1, np.nan],
    ])
def end_phase(self, l):

    self.trialdm['t_offset_%s' % self.current_phase] = l[1]
    for i, (tracelabel, prefix, trace) in enumerate([
            (u'pupil', u'ptrace_', self.ptrace),
            (u'xcoor', u'xtrace_', self.xtrace),
            (u'ycoor', u'ytrace_', self.ytrace),
            (u'time', u'ttrace_', self.ttrace),
            (None, u'fixxlist_', self.fixxlist),
            (None, u'fixylist_', self.fixylist),
            (None, u'fixstlist_', self.fixstlist),
            (None, u'fixetlist_', self.fixetlist),
    ]):
        trace = np.array(trace)
        if tracelabel is not None and self._traceprocessor is not None:
            trace = self._traceprocessor(tracelabel, trace)
        if self._maxtracelen is not None \
                and len(trace) > self._maxtracelen:
            warnings.warn(u'Trace %s is too long (%d samples)'
                          % (self.current_phase, len(trace)))
            trace = trace[:self._maxtracelen]
        colname = prefix + self.current_phase
        self.trialdm[colname] = SeriesColumn(
            len(trace), defaultnan=True)
        self.trialdm[colname][0] = trace
        # Start the time trace at 0
        if len(trace) and prefix in (
                u'ttrace_', u'fixstlist_', u'fixetlist_'):
            self.trialdm[colname][0] -= self._t_onset
        # DEBUG CODE
        # from matplotlib import pyplot as plt
        # plt.subplot(4,2,i+1)
        # plt.title(colname)
        # plt.plot(_trace, color='blue')
        # xdata = np.linspace(0, len(_trace)-1, len(trace))
        # plt.plot(xdata, trace, color='red')
        # plt.show()
    self.current_phase = None
def _set_col(self, name, value):

    """
    visible: False

    desc:
        Sets columns in various formats. Is used by __setitem__ and
        __setattr__.
    """

    # Check if this is a valid column name
    if isinstance(name, bytes):
        name = safe_decode(name)
    if not isinstance(name, str):
        raise TypeError(u'Column names should be str, not %s' % type(name))
    # Create a new column by column type:
    # dm[name] = IntColumn
    # dm[name] = float
    if isinstance(value, type):
        if value == int:
            from datamatrix import IntColumn
            value = IntColumn
        elif value == float:
            from datamatrix import FloatColumn
            value = FloatColumn
        if issubclass(value, BaseColumn):
            self._cols[name] = value(self)
            return
    # Create a new column by type, kwdict tuple
    if (isinstance(value, tuple) and len(value) == 2
            and isinstance(value[0], type)
            and issubclass(value[0], BaseColumn)):
        cls, kwdict = value
        self._cols[name] = cls(self, **kwdict)
        return
    # Create new column by existing column
    if isinstance(value, BaseColumn):
        # If the column belongs to the same datamatrix we simply insert it
        # under a new name.
        if value._datamatrix is self:
            self._cols[name] = value
            return
        # If the column belongs to another datamatrix, we create a new
        # column of the same type
        if len(value) != len(self):
            raise ValueError(
                u'Column should have the same length as the DataMatrix')
        self._cols[name] = value._empty_col(datamatrix=self)
    if name not in self:
        # Create a new SeriesColumn by assigning a 2D ndarray, but only if
        # the column doesn't exist yet
        if np is not None and isinstance(value, np.ndarray) and \
                len(value.shape) == 2:
            if value.shape[0] == len(self):
                depth = value.shape[1]
            elif value.shape[1] == len(self):
                depth = value.shape[0]
                value = np.swapaxes(value, 0, 1)
            else:
                raise ValueError(
                    'Invalid shape for SeriesColumn: {}'.format(value.shape))
            from datamatrix import SeriesColumn
            self[name] = SeriesColumn(depth=depth)
        else:
            self._cols[name] = self._default_col_type(self)
    self._cols[name][:] = value
    self._mutate()
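# The main assignment forms that _set_col() handles, as seen through the
# public API (a sketch; `dm` is hypothetical):
#
#     dm = DataMatrix(length=2)
#     dm.a = int                    # new column by type
#     dm.b = SeriesColumn(depth=3)  # new column by (type, kwdict) tuple
#     dm.c = dm.a                   # existing column, same DataMatrix
#     dm.d = np.zeros((2, 3))       # 2D ndarray -> new SeriesColumn
#     dm.e = 'x', 'y'               # plain values -> default column type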
def group(dm, by):

    """
    desc: |
        *Requires numpy*

        Groups the DataMatrix by unique values in a set of grouping columns.
        Grouped columns are stored as SeriesColumns. The columns that are
        grouped should contain numeric values. The order in which groups
        appear in the grouped DataMatrix is unpredictable.

        __Example:__

        %--
        python: |
            from datamatrix import DataMatrix, operations as ops

            dm = DataMatrix(length=4)
            dm.A = 'x', 'x', 'y', 'y'
            dm.B = 0, 1, 2, 3
            print('Original:')
            print(dm)
            dm = ops.group(dm, by=dm.A)
            print('Grouped by A:')
            print(dm)
        --%

    arguments:
        dm:
            desc: The DataMatrix to group.
            type: DataMatrix
        by:
            desc: A column or list of columns to group by.
            type: [BaseColumn, list]

    returns:
        desc: A grouped DataMatrix.
        type: DataMatrix
    """

    bycol = MixedColumn(datamatrix=dm)
    bynames = []
    if by is not None:
        if isinstance(by, BaseColumn):
            bynames = [by.name]
            by = [by]
        for col in by:
            if col._datamatrix is not dm:
                raise ValueError(
                    u'By-columns are from a different DataMatrix')
            bycol += col
            bynames += [col.name]
    # Hash the by-values so that grouping works regardless of value type
    bycol_hashed = IntColumn(datamatrix=dm)
    bycol_hashed[:] = [hash(key) for key in bycol]
    keys = bycol_hashed.unique
    groupcols = [
        (name, col) for name, col in dm.columns if name not in bynames
    ]
    nogroupcols = [
        (name, col) for name, col in dm.columns if name in bynames
    ]
    cm = DataMatrix(length=len(keys))
    for name, col in groupcols:
        if isinstance(col, _SeriesColumn):
            warn(u'Failed to create series for SeriesColumn %s' % name)
            continue
        cm[name] = SeriesColumn(depth=0)
    for name, col in nogroupcols:
        cm[name] = col.__class__
    for i, key in enumerate(keys):
        dm_ = bycol_hashed == int(key)
        for name, col in groupcols:
            if isinstance(col, _SeriesColumn):
                continue
            if cm[name].depth < len(dm_[name]):
                # Grow the series to fit the largest group
                cm[name].defaultnan = True
                cm[name].depth = len(dm_[name])
                cm[name].defaultnan = False
            try:
                cm[name][i, :len(dm_[name])] = dm_[name]
            except ValueError:
                warn(u'Failed to create series for MixedColumn %s' % name)
        for name, col in nogroupcols:
            cm[name][i] = dm_[name][0]
    return cm
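# A typical follow-up to group(): collapse each group's series back into a
# single value per group (a sketch; a `dm` with columns A and B as in the
# docstring example is assumed, and series.reduce_() averages over the
# depth by default, as in glmer_series() above):
#
#     gm = group(dm, by=dm.A)
#     gm.mean_B = series.reduce_(gm.B)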
def test_seriescolumn():

    _test_copying(SeriesColumn(depth=1))
    dm = DataMatrix(length=2)
    dm.col = SeriesColumn(depth=3)
    # Set all rows to a single value
    dm.col = 1
    check_series(dm.col, [[1, 1, 1], [1, 1, 1]])
    # Set rows to different single values
    dm.col = 2, 3
    check_series(dm.col, [[2, 2, 2], [3, 3, 3]])
    # Set one row to a single value
    dm.col[0] = 4
    check_series(dm.col, [[4, 4, 4], [3, 3, 3]])
    # Set one row to different single values
    dm.col[1] = 5, 6, 7
    check_series(dm.col, [[4, 4, 4], [5, 6, 7]])
    # Set all rows to the same sequence of values
    dm.col.setallrows([8, 9, 10])
    check_series(dm.col, [[8, 9, 10], [8, 9, 10]])
    # Set the first value in all rows
    dm.col[:, 0] = 1
    check_series(dm.col, [[1, 9, 10], [1, 9, 10]])
    # Set all values in the first row
    dm.col[0, :] = 2
    check_series(dm.col, [[2, 2, 2], [1, 9, 10]])
    # Set all values
    dm.col[:, :] = 3
    check_series(dm.col, [[3, 3, 3], [3, 3, 3]])
    # Test shortening and lengthening
    dm.length = 0
    check_series(dm.col, [])
    dm.length = 3
    dm.col = 1, 2, 3
    dm.col.depth = 1
    check_series(dm.col, [[1], [2], [3]])
    dm.col.depth = 3
    check_series(dm.col, [[1, NAN, NAN], [2, NAN, NAN], [3, NAN, NAN]])
    check_integrity(dm)
    # Test sequence assignments that match either the length or the depth
    dm = DataMatrix(length=2)
    dm.col = SeriesColumn(depth=3)
    dm.col = 1, 2
    check_series(dm.col, [[1, 1, 1], [2, 2, 2]])
    dm.col = 3, 4, 5
    check_series(dm.col, [[3, 4, 5]] * 2)
    dm.col.depth = 2
    dm.col[:] = 1, 2
    check_series(dm.col, [[1, 1], [2, 2]])
    dm.col[:, :] = 3, 4
    check_series(dm.col, [[3, 4], [3, 4]])
    # Check if series return the right types
    dm = DataMatrix(length=4)
    dm.col = SeriesColumn(depth=5)
    dm.col = [[1, 2, 3, 4, 5],
              [6, 7, 8, 9, 10],
              [11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20]]
    # (int, int) -> float
    val = dm.col[2, 2]
    eq_(val, 13)
    eq_(type(val), float)
    # (int) -> array
    val = dm.col[2]
    ok_(all(val == np.array([11, 12, 13, 14, 15])))
    eq_(type(val), np.ndarray)
    # (int, slice) -> array
    val = dm.col[2, 1:-1]
    ok_(all(val == np.array([12, 13, 14])))
    eq_(type(val), np.ndarray)
    # (int, (int, int)) -> array
    val = dm.col[2, (1, 3)]
    ok_(all(val == np.array([12, 14])))
    eq_(type(val), np.ndarray)
    # (slice) -> SeriesColumn
    val = dm.col[1:-1]
    check_series(val, [
        [6, 7, 8, 9, 10],
        [11, 12, 13, 14, 15],
    ])
    # (slice, int) -> FloatColumn
    val = dm.col[1:-1, 2]
    ok_(isinstance(val, FloatColumn))
    check_col(val, [8, 13])
    # ((int, int), int) -> FloatColumn
    val = dm.col[(1, 3), 2]
    ok_(isinstance(val, FloatColumn))
    check_col(val, [8, 18])
    # (slice, slice) -> SeriesColumn
    val = dm.col[1:-1, 1:-1]
    ok_(isinstance(val, _SeriesColumn))
    check_series(val, [
        [7, 8, 9],
        [12, 13, 14],
    ])
    # ((int, int), slice) -> SeriesColumn
    val = dm.col[(1, 3), 1:-1]
    ok_(isinstance(val, _SeriesColumn))
    check_series(val, [
        [7, 8, 9],
        [17, 18, 19],
    ])
    # ((int, int), (int, int)) -> SeriesColumn
    val = dm.col[(1, 3), (1, 3)]
    ok_(isinstance(val, _SeriesColumn))
    check_series(val, [
        [7, 9],
        [17, 19],
    ])
def group(dm, by=None):

    """
    desc: |
        *Requires numpy*

        Groups the DataMatrix by unique values in a set of grouping columns.
        Grouped columns are stored as SeriesColumns. The columns that are
        grouped should contain numeric values. For example:

            A B
            - -
            x 0
            x 1
            y 2
            y 3

        >>> group(dm, by=[dm.A])

        Gives:

            A B
            - ------
            x [0, 1]
            y [2, 3]

    arguments:
        dm:
            desc: The DataMatrix to group.
            type: DataMatrix

    keywords:
        by:
            desc: A list of columns to group by.
            type: [list, None]

    returns:
        desc: A grouped DataMatrix.
        type: DataMatrix
    """

    import numpy as np

    if by is None:
        # Without by-columns everything ends up in a single group; treating
        # `by` as an empty list also keeps the membership tests below from
        # failing on None
        by = []
    bycol = MixedColumn(datamatrix=dm)
    for col in by:
        if col._datamatrix is not dm:
            raise ValueError(u'By-columns are from a different DataMatrix')
        bycol += col
    keys = bycol.unique
    groupcols = [(name, col) for name, col in dm.columns if col not in by]
    nogroupcols = [(name, col) for name, col in dm.columns if col in by]
    cm = DataMatrix(length=len(keys))
    for name, col in groupcols:
        if isinstance(col, _SeriesColumn):
            warn(u'Failed to create series for SeriesColumn %s' % name)
            continue
        cm[name] = SeriesColumn(depth=0)
    for name, col in nogroupcols:
        cm[name] = col.__class__
    for i, key in enumerate(keys):
        dm_ = bycol == key
        for name, col in groupcols:
            if isinstance(col, _SeriesColumn):
                continue
            if cm[name].depth < len(dm_[name]):
                cm[name].defaultnan = True
                cm[name].depth = len(dm_[name])
                cm[name].defaultnan = False
            try:
                cm[name][i, :len(dm_[name])] = dm_[name]
            except ValueError:
                warn(u'Failed to create series for MixedColumn %s' % name)
        for name, col in nogroupcols:
            cm[name][i] = dm_[name][0]
    return cm