def _update(self, name, workspace_func):
    """Refresh the workspace table that is shown in the dock widget.

    Nothing happens when the dock widget does not exist or is not visible.
    Otherwise the window title is updated, the workspace is requested through
    `workspace_func`, and the result is handed to `self._qdm`. When no
    workspace is available yet, a retry is scheduled one second later.

    Arguments:
    name            --  label that is inserted into the window title
    workspace_func  --  callable without arguments that returns the workspace
                        as a dict, or None
    """
    if not (hasattr(self, '_dock_widget') and self._dock_widget.isVisible()):
        return
    self._dock_widget.setWindowTitle(_(u'Workspace ({})').format(name))
    workspace = workspace_func()
    # A missing workspace, or one that carries the 'no reply' marker (a key
    # whose value is None), means that we have to ask again in a second.
    no_reply = workspace is None or workspace.get(u'no reply', False) is None
    if no_reply:
        QTimer.singleShot(1000, lambda: self._update(name, workspace_func))
        return
    if workspace.get(u'not supported', False) is None:
        # The kernel does not expose its workspace: show a marker column
        table = DataMatrix(length=0)
        table.kernel_not_supported = -1
    else:
        # One row per workspace variable
        table = DataMatrix(length=len(workspace))
        table.sorted = False
        table.name = ''
        table.value = ''
        table.shape = ''
        table.type = ''
        for row, item in zip(table, workspace.items()):
            var, data = item
            if data is None:
                # The row keeps its empty defaults
                oslogger.warning(u'invalid workspace data: {}'.format(var))
                continue
            value, type_, shape = data
            row.value = value
            row.name = var
            if shape is not None:
                row.shape = repr(shape)
            row.type = type_
    self._qdm.dm = table
    self._qdm.refresh()
def combine(dm, l_neg_dist1, l_neg_dist2, l_neu_dist1, l_neu_dist2):
    """
    Make a potential combination of distractors.

    For every row of `dm`, one distractor is drawn at random from each of the
    two lists that belong to the row's Emotion ("neg" or "neu"). Every drawn
    distractor is removed from its list, so the lists that the caller passes
    in are modified in place.

    Arguments:
    dm          --- a DataMatrix instance with the columns Scene, Emotion,
                    Trial_ID and Object
    l_neg_dist1 --- list of potential first negative distractors
    l_neg_dist2 --- list of potential second negative distractors
    l_neu_dist1 --- list of potential first neutral distractors
    l_neu_dist2 --- list of potential second neutral distractors

    Returns:
    new_dm_exp  --- a DataMatrix with one row per row of `dm`, holding the
                    trial info plus distractor_scene_1 and distractor_scene_2

    Raises:
    Exception if the Emotion of a row is neither "neg" nor "neu".
    """
    # Start empty; one single-row DataMatrix is appended per trial
    new_dm_exp = DataMatrix()

    for row in dm:
        target_scene = row.Scene
        target_emotion = row.Emotion
        trial_id = row.Trial_ID
        target_object = row.Object

        print("valence = ", target_emotion)

        # Pick the pair of candidate lists that matches the valence
        if target_emotion == "neg":
            pool1, pool2 = l_neg_dist1, l_neg_dist2
        elif target_emotion == "neu":
            pool1, pool2 = l_neu_dist1, l_neu_dist2
        else:
            raise Exception("Unknown valence category")

        # Draw without replacement: a distractor can be used only once
        dist1 = random.choice(pool1)
        dist2 = random.choice(pool2)
        pool1.remove(dist1)
        pool2.remove(dist2)

        trial_dm = DataMatrix(1)
        trial_dm["distractor_scene_1"] = dist1
        trial_dm["distractor_scene_2"] = dist2
        trial_dm["Scene"] = target_scene
        trial_dm["Emotion"] = target_emotion
        trial_dm["Trial_ID"] = trial_id
        trial_dm["Object"] = target_object

        new_dm_exp = new_dm_exp << trial_dm

    return new_dm_exp
def test_intcolumn():
    """Check row access, selection, concatenation and type selectors for
    IntColumn."""
    check_getrow(IntColumn)
    check_select(IntColumn)
    check_concat(IntColumn, invalid=0)
    # Selections with reference values that are not ints
    dm = DataMatrix(length=2, default_col_type=IntColumn)
    dm.col = 1, 2
    check_col((dm.col == '1.1').col, [1])  # '1.1' is floored to 1
    check_col((dm.col == '').col, [])
    check_col((dm.col != '').col, [1, 2])

    @raises(TypeError)
    def _order_against_empty_string():
        dm.col > ''

    _order_against_empty_string()
    # Selections by type
    dm = DataMatrix(length=2, default_col_type=IntColumn)
    dm.col = 1, 2
    for selector, n_equal, n_unequal in ((int, 2, 0), (float, 0, 2),
                                         (str, 0, 2)):
        eq_(len(dm.col == selector), n_equal)
        eq_(len(dm.col != selector), n_unequal)
def test_seriescolumn():
    """Check concatenation and selection of DataMatrix objects that hold
    SeriesColumns."""
    left = DataMatrix(length=2)
    left.col1 = SeriesColumn(2)
    left.col1 = 1, 2
    left.col_shared = SeriesColumn(2)
    left.col_shared = 3, 4
    right = DataMatrix(length=2)
    right.col2 = SeriesColumn(2)
    right.col2 = 5, 6
    right.col_shared = SeriesColumn(2)
    right.col_shared = 7, 8
    merged = left << right
    # Rows that stem from the other DataMatrix are filled with nan
    check_series(
        merged.col1,
        [[1, 1], [2, 2], [np.nan, np.nan], [np.nan, np.nan]]
    )
    check_series(merged.col_shared, [[3, 3], [4, 4], [7, 7], [8, 8]])
    check_series(
        merged.col2,
        [[np.nan, np.nan], [np.nan, np.nan], [5, 5], [6, 6]]
    )
    merged.i = [4, 0, 2, 1]
    selected = merged.i <= 2
    selected_or = (merged.i <= 2) | (merged.i >= 3)
    for checked in (left, right, merged, selected, selected_or):
        check_integrity(checked)
def fullfactorial(dm, ignore=u''):
    """
    desc: |
        *Requires numpy*

        Creates a new DataMatrix that uses a specified DataMatrix as the base
        of a full-factorial design. That is, each value of every column is
        combined with each value from every other column. Cells that are
        equal to `ignore` do not count as a level. A `TypeError` is raised if
        not all columns are MixedColumns.

        __Example:__

        %--
        python: |
         from datamatrix import DataMatrix, operations as ops

         dm = DataMatrix(length=2)
         dm.A = 'x', 'y'
         dm.B = 3, 4
         dm = ops.fullfactorial(dm)
         print(dm)
        --%

    arguments:
        dm:
            desc: The source DataMatrix.
            type: DataMatrix

    keywords:
        ignore: A value that should be ignored.

    returns:
        type: DataMatrix
    """
    if not dm.columns:
        return DataMatrix()
    for _name, column in dm.columns:
        if not isinstance(column, MixedColumn):
            raise TypeError(u'fullfactorial only works with MixedColumns')
    # Work on a copy in which, per column, the cells that are not ignored are
    # packed at the start and the remainder is filled with the ignore value.
    dm = dm[:]
    for colname, col in dm.columns:
        kept = (col != ignore)[colname]
        n_kept = len(kept)
        dm[colname][:n_kept] = kept
        dm[colname][n_kept:] = ignore
    # One entry per column: the number of levels of that factor
    design = [len(col != ignore) for _name, col in dm.columns]
    levels = _fullfact(design)
    n_rows = levels.shape[0]
    # The result starts out with empty cells in every column
    fdm = DataMatrix(n_rows)
    for name in dm.column_names:
        fdm[name] = u''
    # Each design row holds, per column, the index of the level to use
    for i in range(n_rows):
        combination = levels[i]
        for col_index, name in enumerate(dm.column_names):
            fdm[name][i] = dm[name][int(combination[col_index])]
    return fdm
def _test_numericcolumn(cls):
    """Exercise assignment, slicing, resizing, uniqueness and indexing for
    the column type `cls`."""
    # Initialise and overwrite with a single value
    dm = DataMatrix(length=2)
    dm.col = cls
    for value in (1, 2):
        dm.col = value
        check_col(dm.col, [value, value])
    # Initialise and overwrite with a sequence
    dm = DataMatrix(length=2)
    dm.col = cls
    for values in ((1, 2), (3, 4)):
        dm.col = values
        check_col(dm.col, list(values))
    # Assign through slices
    dm = DataMatrix(length=3)
    dm.col = cls
    dm.col = 1
    dm.col[1:] = 2
    check_col(dm.col, [1, 2, 2])
    dm.col[:-1] = 4, 3
    check_col(dm.col, [4, 3, 2])
    # Shorten and lengthen
    dm = DataMatrix(length=4)
    dm.length = 0
    dm.length = 4
    # Unique values and assignment through a selection
    dm.col = 1, 2, 1, 2
    ok_(sorted(dm.col.unique) == [1, 2])
    dm.col[dm.col == 2] = 0, 0
    check_col(dm.col, [1, 0, 1, 0])
    check_integrity(dm)
    # Types that are returned when reading from the column
    dm = DataMatrix(length=5)
    dm.col = cls
    dm.col = 1, 2, 3, 4, 5
    # A single index gives a number
    single = dm.col[2]
    ok_(isinstance(single, (int, float)))
    eq_(single, 3)
    # A tuple of indices gives a column of the same type
    picked = dm.col[1, 3]
    ok_(isinstance(picked, cls))
    check_col(picked, [2, 4])
    # A slice gives a column of the same type
    sliced = dm.col[1:-1]
    ok_(isinstance(sliced, cls))
    check_col(sliced, [2, 3, 4])
    # The array property is checked for every type except MixedColumn
    if cls != MixedColumn:
        values = dm.col.array
        ok_(isinstance(values, np.ndarray))
        eq_(values.shape, (5, ))
        ok_(all(values == [1, 2, 3, 4, 5]))
def check_concat(col_type, invalid):
    """Check that `<<` concatenates two DataMatrix objects, and that cells of
    columns that exist in only one of them are filled with `invalid`."""
    first = DataMatrix(length=2, default_col_type=col_type)
    first.col1 = 1, 2
    first.col_shared = 3, 4
    second = DataMatrix(length=2, default_col_type=col_type)
    second.col2 = 5, 6
    second.col_shared = 7, 8
    merged = first << second
    check_col(merged.col1, [1, 2, invalid, invalid])
    check_col(merged.col_shared, [3, 4, 7, 8])
    check_col(merged.col2, [invalid, invalid, 5, 6])
def _test_equals(cls):
    """Check `equals()` on items taken from DataMatrix objects whose single
    column is of type `cls`."""

    def _build(values):
        # A four-row DataMatrix with one column of the tested type
        dm = DataMatrix(length=4)
        dm.col = cls
        dm.col = values
        return dm

    dm1 = _build((1, 2, 3, NAN))
    dm2 = _build((1, 2, 3, NAN))
    dm3 = _build((1, 2, NAN, 3))
    assert dm1[2].equals(dm2[2])
    assert not dm1[2].equals(dm3[2])
def test_properties():
    """Check the `empty` property and the length of a DataMatrix."""
    # A column but no rows
    without_rows = DataMatrix(length=0)
    without_rows.c = -1
    assert without_rows.empty
    # A row but no columns
    without_columns = DataMatrix(length=1)
    assert without_columns.empty
    # One row and one column
    filled = DataMatrix(length=1)
    filled.c = -1
    assert not filled.empty
    # Adding columns leaves the number of rows unchanged
    three_rows = DataMatrix(length=3)
    three_rows.c = -1
    three_rows.d = -1
    assert len(three_rows) == 3
def check_select(col_type):
    """Check the selection operators for columns of type `col_type`."""

    def is_two(x):
        return x == 2

    dm = DataMatrix(length=2, default_col_type=col_type)
    dm.col = 1, 2
    check_col((dm.col < 2).col, [1])
    check_col((dm.col == 2).col, [2])
    # Selections combined with or, and, xor
    check_col(((dm.col == 1) | (dm.col == 2)).col, [1, 2])
    check_col(((dm.col == 1) & (dm.col == 2)).col, [])
    check_col(((dm.col == 1) ^ (dm.col == 2)).col, [1, 2])
    # A sequence of matching length is compared pair-wise
    check_col((dm.col == (1, 3)).col, [1])
    # A set matches any of its members
    check_col((dm.col == {2, 3, 4}).col, [2])
    check_col((dm.col != {1, 3, 4}).col, [2])
    # A function is used as the comparison
    check_col((dm.col == is_two).col, [2])
    check_col((dm.col != is_two).col, [1])
    check_integrity(dm)
def auto_type(dm):
    """
    desc: |
        *Requires fastnumbers*

        Converts all columns of type MixedColumn to IntColumn if all values
        are integer numbers, or FloatColumn if all values are non-integer
        numbers. The column type is chosen per column, and the result is
        returned as a new DataMatrix.

        %--
        python: |
         from datamatrix import DataMatrix, operations

         dm = DataMatrix(length=5)
         dm.A = 'a'
         dm.B = 1
         dm.C = 1.1
         dm_new = operations.auto_type(dm)
         print('dm_new.A: %s' % type(dm_new.A))
         print('dm_new.B: %s' % type(dm_new.B))
         print('dm_new.C: %s' % type(dm_new.C))
        --%

    arguments:
        dm:
            type: DataMatrix

    returns:
        type: DataMatrix
    """
    converted = DataMatrix(length=len(dm))
    for name, col in dm.columns:
        # First create the column with the best-fitting type, then copy the
        # values into it.
        col_type = _best_fitting_col_type(col)
        converted[name] = col_type
        converted[name][:] = col
    return converted
def parse_file(self, path):
    """Parse one data file and return its trials as a single DataMatrix.

    The path is first passed through `self.edf2asc()`. Every start-trial
    message that is found hands the open file to `self.parse_trial()`, and
    the resulting trial is appended to `self.filedm`.

    Arguments:
    path    --  path of the file to parse

    Returns:
    The DataMatrix that is stored as `self.filedm`.
    """
    logging.info(u'parsing {}'.format(path))
    path = self.edf2asc(path)
    self.filedm = DataMatrix()
    self.trialid = None
    self.path = path
    self.on_start_file()
    n_trials = 0
    self._linestack = []
    with open(path) as asc_file:
        for line in self.stacked_file(asc_file):
            # Only messages can be start-trial messages, so for performance
            # non-message lines are not even split.
            if self.is_message(line) and self.is_start_trial(
                    self.split(line)):
                n_trials += 1
                self.print_(u'.')
                self.filedm <<= self.parse_trial(asc_file)
    self.on_end_file()
    logging.info(u' ({} trials)\n'.format(n_trials))
    # Force garbage collection. Without it, memory seems to fill up more
    # quickly than necessary.
    gc.collect()
    return self.filedm
def _test_numeric_properties(coltype, nan):
    """Check the descriptive statistics of a column of type `coltype`, both
    for a column with one invalid value and for a column with only invalid
    values."""
    stats = ('mean', 'median', 'std', 'max', 'min', 'sum')
    dm = DataMatrix(length=4, default_col_type=coltype)
    dm.c = 1, 1, nan, 4
    dm.d = [nan] * 4
    # The invalid value is left out of the statistics
    assert dm.c.mean == 2
    assert dm.c.median == 1
    assert dm.c.std == np.std([1, 1, 4], ddof=1)
    assert dm.c.max == 4
    assert dm.c.min == 1
    assert dm.c.sum == 6

    def _check_only_invalid():
        for stat in stats:
            all_nan(getattr(dm.d, stat), nan)

    if coltype in (IntColumn, FloatColumn):
        # For these column types a RuntimeWarning is expected as well
        with pytest.warns(RuntimeWarning):
            _check_only_invalid()
    else:
        _check_only_invalid()
def parse_trial(self, f):
    """Parse the lines of a single trial into a one-row DataMatrix.

    Lines are read from `f` until an end-trial message is found or the file
    is exhausted. A phase that is still ongoing at that point is ended with
    a warning.

    Arguments:
    f   --  the open file, positioned just after the start-trial message

    Returns:
    The DataMatrix that is stored as `self.trialdm`.
    """
    self.trialdm = DataMatrix(length=1)
    self.trialdm.path = self.path
    self.trialdm.trialid = self.trialid
    self.on_start_trial()
    for line in self.stacked_file(f):
        fields = self.split(line)
        if not fields:
            warnings.warn(u'Empty line')
            continue
        # Only messages can be variables or end-trial messages, so to improve
        # performance other lines are not even checked for those.
        if self.is_message(line):
            if self.is_end_trial(fields):
                break
            self.parse_variable(fields)
        self.parse_phase(fields)
        self.parse_line(fields)
    if self.current_phase is not None:
        warnings.warn(
            u'Trial ended while phase "%s" was still ongoing'
            % self.current_phase)
        # The phase is ended with the last line that was read
        self.end_phase(fields)
    self.on_end_trial()
    return self.trialdm
def test_z():
    """Check that ops.z() gives the expected z-scores, within a tolerance of
    .1."""
    expected = [-1.26, -0.63, 0, .63, 1.26]
    dm = DataMatrix(length=5)
    dm.a = range(-2, 3)
    dm.z = ops.z(dm.a)
    for actual, target in zip(dm.z, expected):
        assert abs(actual - target) < .1
def test_io():
    """Check reading and writing of text, pickle and xlsx files."""
    refdm = DataMatrix(length=3)
    refdm[u'tést'] = 1, 2, u''
    refdm.B = u'mathôt', u'b', u'x'
    refdm.C = u'a,\\b"\'c', 8, u''
    # Read the reference file, and write and read it again
    testdm = io.readtxt('testcases/data/data.csv')
    check_dm(refdm, testdm)
    io.writetxt(testdm, 'tmp.csv')
    testdm = io.readtxt('tmp.csv')
    check_dm(refdm, testdm)
    # Variations of the same file should give the same data
    variations = (
        'testcases/data/line-ending-cr.csv',
        'testcases/data/line-ending-crlf.csv',
        'testcases/data/data-with-bom.csv',
    )
    for path in variations:
        refdm = io.readtxt(path)
        check_dm(refdm, testdm)
    io.writepickle(testdm, 'tmp.pickle')
    testdm = io.readpickle('tmp.pickle')
    check_dm(refdm, testdm)
    # Two consecutive round trips through xlsx
    for _round in range(2):
        io.writexlsx(testdm, 'tmp.xlsx')
        with pytest.warns(UserWarning):  # Not all rows have column C
            testdm = io.readxlsx('tmp.xlsx')
        check_dm(refdm, testdm)
def test_floatcolumn():
    """Check conversion to float, and the handling of nan and inf, for
    FloatColumn."""
    _test_numericcolumn(FloatColumn)
    _test_copying(FloatColumn)
    # Numbers and numeric strings are converted to float
    dm = DataMatrix(length=2)
    dm.col = FloatColumn
    dm.col = 1.9, '2.9'
    check_col(dm.col, [1.9, 2.9])
    # Pairs of the assigned value and the expected content of the column
    cases = (
        # nans
        ('nan', [np.nan, np.nan]),
        (None, [np.nan, np.nan]),
        (np.nan, [np.nan, np.nan]),
        ('x', [np.nan, np.nan]),
        # infs
        ('inf', [np.inf, np.inf]),
        (np.inf, [np.inf, np.inf]),
        # nans and infs
        (('nan', 'inf'), [np.nan, np.inf]),
        ((np.inf, np.nan), [np.inf, np.nan]),
        (('x', None), [np.nan, np.nan]),
    )
    for assigned, expected in cases:
        dm.col = assigned
        check_col(dm.col, expected)
    # The values are stored as 64-bit floats
    ok_(dm.col._seq.dtype == np.float64)
    check_integrity(dm)
def test_intcolumn():
    """Check conversion to int, and the rejection of invalid values, for
    IntColumn."""
    _test_numericcolumn(IntColumn)
    _test_copying(IntColumn)
    # Floats and numeric strings are converted to int
    dm = DataMatrix(length=2)
    dm.col = IntColumn
    dm.col = 1.9, '2.9'
    check_col(dm.col, [1, 2])

    # A value that is not numeric is rejected for a cell, for the whole
    # column, and for a slice.
    @raises(TypeError)
    def _set_cell():
        dm.col[0] = 'x'

    _set_cell()

    @raises(TypeError)
    def _set_column():
        dm.col = 'x'

    _set_column()

    @raises(TypeError)
    def _set_slice():
        dm.col[:-1] = 'x'

    _set_slice()
    # The values are stored as 64-bit ints
    ok_(dm.col._seq.dtype == np.int64)
    check_integrity(dm)
def process_frame(run, frame, lm, cm):
    """Collect, for one frame of one run, the data of all subjects.

    Arguments:
    run     --  the run, as passed on to `_get_subject_data()`
    frame   --  the frame to select from the data of each subject
    lm      --  object that is indexed as lm[y, x] to get the luminance
    cm      --  object that is indexed as cm[y, x] to get the change

    Returns:
    A DataMatrix with one row per entry of SUBJECTS and the columns frame,
    sub, x, y, pupil, luminance and change. Rows of subjects without data
    for this frame are left at their defaults.
    """
    dm = DataMatrix(length=len(SUBJECTS))
    dm.frame = frame
    dm.sub = IntColumn
    dm.x = FloatColumn
    dm.y = FloatColumn
    dm.pupil = FloatColumn
    dm.luminance = FloatColumn
    dm.change = FloatColumn
    print('Run {}, frame {}'.format(run, frame))
    for row, sub in zip(dm, SUBJECTS):
        subject_dm = _get_subject_data(sub, run)
        subject_dm.pupil = ops.z(subject_dm.pupil)
        try:
            sample = (subject_dm.frame == frame)[0]
        except IndexError:
            # This subject has no data for this frame
            continue
        row.sub = sub
        # Clamp the coordinates to 0..1279 and 0..546
        # NOTE(review): presumably the bounds of lm and cm -- confirm
        x = min(1279, max(0, sample.x))
        y = min(546, max(0, sample.y))
        if not (x or y):
            # A position of (0, 0) is treated as missing data
            row.x = np.nan
            row.y = np.nan
            row.pupil = np.nan
            row.luminance = np.nan
            row.change = np.nan
            continue
        row.x = x
        row.y = y
        row.pupil = sample.pupil
        row.luminance = lm[int(y), int(x)]
        row.change = cm[int(y), int(x)]
    return dm
def check_desc_stats(col_type, invalid, assert_invalid):
    """Check the descriptive statistics of columns of type `col_type` for
    even, odd, single and zero lengths.

    `invalid` is the value that marks an invalid cell; it is not used for
    IntColumn. `assert_invalid` is called with every statistic that is
    expected to be invalid.
    """
    with_invalid = col_type is not IntColumn
    dm = DataMatrix(length=4, default_col_type=col_type)
    # Even number of valid values
    dm.col = 1, 2, 3, 10
    check_even(dm)
    if with_invalid:
        dm.length = 5
        dm.col = 1, 2, 3, 10, invalid
        check_even(dm)
    # Odd number of valid values (also in an even length with one invalid)
    dm.length = 3
    dm.col = 1, 2, 10
    check_odd(dm)
    if with_invalid:
        dm.length = 4
        dm.col[3] = invalid
        check_odd(dm)
    # A single value: only the standard deviation is invalid
    dm.length = 1
    dm.col = 1
    assert dm.col.mean == 1
    assert dm.col.median == 1
    assert_invalid(dm.col.std)
    for stat in ('min', 'max', 'sum'):
        assert getattr(dm.col, stat) == 1
    # No values at all: every statistic is invalid
    dm.length = 0
    for stat in ('mean', 'median', 'std', 'min', 'max', 'sum'):
        assert_invalid(getattr(dm.col, stat))
def glmer_series(dm, formula, family, winlen=1):
    """Run `glmer()` separately on consecutive windows of a series column.

    The first word of `formula` names the series column. For each window of
    `winlen` samples, that column is reduced to a single value per row and
    the model is fitted on the result.

    Arguments:
    dm      --  a DataMatrix that holds the series column
    formula --  the model formula; passed on to `glmer()`
    family  --  passed on to `glmer()` as the `family` keyword

    Keywords:
    winlen  --  the number of samples per window

    Returns:
    A DataMatrix with one row per effect, and the series columns p, z, est
    and se in which each window holds the result for that window. None is
    returned if the series column has a depth of 0.
    """
    stats = ('p', 'z', 'est', 'se')
    col = formula.split()[0]
    depth = dm[col].depth
    rm = None
    for start in range(0, depth, winlen):
        end = start + winlen
        wm = dm[:]
        wm[col] = series.reduce_(series.window(wm[col], start=start, end=end))
        lm = glmer(wm, formula, family=family)
        print('Sample %d' % start)
        print(lm)
        if rm is None:
            # The first model determines the rows of the result
            rm = DataMatrix(length=len(lm))
            rm.effect = list(lm.effect)
            for stat in stats:
                setattr(rm, stat, SeriesColumn(depth=depth))
        for lmrow, rmrow in zip(lm, rm):
            for stat in stats:
                getattr(rmrow, stat)[start:end] = getattr(lmrow, stat)
    return rm
def check_mixedcolumn_sorting():
    """Check the sort order of a column that mixes numbers, numeric strings,
    text, None and nan."""
    values = [
        1, '1', 2, '2', 1.1, '1.1', 2.1, '2.1',
        INF, -INF, 'inf', '-inf',
        NAN, NAN, 'nan', 'nan',
        None, None, None, None,
        'alpha', 'beta', 'None', ''
    ]
    # Expected order: numbers, then text, then None, then nan
    expected = [
        -INF, -INF, 1, 1, 1.1, 1.1, 2, 2, 2.1, 2.1, INF, INF,
        '', 'None', 'alpha', 'beta',
        None, None, None, None,
        NAN, NAN, NAN, NAN,
    ]
    dm = DataMatrix(length=24)
    dm.c = values
    dm.c = ops.shuffle(dm.c)
    dm = ops.sort(dm, by=dm.c)
    check_col(dm.c, expected)
def _empty_col(self, datamatrix=None):
    """
    visible: False

    desc:
        Create an empty column of the same type as the current column.

    keywords:
        datamatrix: The DataMatrix to which the empty column should belong
                    or None. If None, then the DataMatrix of current column
                    is used unless it has a different length, in which case
                    a new DataMatrix object is initialized.

    returns:
        BaseColumn
    """
    # Compare against None explicitly. A DataMatrix has a length, so an
    # empty DataMatrix is falsy, and a truthiness test would silently ignore
    # an empty DataMatrix that was passed on purpose.
    if datamatrix is not None:
        return self.__class__(datamatrix)
    # If this column results from slicing from an original column, the
    # rowids do not match with the DataMatrix. In that case, create a new
    # DataMatrix for the empty column.
    if len(self) != len(self._datamatrix):
        return self.__class__(DataMatrix(length=len(self)))
    return self.__class__(self._datamatrix)
def dm(self):
    """Return an overview of the processes as a DataMatrix.

    The first row describes the current process. Processes from
    `self._processes` that no longer exist are moved to `self._ended`, and
    are listed as ended only if `cfg.subprocess_manager_show_ended` is set.

    Returns:
    A DataMatrix with the columns pid, state and description.
    """
    rows = [(os.getpid(), 'running', 'MainProcess')]
    died = []
    for pid, description in self._processes.items():
        if psutil.pid_exists(pid):
            rows.append((pid, 'running', description))
        else:
            self._ended.append((pid, description))
            died.append(pid)
    # Deleting is done afterwards, because a dict cannot change size while
    # it is being iterated.
    for pid in died:
        del self._processes[pid]
    if cfg.subprocess_manager_show_ended:
        rows.extend(
            (pid, 'ended', description) for pid, description in self._ended
        )
    dm = DataMatrix(length=len(rows))
    dm.pid = [pid for pid, _state, _description in rows]
    dm.state = [state for _pid, state, _description in rows]
    dm.description = [description for _pid, _state, description in rows]
    return dm
def test_io():
    """Check reading and writing of text, pickle and xlsx files."""
    refdm = DataMatrix(length=3)
    refdm[u'tést'] = 1, 2, u''
    refdm.B = u'mathôt', u'b', u'x'
    refdm.C = u'a,\\b"\'c', 8, u''
    # Read the reference file, and write and read it again
    testdm = io.readtxt('testcases/data/data.csv')
    check_dm(refdm, testdm)
    io.writetxt(testdm, 'tmp.csv')
    testdm = io.readtxt('tmp.csv')
    check_dm(refdm, testdm)
    # Other line endings should give the same data
    line_ending_files = (
        'testcases/data/line-ending-cr.csv',
        'testcases/data/line-ending-crlf.csv',
    )
    for path in line_ending_files:
        refdm = io.readtxt(path)
        check_dm(refdm, testdm)
    # Round trips through pickle and xlsx
    io.writepickle(testdm, 'tmp.pickle')
    testdm = io.readpickle('tmp.pickle')
    check_dm(refdm, testdm)
    io.writexlsx(testdm, 'tmp.xlsx')
    testdm = io.readxlsx('tmp.xlsx')
    check_dm(refdm, testdm)
def generatedata(effectsize=EFFECTSIZE, blinksinbaseline=BLINKSINBASELINE,
                 **kwargs):
    """Generate traces for two conditions, with noise and simulated blinks.

    Keywords:
    effectsize          --  end value of the linear ramp that is added to
                            the traces of the second half of the rows
    blinksinbaseline    --  the number of rows, counted from the first, in
                            which the blink starts at sample 1
    **kwargs            --  accepted but not used

    Returns:
    A DataMatrix with TRACES rows, a condition column `c` (1 for the first
    half of the rows, 2 for the second half) and a series column `y`.
    """
    half = TRACES // 2
    dm = DataMatrix(length=TRACES)
    dm.c = IntColumn
    dm.c[:half] = 1
    dm.c[half:] = 2
    dm.y = SeriesColumn(depth=TRACELEN)
    # NOTE(review): `a` is not defined in this function; it is presumably a
    # module-level base trace -- confirm
    dm.y.setallrows(a)
    dm.y += np.random.randint(NOISERANGE[0], NOISERANGE[1], TRACES)
    dm.y[half:] += np.linspace(0, effectsize, TRACELEN)
    # Introduce blinks
    for i, row in enumerate(dm):
        blinklen = np.random.randint(BLINKLEN[0], BLINKLEN[1], BLINKS)
        blinkstart = (
            np.array([1]) if i < blinksinbaseline
            else np.random.randint(BASELINE[1], TRACELEN, BLINKS)
        )
        blinkend = blinkstart + blinklen
        for start, end in zip(blinkstart, blinkend):
            stop = min(TRACELEN - 1, end)
            # Skip blinks that are too short to hold both margins
            if stop - start < 2 * BLINKMARGIN:
                continue
            # Ramp down to 0 at the onset and up from 0 at the offset
            row.y[start:start + BLINKMARGIN] = np.linspace(
                row.y[start - 1], 0, BLINKMARGIN)
            row.y[stop - BLINKMARGIN:stop] = np.linspace(
                0, row.y[stop], BLINKMARGIN)
            # NOTE(review): this also overwrites the two ramps that were
            # just written -- confirm that this is intended
            row.y[start:stop] = np.random.randint(0, 100, stop - start)
    return dm
def check_intcolumn_typing():
    """Check which values an IntColumn converts to int, and which values it
    rejects with a TypeError."""
    dm = DataMatrix(length=4, default_col_type=IntColumn)
    dm.f = 1.1, '1.8', 2, '2'
    ok_(all(isinstance(v, int) for v in dm.f))
    # Pairs of a column name and the values that should be rejected
    rejected = (
        ('inf', (INF, -INF, 'inf', '-inf')),
        ('nan', (NAN, NAN, 'nan', 'nan')),
        ('none', (None, None, None, None)),
        ('s', ('alpha', 'beta', 'None', ' ')),
        ('err', (Exception, tuple, str, map)),
    )
    for colname, values in rejected:

        @raises(TypeError)
        def _assign():
            setattr(dm, colname, values)

        _assign()
def from_json(s):
    """
    desc: |
        *Requires json_tricks*

        Creates a DataMatrix from a `json` string.

    arguments:
        s:
            desc: A json string.
            type: str

    returns:
        desc: A DataMatrix.
        type: DataMatrix.
    """
    import json_tricks

    data = json_tricks.loads(s)
    dm = DataMatrix(length=len(data['rowid']))
    for name, (coltype, seq) in data['columns'].items():
        if coltype == '_SeriesColumn':
            # The depth of a series column follows from the stored values
            new_col = SeriesColumn(depth=seq.shape[1])
        else:
            # Other column types are looked up by name in this module
            # NOTE(review): the name comes from the json string, so only
            # trusted input should be passed -- confirm against callers
            new_col = globals()[coltype]
        dm[name] = new_col
        dm[name]._seq = seq
    return dm
def test_fullfactorial():
    """Check that ops.fullfactorial() crosses all levels and leaves out
    empty cells."""
    source = DataMatrix(length=3)
    source.a = 'a', 'b', ''
    source.b = 0, 1, 2
    design = ops.fullfactorial(source)
    # Two levels of a crossed with three levels of b
    check_col(design.a, ['a', 'b', 'a', 'b', 'a', 'b'])
    check_col(design.b, [0, 0, 1, 1, 2, 2])
def test_weight():
    """Check that ops.weight() repeats each row as often as its weight."""
    source = DataMatrix(length=3)
    source.a = 'a', 'b', 'c'
    source.b = 1, 0, 2
    weighted = ops.weight(source.b)
    # The row with weight 0 is dropped, the row with weight 2 is doubled
    check_col(weighted.a, ['a', 'c', 'c'])
    check_col(weighted.b, [1, 2, 2])