def test_combine():
    "Test combine()"
    # combining two identical datasets: second input occupies the tail rows
    part_a = datasets.get_uts()
    part_b = datasets.get_uts()
    merged = combine((part_a, part_b))
    assert_array_equal(part_b['Y'].x, merged['Y'].x[part_a.n_cases:], "Basic combine")

    # combining with a key missing from one of the inputs
    del part_a['Y']
    del part_b['YCat']
    merged = combine((part_a, part_b))
    assert_array_equal(part_b['Y'].x, merged['Y'].x[part_a.n_cases:], "Combine with missing Var")
    ok_(np.all(part_a['YCat'] == merged['YCat'][:part_a.n_cases]), "Combine with missing Factor")
    # mixing data-object types is rejected
    assert_raises(TypeError, combine, (part_b['A'], part_b['Y']))

    # combine NDVar with unequal dimensions: sensor dimension is intersected
    source = datasets.get_uts(utsnd=True)
    full = source['utsnd']
    low = full.sub(sensor=['0', '1', '2', '3'])
    high = full.sub(sensor=['1', '2', '3', '4'])
    merged = combine((Dataset((low,)), Dataset((high,))))
    result = merged['utsnd']
    eq_(result.sensor.names, ['1', '2', '3'], "Sensor dimension intersection failed.")
    dims = ('case', 'sensor', 'time')
    expected = np.concatenate((low.get_data(dims)[:, 1:], high.get_data(dims)[:, :3]))
    assert_array_equal(result.get_data(dims), expected, "combine utsnd")
def test_tsv_io():
    """Test tsv I/O"""
    column_names = ['A', 'B', 'rm', 'intvar', 'fltvar', 'fltvar2', 'index']
    # dataset with missing values in both a Var and a Factor
    src = datasets.get_uv()
    src['fltvar'][5:10] = np.nan
    src[:4, 'rm'] = ''

    # round-trip through a temporary file
    tempdir = tempfile.mkdtemp()
    try:
        path = os.path.join(tempdir, 'ds.txt')
        src.save_txt(path)
        loaded_auto = load.tsv(path)
        loaded_named = load.tsv(path, skiprows=1, names=column_names)
        assert_dataset_equal(loaded_auto, src, "TSV write/read test failed", 10)
        assert_dataset_equal(loaded_named, src, "TSV write/read test failed", 10)

        # guess data types with missing
        with_missing = src['intvar'].as_factor()
        with_missing[10:] = ''
        Dataset((with_missing,)).save_txt(path)
        reloaded = load.tsv(path, empty='nan')
        assert_dataobj_equal(reloaded['intvar', :10], src['intvar', :10])
        assert_array_equal(reloaded['intvar', 10:], np.nan)
    finally:
        shutil.rmtree(tempdir)
def test_tsv_io():
    """Test tsv I/O"""
    names = ['A', 'B', 'rm', 'intvar', 'fltvar', 'fltvar2', 'index']
    # get Dataset; inject missing values into a Var and a Factor column
    ds = datasets.get_uv()
    ds['fltvar'][5:10] = np.nan
    ds[:4, 'rm'] = ''

    # save and load round-trip through a temporary directory
    tempdir = tempfile.mkdtemp()
    try:
        dst = os.path.join(tempdir, 'ds.txt')
        ds.save_txt(dst)
        ds1 = load.tsv(dst)  # infer column names from header
        ds2 = load.tsv(dst, skiprows=1, names=names)  # explicit names
        assert_dataset_equal(ds1, ds, "TSV write/read test failed", 10)
        assert_dataset_equal(ds2, ds, "TSV write/read test failed", 10)

        # guess data types with missing: numeric column with empty cells
        # should load as float with NaN for the empty entries
        intvar2 = ds['intvar'].as_factor()
        intvar2[10:] = ''
        ds_intvar = Dataset((intvar2, ))
        ds_intvar.save_txt(dst)
        ds_intvar1 = load.tsv(dst, empty='nan')
        assert_dataobj_equal(ds_intvar1['intvar', :10], ds['intvar', :10])
        assert_array_equal(ds_intvar1['intvar', 10:], np.nan)
    finally:
        # always remove the temporary directory
        shutil.rmtree(tempdir)
def test_tsv_io():
    """Test tsv I/O"""
    tempdir = TempDir()
    names = ['A', 'B', 'rm', 'intvar', 'fltvar', 'fltvar2', 'index']
    ds = datasets.get_uv()
    ds['fltvar'][5:10] = np.nan  # missing numeric values
    ds[:4, 'rm'] = ''  # missing factor cells

    # save and load round-trip with inferred and with explicit column names
    dst = Path(tempdir) / 'ds.txt'
    ds.save_txt(dst)
    ds1 = load.tsv(dst, random='rm')
    assert_dataset_equal(ds1, ds, decimal=10)
    ds1 = load.tsv(dst, skiprows=1, names=names, random='rm')
    assert_dataset_equal(ds1, ds, decimal=10)

    # delimiter: space- and comma-separated variants round-trip too
    for delimiter in [' ', ',']:
        ds.save_txt(dst, delimiter=delimiter)
        ds1 = load.tsv(dst, delimiter=delimiter, random='rm')
        assert_dataset_equal(ds1, ds, decimal=10)

    # guess data types with missing: numeric column with empty cells loads
    # as float with NaN filled in for the empty entries
    intvar2 = ds['intvar'].as_factor()
    intvar2[10:] = ''
    ds_intvar = Dataset((intvar2, ))
    ds_intvar.save_txt(dst)
    ds_intvar1 = load.tsv(dst, empty='nan')
    assert_dataobj_equal(ds_intvar1['intvar', :10], ds['intvar', :10])
    assert_array_equal(ds_intvar1['intvar', 10:], np.nan)

    # str with space: cell values containing the delimiter must survive
    ds[:5, 'A'] = 'a 1'
    ds.save_txt(dst)
    ds1 = load.tsv(dst, random='rm')
    assert_dataset_equal(ds1, ds, decimal=10)
    ds.save_txt(dst, delimiter=' ')
    ds1 = load.tsv(dst, delimiter=' ', random='rm')
    assert_dataset_equal(ds1, ds, decimal=10)

    # Fixed column width file from the test-data collection
    path = file_path('fox-prestige')
    ds = load.tsv(path, delimiter=' ', skipinitialspace=True)
    assert ds[1] == {'id': 'GENERAL.MANAGERS', 'education': 12.26, 'income': 25879, 'women': 4.02, 'prestige': 69.1, 'census': 1130, 'type': 'prof'}
def t_stop_ds(ds: Dataset, t: float):
    "Dummy-event for the end of the last step"
    # place the terminal event ``t`` seconds after the recorded stop time
    stop_time = ds.info['tstop'] + t
    columns = {}
    for k, v in ds.items():
        if k == 'time':
            columns['time'] = Var([stop_time])
        elif isinstance(v, Var):
            # a single zero, preserving the column's dtype
            columns[k] = Var(numpy.asarray([0], v.x.dtype))
        elif isinstance(v, Factor):
            columns[k] = Factor([''])
        else:
            raise ValueError(f"{k!r} in predictor: {v!r}")
    return Dataset(columns)
def test_anova_r_adler():
    """Test ANOVA accuracy by comparing with R (Adler dataset of car package)

    An unbalanced 3 by 2 independent measures design.
    """
    from rpy2.robjects import r

    # "Adler" dataset
    r_require('car')
    ds = Dataset.from_r('Adler')

    # with balanced data
    dsb = ds.equalize_counts('expectation % instruction')
    dsb.to_r('AdlerB')
    aov = test.anova('rating', 'instruction * expectation', ds=dsb)
    fs = run_on_lm_fitter('rating', 'instruction * expectation', dsb)
    # Python-2 ``print r(...)`` statements are SyntaxErrors on Python 3;
    # converted to print() calls (output is unchanged)
    print(r('a.aov <- aov(rating ~ instruction * expectation, AdlerB)'))
    print(r('a.summary <- summary(a.aov)'))
    r_res = r['a.summary'][0]
    assert_f_tests_equal(aov.f_tests, r_res, fs)

    # with unbalanced data; for Type II SS use car package
    aov = test.anova('rating', 'instruction * expectation', ds=ds)
    fs = run_on_lm_fitter('rating', 'instruction * expectation', ds)
    r_res = r("Anova(lm(rating ~ instruction * expectation, Adler, type=2))")
    assert_f_tests_equal(aov.f_tests, r_res, fs, 'Anova')

    # single predictor
    aov = test.anova('rating', 'instruction', ds=ds)
    fs = run_on_lm_fitter('rating', 'instruction', ds)
    r_res = r("Anova(lm(rating ~ instruction, Adler, type=2))")
    assert_f_test_equal(aov.f_tests[0], r_res, 0, fs[0], 'Anova')
def test_anova_r_adler():
    """Test ANOVA accuracy by comparing with R (Adler dataset of car package)

    An unbalanced 3 by 2 independent measures design.
    """
    from rpy2.robjects import r

    # "Adler" dataset
    r_require('car')
    ds = Dataset.from_r('Adler')

    # with balanced data: compare against base-R aov()
    dsb = ds.equalize_counts('expectation % instruction')
    dsb.to_r('AdlerB')
    aov = test.ANOVA('rating', 'instruction * expectation', ds=dsb)
    fs = run_on_lm_fitter('rating', 'instruction * expectation', dsb)
    fnds = run_as_ndanova('rating', 'instruction * expectation', dsb)
    print(r('a.aov <- aov(rating ~ instruction * expectation, AdlerB)'))
    print(r('a.summary <- summary(a.aov)'))
    r_res = r['a.summary'][0]
    assert_f_tests_equal(aov.f_tests, r_res, fs, fnds)

    # with unbalanced data; for Type II SS use car package
    aov = test.ANOVA('rating', 'instruction * expectation', ds=ds)
    fs = run_on_lm_fitter('rating', 'instruction * expectation', ds)
    fnds = run_as_ndanova('rating', 'instruction * expectation', ds)
    r_res = r("Anova(lm(rating ~ instruction * expectation, Adler, type=2))")
    assert_f_tests_equal(aov.f_tests, r_res, fs, fnds, 'Anova')

    # single predictor
    aov = test.ANOVA('rating', 'instruction', ds=ds)
    fs = run_on_lm_fitter('rating', 'instruction', ds)
    fnds = run_as_ndanova('rating', 'instruction', ds)
    r_res = r("Anova(lm(rating ~ instruction, Adler, type=2))")
    assert_f_test_equal(aov.f_tests[0], r_res, 0, fs[0], fnds[0], 'Anova')
def test_anova_r_sleep():
    "Test ANOVA accuracy by comparing with R (sleep dataset)"
    from rpy2.robjects import r

    # "sleep" dataset
    # Python-2 ``print r(...)`` statements are SyntaxErrors on Python 3;
    # converted to print() calls (output is unchanged)
    print(r('data(sleep)'))
    ds = Dataset.from_r('sleep')
    ds['ID'].random = True

    # independent measures
    aov = test.anova('extra', 'group', ds=ds)
    fs = run_on_lm_fitter('extra', 'group', ds)
    print(r('sleep.aov <- aov(extra ~ group, sleep)'))
    print(r('sleep.summary <- summary(sleep.aov)'))
    r_res = r['sleep.summary'][0]
    assert_f_test_equal(aov.f_tests[0], r_res, 0, fs[0])

    # repeated measures
    aov = test.anova('extra', 'group * ID', ds=ds)
    fs = run_on_lm_fitter('extra', 'group * ID', ds)
    print(r('sleep.aov <- aov(extra ~ group + Error(ID / group), sleep)'))
    print(r('sleep.summary <- summary(sleep.aov)'))
    r_res = r['sleep.summary'][1][0]
    assert_f_test_equal(aov.f_tests[0], r_res, 0, fs[0])

    # unbalanced (independent measures)
    ds2 = ds[1:]
    print(r('sleep2 <- subset(sleep, (group == 2) | (ID != 1))'))
    aov = test.anova('extra', 'group', ds=ds2)
    fs = run_on_lm_fitter('extra', 'group', ds2)
    print(r('sleep2.aov <- aov(extra ~ group, sleep2)'))
    print(r('sleep2.summary <- summary(sleep2.aov)'))
    r_res = r['sleep2.summary'][0]
    assert_f_test_equal(aov.f_tests[0], r_res, 0, fs[0])
def _generate_continuous(
        self,
        uts: UTS,  # time axis for the output
        ds: Dataset,  # events
        stim_var: str,
        code: Code,
        directory: Path,
):
    # place multiple input files into a continuous predictor
    # load one NUTS file per stimulus that occurs in the events
    cache = {stim: self._load(uts.tstep, code.with_stim(stim).nuts_file_name(self.columns), directory) for stim in ds[stim_var].cells}
    # determine type: all cached stimuli must load as the same class
    stim_type = {type(s) for s in cache.values()}
    assert len(stim_type) == 1
    stim_type = stim_type.pop()
    # generate x
    if stim_type is Dataset:
        # event-style predictors: shift each stimulus' events by the
        # stimulus onset time and concatenate into one Dataset
        dss = []
        for t, stim in ds.zip('T_relative', stim_var):
            x = cache[stim].copy()
            x['time'] += t
            dss.append(x)
            if code.nuts_method:
                # add a terminating dummy event for step predictors
                x_stop_ds = t_stop_ds(x, t)
                dss.append(x_stop_ds)
        x = self._ds_to_ndvar(combine(dss), uts, code)
    elif stim_type is NDVar:
        # continuous predictors: paste each stimulus' data into the
        # output array at its onset sample
        v = cache[ds[0, stim_var]]
        dimnames = v.get_dimnames(first='time')
        dims = (uts, *v.get_dims(dimnames[1:]))
        x = NDVar.zeros(dims, code.key)
        for t, stim in ds.zip('T_relative', stim_var):
            x_stim = cache[stim]
            i_start = uts._array_index(t + x_stim.time.tmin)
            i_stop = i_start + len(x_stim.time)
            if i_stop > len(uts):
                raise ValueError(f"{code.string_without_rand} for {stim} is longer than the data")
            x.x[i_start:i_stop] = x_stim.get_data(dimnames)
    else:
        raise RuntimeError(f"stim_type={stim_type!r}")
    return x
def _align_index( self, words: Sequence[str], silence: Any = None, missing: Any = None, search_distance: int = 6, ) -> Sequence: """Index into ``words``""" # search_order j_pairs = list(product(range(search_distance), repeat=2)) j_pairs.pop(0) j_pairs.sort(key=lambda x: x[0]**2 + x[1]**2) # input sequences words_ = [word.upper() for word in words] n_words = len(words_) grid_words = [r.graphs.upper() for r in self.realizations] n_grid = len(grid_words) # counters i_grid = i_word = 0 # i_next: start of unused words in ``words`` out = [] while i_grid < n_grid: # silence if grid_words[i_grid] == ' ': out.append(silence) i_grid += 1 continue # direct match if grid_words[i_grid] == words_[i_word]: out.append(i_word) i_grid += 1 i_word += 1 continue # grid search for closest match for j_grid, j_word in j_pairs: if grid_words[i_grid + j_grid] == words_[i_word + j_word]: break else: # informative error message start = min([i_grid, i_word, 2]) stop = min([10, n_grid - i_grid, n_words - i_word]) ds = Dataset() ds['grid_words'] = grid_words[i_grid - start: i_grid + stop] ds['words'] = words[i_word - start: i_word + stop] raise ValueError(f"No match within search_distance {search_distance}:\n{ds}") # need to fill in one value for each skipped grid ii_word = 0 for ii_grid in range(j_grid): if grid_words[i_grid + ii_grid] == ' ': out.append(silence) elif ii_word < j_word: out.append(i_word + ii_word) ii_word += 1 else: out.append(missing) # append the next match out.append(i_word + j_word) i_grid += j_grid + 1 i_word += j_word + 1 return out
def test_r():
    "Test interaction with R through rpy2"
    from rpy2.robjects import r

    r("data(sleep)")
    ds = Dataset.from_r("sleep")
    assert_equal(ds.name, 'sleep')

    extra = (0.7, -1.6, -0.2, -1.2, -0.1, 3.4, 3.7, 0.8, 0.0, 2.0, 1.9, 0.8,
             1.1, 0.1, -0.1, 4.4, 5.5, 1.6, 4.6, 3.4)
    assert_array_equal(ds.eval('extra'), extra)
    # Python 3 fix: ``xrange`` no longer exists and ``map()`` returns an
    # iterator that cannot be multiplied — materialize it as a list first
    assert_array_equal(ds.eval('ID'), list(map(str, range(1, 11))) * 2)
    assert_array_equal(ds.eval('group'), ['1'] * 10 + ['2'] * 10)

    # test putting
    ds.to_r('sleep_copy')
    ds_copy = Dataset.from_r('sleep_copy')
    assert_dataset_equal(ds_copy, ds)
def test_r():
    "Test interaction with R through rpy2"
    from rpy2.robjects import r

    # pull the built-in "sleep" dataset from R
    r("data(sleep)")
    ds = Dataset.from_r("sleep")
    eq_(ds.name, 'sleep')

    expected_extra = (0.7, -1.6, -0.2, -1.2, -0.1, 3.4, 3.7, 0.8, 0.0, 2.0,
                      1.9, 0.8, 1.1, 0.1, -0.1, 4.4, 5.5, 1.6, 4.6, 3.4)
    assert_array_equal(ds.eval('extra'), expected_extra)
    assert_array_equal(ds.eval('ID'), [str(i) for i in range(1, 11)] * 2)
    assert_array_equal(ds.eval('group'), ['1'] * 10 + ['2'] * 10)

    # push a copy back into R and read it out again
    ds.to_r('sleep_copy')
    round_tripped = Dataset.from_r('sleep_copy')
    assert_dataset_equal(round_tripped, ds)
def test_combine():
    "Test combine()"
    ds1 = datasets.get_uts()
    ds2 = datasets.get_uts()
    n = ds1.n_cases
    ds = combine((ds1, ds2))
    # cases are concatenated: rows n: come from the second input
    assert_array_equal(ds2['Y'].x, ds['Y'].x[n:])

    # combine Datasets with unequal keys
    del ds1['Y']
    # raise: by default a missing key is an error, in either order
    assert_raises(KeyError, combine, (ds1, ds2))
    assert_raises(KeyError, combine, (ds2, ds1))
    # drop: incomplete columns are omitted from the result
    del ds2['YCat']
    ds = combine((ds1, ds2), incomplete='drop')
    ok_('Y' not in ds)
    ok_('YCat' not in ds)
    # fill in: missing Var cells become NaN, missing Factor cells ''
    ds = combine((ds1, ds2), incomplete='fill in')
    assert_array_equal(ds['Y'].x[n:], ds2['Y'].x)
    assert_array_equal(np.isnan(ds['Y'].x[:n]), True)
    assert_array_equal(ds['YCat'][:n], ds1['YCat'])
    assert_array_equal(ds['YCat'][n:], '')

    # invalid input
    assert_raises(ValueError, combine, ())
    assert_raises(TypeError, combine, (ds2['A'], ds2['Y']))

    # combine NDVar with unequal dimensions: sensors are intersected
    ds = datasets.get_uts(utsnd=True)
    y = ds['utsnd']
    y1 = y.sub(sensor=['0', '1', '2', '3'])
    y2 = y.sub(sensor=['1', '2', '3', '4'])
    ds1 = Dataset((y1,), info={'a': np.arange(2), 'b': [np.arange(2)]})
    ds2 = Dataset((y2,), info={'a': np.arange(2), 'b': [np.arange(2)]})
    dsc = combine((ds1, ds2))
    y = dsc['utsnd']
    eq_(list(y.sensor.names), ['1', '2', '3'], "Sensor dimension intersection")
    dims = ('case', 'sensor', 'time')
    ref = np.concatenate((y1.get_data(dims)[:, 1:], y2.get_data(dims)[:, :3]))
    assert_array_equal(y.get_data(dims), ref, "combine utsnd")

    # info: equal entries survive; the list entry is deduplicated to length 1
    assert_array_equal(dsc.info['a'], np.arange(2))
    eq_(len(dsc.info['b']), 1)
    assert_array_equal(dsc.info['b'][0], np.arange(2))
def align(
        self,
        words: Sequence[str],
        values: Sequence[Any],
        silence: Any = 0,
        unknown: str = None,
) -> List[Any]:
    """Align ``words`` to the textgrid and sort ``values`` accordingly

    Parameters
    ----------
    words
        The words to align (not case-sensitive). Individual words in
        ``words`` can be skipped, but all words in the TextGrid need to
        occur exactly in ```words``.
    values
        Values corresponding to ``words``.
    silence
        Value to append to the output for silence in the TextGrid.
    unknown
        String to signify unknown words in ``words`` (able to pair with
        any word in the TextGrid).

    Returns
    -------
    aligned_values
        Values from ``values`` aligned with the TextGrid's
        ``realizations``.
    """
    n_words = len(words)
    assert len(values) == n_words
    grid_words = [r.graphs for r in self.realizations]
    i_next = last_match_i = last_match_i_grid = 0
    # i_next: start of unused words in ``words``
    out = []
    for i_grid, grid_word in enumerate(grid_words):
        if grid_word == ' ':
            # silence interval in the TextGrid
            out.append(silence)
        else:
            # scan forward for the first word matching this grid entry
            for i in range(i_next, n_words):
                word_i = words[i]
                if word_i == unknown:
                    # wildcard: matches any grid word
                    break
                elif grid_word == 'CANNOT' and word_i == 'can' and words[i + 1] == 'not':
                    # special case: grid merges "can not" into CANNOT
                    # NOTE(review): the asserts presume no further 'not'
                    # follows — confirm for real transcripts
                    assert words[i + 2] != 'not' and words[i + 3] != 'not'
                    break
                word_i = word_i.strip("'")
                if word_i.upper() == grid_word:
                    break
            else:
                # no match: raise with context around the last match
                n = min(9, len(grid_words) - i_grid, len(words) - last_match_i)
                ds = Dataset()
                ds['grid_words'] = grid_words[last_match_i_grid: last_match_i_grid + n]
                ds['words'] = words[last_match_i: last_match_i + n]
                raise ValueError(f"Can't align words to {self._name} after word {i_next}:\n{ds}")
            out.append(values[i])
            last_match_i = i
            last_match_i_grid = i_grid
            i_next = i + 1
    return out
def test_ols():
    "Test NDVar.ols() method"
    from rpy2.robjects import r

    # simulate data: inject a known effect of Y into a window of uts
    ds = datasets.get_rand(True)
    n_times = len(ds['uts'].time)
    x = np.zeros(n_times)
    x[20:40] = np.hanning(20)
    utsc = ds.eval("uts.copy()")
    utsc.x += ds['Y'].x[:, None] * x[None, :]
    ds_ = Dataset()
    ds_['x'] = Var(ds['Y'].x)
    ds_['x2'] = ds_['x'] + np.random.normal(0, 1, ds.n_cases)

    # ols regression
    m1 = ds_['x']
    b1 = utsc.ols(m1)
    res1 = utsc.residuals(m1)
    m2 = ds_.eval("x + x2")
    b2 = utsc.ols(m2)
    res2 = utsc.residuals(m2)

    # compare with R, one time point at a time
    # Python 3 fix: ``xrange`` was removed; use range()
    for i in range(n_times):
        ds_['y'] = Var(utsc.x[:, i])
        ds_.to_r('ds')
        # 1 predictor
        r('lm1 <- lm(y ~ x, ds)')
        beta = r('coef(lm1)')[1]
        assert_almost_equal(b1.x[0, i], beta)
        res = r('residuals(lm1)')
        assert_array_almost_equal(res1.x[:, i], res)
        # 2 predictors
        r('lm2 <- lm(y ~ x + x2, ds)')
        beta = r('coef(lm2)')[1:]
        assert_array_almost_equal(b2.x[:, i], beta)
        res = r('residuals(lm2)')
        assert_array_almost_equal(res2.x[:, i], res)

    # 3d: same comparison for a single sensor of a 3-d NDVar
    utsnd = ds['utsnd']
    ds_['utsnd'] = utsnd
    b1 = ds_.eval("utsnd.ols(x)")
    res1 = ds_.eval("utsnd.residuals(x)")
    for i in range(len(b1.time)):
        ds_['y'] = Var(utsnd.x[:, 1, i])
        ds_.to_r('ds')
        # 1 predictor
        r('lm1 <- lm(y ~ x, ds)')
        beta = r('coef(lm1)')[1]
        assert_almost_equal(b1.x[0, 1, i], beta)
        res = r('residuals(lm1)')
        assert_array_almost_equal(res1.x[:, 1, i], res)
def gen_triggers():
    "Build a minimal trigger Dataset for testing"
    raw = Var([], info={'sfreq': SAMPLINGRATE})
    info = {
        'subject': SUBJECT,
        'session': 'cheese',
        'raw': raw,
        'sfreq': SAMPLINGRATE,
    }
    events = Dataset(info=info)
    events['trigger'] = Var(TRIGGERS)
    events['i_start'] = Var(I_START)
    return events
def test_dataset_indexing():
    """Test Dataset indexing"""
    ds = datasets.get_uv()

    # indexing values: (key, row) and (row, key) are interchangeable
    eq_(ds['A', 1], ds['A'][1])
    eq_(ds[1, 'A'], ds['A'][1])

    # indexing variables: full and partial column slices
    assert_dataobj_equal(ds[:, 'A'], ds['A'])
    assert_dataobj_equal(ds['A', :], ds['A'])
    assert_dataobj_equal(ds[:10, 'A'], ds['A'][:10])
    assert_dataobj_equal(ds['A', :10], ds['A'][:10])

    # new Dataset through indexing
    ds2 = Dataset()
    ds2['A'] = ds['A']
    assert_dataset_equal(ds[('A',)], ds2)
    ds2['B'] = ds['B']
    assert_dataset_equal(ds['A', 'B'], ds2)
    assert_dataset_equal(ds[('A', 'B'), :10], ds2[:10])
    assert_dataset_equal(ds[:10, ('A', 'B')], ds2[:10])

    # assigning value: both index orders write the same cell
    ds[2, 'A'] = 'hello'
    eq_(ds[2, 'A'], 'hello')
    ds['A', 2] = 'not_hello'
    eq_(ds[2, 'A'], 'not_hello')

    # assigning new factor from a scalar string
    ds['C', :] = 'c'
    ok_(np.all(ds.eval("C == 'c'")))

    # assigning new Var from a scalar float
    ds['D1', :] = 5.
    ds[:, 'D2'] = 5.
    assert_array_equal(ds['D1'], 5)
    assert_array_equal(ds['D2'], 5)

    # test illegal names: keys must be valid identifiers
    f = Factor('aaabbb')
    assert_raises(ValueError, ds.__setitem__, '%dsa', f)
    assert_raises(ValueError, ds.__setitem__, '432', f)
    assert_raises(ValueError, ds.__setitem__, ('%dsa', slice(None)), 'value')
    assert_raises(ValueError, ds.__setitem__, (slice(None), '%dsa'), 'value')
    assert_raises(ValueError, ds.__setitem__, ('432', slice(None)), 4.)
    assert_raises(ValueError, ds.__setitem__, (slice(None), '432'), 4.)

    # deleting items: single key and key tuple
    del ds['A']
    ok_('A' not in ds)
    assert_raises(KeyError, ds.__getitem__, 'A')
    del ds['B', 'rm']
    ok_('B' not in ds and 'rm' not in ds)
def __init__(self, string):
    """Parse a predictor code of the form ``stim|code[$[index]method[angle]]``"""
    m = re.match(
        r'(?:([\w+-]+)\|)?'  # stimulus
        r'([\w:-]+)'  # predictor code
        r'(?:\$'  # begin shuffling
        r'(?:\[(-?\d+-?|\w*)\])?'  # band/index
        r'([a-zA-Z]+)(\d*))?$', string)
    if not m:
        raise CodeError(string, "not a valid code")
    stim, code_string, shuffle_index, shuffle, angle = m.groups()
    if shuffle:
        # reconstruct the canonical shuffle suffix for later reuse
        index_str = '' if shuffle_index is None else f'[{shuffle_index}]'
        self.shuffle_string = f"${index_str}{shuffle}{angle}"
        self.code_with_rand = f'{code_string}{self.shuffle_string}'
        if angle:
            angle = int(angle)
            if angle == 180:
                raise CodeError(string, "shuffle angle '180' should be omitted")
            elif not 360 > angle > 0:
                raise CodeError(string, f"shuffle angle {angle}")
        else:
            # 180 is the implicit default angle
            angle = 180
        if shuffle_index:
            # numeric index: plain int, or open-ended slice via -suffix/-prefix
            m = re.match(r'^(-?)(\d+)(-?)$', shuffle_index)
            if m:
                pre, index, post = m.groups()
                if pre:
                    if post:
                        # '-n-' is ambiguous
                        raise ValueError(f'{string!r} (shuffle index)')
                    shuffle_index = slice(int(index))  # '-n' -> [:n]
                elif post:
                    shuffle_index = slice(int(index), None)  # 'n-' -> [n:]
                else:
                    shuffle_index = int(index)
            # else: non-numeric index stays a string (column name)
        else:
            shuffle_index = None
    else:
        self.code_with_rand = code_string
        self.shuffle_string = ''
        shuffle_index = shuffle = angle = None
    self.stim = stim or None
    self.code = code_string
    self.shuffle = shuffle
    self.shuffle_index = shuffle_index
    self.shuffle_angle = angle
    CodeBase.__init__(self, string, code_string)
    self.has_randomization = shuffle in VALUE_SHUFFLE_METHODS or '>' in string
    self.has_permutation = shuffle in SHUFFLE_METHODS or '>' in string
    self._shuffle_done = False
    self.key = Dataset.as_key(self.string)
def align_word_dataset(
        self,
        ds: Dataset,
        words: FactorArg = 'word',
) -> Dataset:
    """Align ``ds`` to the TextGrid

    Parameters
    ----------
    ds
        Dataset with data to align.
    words
        Words in ``ds`` to use to align to the TextGrid words.

    Returns
    -------
    aligned_ds
        Dataset with the variables in ``ds`` aligned to the TextGrid,
        including time stamps and TextGrid words.
    """
    words_ = asfactor(words, ds=ds)
    # -1 marks silence, -2 marks words without a counterpart in ``ds``
    index = self._align_index(words_, silence=-1, missing=-2)
    out = Dataset(
        {
            'time': Var([r.times[0] for r in self.realizations]),
            'grid_word': Factor([r.graphs for r in self.realizations]),
        },
        info={'tstop': self.realizations[-1].tstop})
    for key, variable in ds.items():
        if isinstance(variable, (Var, Factor)):
            # map row index -> value, with fill values for silence/missing
            values = dict(enumerate(variable))
            if isinstance(variable, Var):
                values[-1] = values[-2] = False  # coerced to 0 unless all values are boolean
                out[key] = Var([values[i] for i in index])
            else:
                values[-1] = values[-2] = ''
                out[key] = Factor([values[i] for i in index], random=variable.random)
    return out
def test_dataset():
    "Basic dataset operations"
    data = Dataset()
    # assigning a column sets its name from the key
    data['f'] = Factor('abab')
    eq_(data['f'].name, 'f')
    # ds.add() requires the object to be named already
    assert_raises(ValueError, data.add, Factor('aabb'))
    data.add(Factor('aabb', name='g'))
    eq_(data['g'].name, 'g')
    # ds.update() also names unnamed objects from their key
    data = Dataset()
    data.update({'f': Factor('abab')})
    eq_(data['f'].name, 'f')
def test_longname():
    "Test info['longname'] entry"
    ds = Dataset()
    u = Var([2], 'u')
    v = Var([1], 'v')

    # arithmetic composes long names; also tested in test_var()
    eq_(longname(v.abs()), 'abs(v)')
    eq_(longname(u * v), "u * v")
    eq_(longname(u * v.abs()), "u * abs(v)")

    # assigning into a Dataset replaces the long name with the key
    ds['abs_v'] = v.abs()
    eq_(longname(ds['abs_v']), 'abs_v')
def test_dataobjects():
    "Test handing MNE-objects as data-objects"
    # trigger-shift offsets, one per epoch
    shift = np.array([0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                      0.0, 0.0, 0.0, 0.1, -0.1])
    epochs = datasets.get_mne_epochs()
    ds = Dataset(('a', Factor('ab', repeat=8)), ('epochs', epochs))
    ds['ets'] = shift_mne_epoch_trigger(epochs, shift, min(shift), max(shift))

    # ds operations must work with mne objects as columns
    sds = ds.sub("a == 'a'")
    ads = ds.aggregate('a')

    # asndvar: conversion from mne Epochs
    ndvar = asndvar(ds['epochs'])
    ndvar = asndvar(ds['ets'])

    # connectivity: pairs are sorted and indices stay in range
    ds = datasets.get_mne_sample(sub=[0], sns=True)
    sensor = ds['meg'].sensor
    c = sensor.connectivity()
    assert_array_equal(c[:, 0] < c[:, 1], True)
    assert c.max() == len(sensor) - 1
def test_dataobjects():
    "Test handing MNE-objects as data-objects"
    # per-epoch trigger-shift offsets
    shift = np.array([
        0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, -0.1,
    ])
    epochs = datasets.get_mne_epochs()
    data = Dataset({'a': Factor('ab', repeat=8), 'epochs': epochs})
    data['ets'] = shift_mne_epoch_trigger(epochs, shift, min(shift), max(shift))

    # Dataset operations with mne objects as columns
    subset = data.sub("a == 'a'")
    aggregated = data.aggregate('a')

    # conversion to NDVar
    converted = asndvar(data['epochs'])
    converted = asndvar(data['ets'])

    # connectivity: pairs sorted, indices within the sensor count
    sample = datasets.get_mne_sample(sub=[0], sns=True)
    sensor = sample['meg'].sensor
    pairs = sensor.connectivity()
    assert_array_equal(pairs[:, 0] < pairs[:, 1], True)
    assert pairs.max() == len(sensor) - 1
def test_complement():
    """Test design.complement()"""
    ds = Dataset()
    ds['A'] = Factor('abcabc')
    ds['B'] = Factor('bcabca')
    ds['C'] = Factor('cabcab')

    # a single factor underspecifies the design
    assert_raises(ValueError, complement, ['A'], ds=ds)

    # two factors determine the third
    comp = complement(['A', 'B'], ds=ds)
    ok_(np.all(comp == ds['C']),
        "Complement yielded %s instead of %s." % (comp, ds['C']))

    # all three factors overspecify the design
    assert_raises(ValueError, complement, ['A', 'B', 'C'], ds=ds)
def test_complement():
    """Test design.complement()"""
    ds = Dataset()
    ds['A'] = Factor('abcabc')
    ds['B'] = Factor('bcabca')
    ds['C'] = Factor('cabcab')

    # a single factor underspecifies the design
    with pytest.raises(ValueError):
        complement(['A'], ds=ds)

    # two factors determine the third
    comp = complement(['A', 'B'], ds=ds)
    assert np.all(comp == ds['C']), f"Complement yielded {comp} instead of {ds['C']}"

    # all three factors overspecify the design
    with pytest.raises(ValueError):
        complement(['A', 'B', 'C'], ds=ds)
def __init__(self, string):
    """Parse a predictor code of the form ``stim|code[$[band]method[angle]]``"""
    m = re.match(r'(?:([\w+-]+)\|)?([\w:-]+)(?:\$(-?\d*-?)([a-zA-Z]+)(\d*))?$', string)
    if not m:
        raise CodeError(string, "not a valid code")
    stim, code_string, shuffle_band, shuffle, angle = m.groups()
    if shuffle:
        self.code_with_rand = f'{code_string}${shuffle_band}{shuffle}{angle}'
        if angle:
            angle = int(angle)
            if angle == 180:
                raise CodeError(string, "shuffle angle '180' should be omitted")
            elif not 360 > angle > 0:
                raise CodeError(string, f"shuffle angle {angle}")
        else:
            # 180 is the implicit default angle
            angle = 180
        if shuffle_band:
            # band spec: plain int, or open-ended slice via -prefix/-suffix
            m = re.match(r'^(-?)(\d+)(-?)$', shuffle_band)
            if not m:
                raise ValueError(f'{string!r} (shuffle index)')
            pre, index, post = m.groups()
            if pre:
                if post:
                    # '-n-' is ambiguous
                    raise ValueError(f'{string!r} (shuffle index)')
                shuffle_band = slice(int(index))  # '-n' -> [:n]
            elif post:
                shuffle_band = slice(int(index), None)  # 'n-' -> [n:]
            else:
                shuffle_band = int(index)
        else:
            shuffle_band = None
    else:
        self.code_with_rand = code_string
        shuffle_band = shuffle = angle = None
    self.stim = stim or None
    self.code = code_string
    self.shuffle = shuffle
    self.shuffle_band = shuffle_band
    self.shuffle_angle = angle
    CodeBase.__init__(self, string, code_string)
    self.has_randomization = shuffle in VALUE_SHUFFLE_METHODS or '>' in string
    self.has_permutation = shuffle in SHUFFLE_METHODS or '>' in string
    self._shuffle_done = False
    self.key = Dataset.as_key(self.string)
def test_dataset_sorting():
    "Test Dataset sorting methods"
    values = np.arange(10)
    reference = Dataset()
    reference['v'] = Var(values)
    reference['f'] = Factor(values)

    # work on a shuffled copy of the Dataset
    order = values.copy()
    np.random.shuffle(order)
    shuffled = reference[order]

    # ascending sort by Var, returning a copy
    result = shuffled.sorted('v')
    assert_dataset_equal(result, reference, "Copy sorted by Var, ascending")

    # descending sort by Factor, in place
    shuffled.sort('f', descending=True)
    assert_dataset_equal(shuffled, reference[::-1],
                         "In-place sorted by Factor, descending")
def dataset_based_key(self):
    "Key from the model's terms, independent of term order"
    keys = sorted(Dataset.as_key(term.string) for term in self.terms)
    return '+'.join(keys)
def _ds_to_ndvar(self, ds: Dataset, uts: UTS, code: Code):
    """Convert an event Dataset into a continuous predictor NDVar"""
    # determine which column holds the values and which the mask
    if self.columns:
        column_key, mask_key = code.nuts_columns
        if column_key is None:
            # no value column specified: use a constant 1 (pure impulses)
            column_key = 'value'
            ds[:, column_key] = 1
    else:
        column_key = 'value'
        mask_key = 'mask' if 'mask' in ds else None

    if mask_key:
        mask = ds[mask_key].x
        assert mask.dtype.kind == 'b', "'mask' must be boolean"
    else:
        mask = None

    # resolve the subset of events eligible for shuffling
    if code.shuffle_index:
        shuffle_mask = ds[code.shuffle_index].x
        if shuffle_mask.dtype.kind != 'b':
            raise code.error("shuffle index must be boolean", -1)
        elif code.shuffle == 'permute' and mask is not None:
            # shuffle subset must lie within the masked events
            assert not numpy.any(shuffle_mask[~mask])
    elif code.shuffle == 'permute':
        shuffle_mask = mask
    else:
        shuffle_mask = None

    # $remask: shuffle the mask itself (within the shuffle subset)
    if code.shuffle == 'remask':
        if mask is None:
            raise code.error("$remask for predictor without mask", -1)
        rng = code._get_rng()
        if shuffle_mask is None:
            rng.shuffle(mask)
        else:
            remask = mask[shuffle_mask]
            rng.shuffle(remask)
            mask[shuffle_mask] = remask
        code.register_shuffle(index=True)

    # apply the (possibly shuffled) mask to the values
    if mask is not None:
        ds[column_key] *= mask

    # $permute: shuffle the values (within the shuffle subset)
    if code.shuffle == 'permute':
        rng = code._get_rng()
        if shuffle_mask is None:
            rng.shuffle(ds[column_key].x)
        else:
            values = ds[column_key].x[shuffle_mask]
            rng.shuffle(values)
            ds[column_key].x[shuffle_mask] = values
        code.register_shuffle(index=True)

    # prepare output NDVar
    if code.nuts_method == 'is':
        # 'is': both step and impulse representations, stacked
        dim = Categorial('representation', ('step', 'impulse'))
        x = NDVar(numpy.zeros((2, len(uts))), (dim, uts), name=code.key)
        x_step, x_impulse = x
    else:
        x = NDVar(numpy.zeros(len(uts)), uts, name=code.key)
        if code.nuts_method == 'step':
            x_step, x_impulse = x, None
        elif not code.nuts_method:
            x_step, x_impulse = None, x
        else:
            raise code.error(f"NUTS-method={code.nuts_method!r}")

    # fill in values; drop events beyond the end of the time axis
    ds = ds[ds['time'] < uts.tstop]
    if x_impulse is not None:
        for t, v in ds.zip('time', column_key):
            x_impulse[t] = v
    if x_step is not None:
        # each step extends from its own onset to the next event's onset
        t_stops = ds[1:, 'time']
        if ds[-1, column_key] != 0:
            if 'tstop' not in ds.info:
                raise code.error("For step representation, the predictor datasets needs to contain ds.info['tstop'] to determine the end of the last step", -1)
            t_stops = chain(t_stops, [ds.info['tstop']])
        for t0, t1, v in zip(ds['time'], t_stops, ds[column_key]):
            x_step[t0:t1] = v
    return x
def test_celltable():
    "Test the Celltable class."
    ds = datasets.get_uts()
    ds['cat'] = Factor('abcd', repeat=15)

    # basic: 60 cases split over the 2 cells of A
    ct = Celltable('Y', 'A', ds=ds)
    eq_(ct.n_cases, 60)
    eq_(ct.n_cells, 2)
    eq_(repr(ct), "Celltable(Y, A)")
    eq_(repr(Celltable(ds['Y'].x, 'A', ds=ds)), "Celltable(<ndarray>, A)")
    eq_(repr(Celltable(ds['Y'].x, ds['A'].x, ds=ds)), "Celltable(<ndarray>, <Factor>)")

    # match: averaging within 'rm' halves the case count
    ct = Celltable('Y', 'A', match='rm', ds=ds)
    eq_(ct.n_cases, 30)
    eq_(ct.n_cells, 2)

    # cat argument: restricts cells and sets their order
    ct = Celltable('Y', 'cat', cat=('c', 'b'), ds=ds)
    eq_(ct.n_cases, 30)
    eq_(ct.X[0], 'c')
    eq_(ct.X[-1], 'b')
    # unknown cell name raises
    assert_raises(ValueError, Celltable, 'Y', 'cat', cat=('c', 'e'), ds=ds)

    ct = Celltable('Y', 'A', match='rm', ds=ds)
    eq_(ct.n_cases, 30)
    assert np.all(ct.groups['a0'] == ct.groups['a1'])

    ct = Celltable('Y', 'cat', match='rm', cat=('c', 'b'), ds=ds)
    eq_(ct.n_cases, 30)
    eq_(ct.X[0], 'c')
    eq_(ct.X[-1], 'b')

    # catch unequal length
    assert_raises(ValueError, Celltable, ds['Y', :-1], 'cat', ds=ds)
    assert_raises(ValueError, Celltable, ds['Y', :-1], 'cat', match='rm', ds=ds)

    # coercion of numerical X: boolean array becomes a 'False'/'True' Factor
    X = ds.eval("A == 'a0'")
    ct = Celltable('Y', X, cat=(None, None), ds=ds)
    eq_(('False', 'True'), ct.cat)
    assert_array_equal(ct.data['True'], ds['Y', X])

    ct = Celltable('Y', X, cat=('True', 'False'), ds=ds)
    eq_(('True', 'False'), ct.cat)
    assert_array_equal(ct.data['True'], ds['Y', X])

    # test coercion of Y: default keeps ndarray; coercion=asvar wraps it
    ct = Celltable(ds['Y'].x, 'A', ds=ds)
    assert_is_instance(ct.Y, np.ndarray)
    ct = Celltable(ds['Y'].x, 'A', ds=ds, coercion=asvar)
    assert_is_instance(ct.Y, Var)

    # test sub: sub argument is equivalent to pre-subsetting the Dataset
    ds_sub = ds.sub("A == 'a0'")
    ct_sub = Celltable('Y', 'B', ds=ds_sub)
    ct = Celltable('Y', 'B', sub="A == 'a0'", ds=ds)
    assert_dataobj_equal(ct_sub.Y, ct.Y)

    # test sub with rm
    ct_sub = Celltable('Y', 'B', match='rm', ds=ds_sub)
    ct = Celltable('Y', 'B', match='rm', sub="A == 'a0'", ds=ds)
    assert_dataobj_equal(ct_sub.Y, ct.Y)

    # Interaction match: matching on B % rm keeps a fully within design
    ct = Celltable('Y', 'A', match='B % rm', ds=ds)
    ok_(ct.all_within)
    assert_dataobj_equal(combine((ct.data['a0'], ct.data['a1'])), ds['Y'])

    # test rm sorting: cases are re-ordered consistently by match factor
    ds = Dataset()
    ds['rm'] = Factor('abc', repeat=4)
    ds['Y'] = Var(np.arange(3.).repeat(4))
    ds['X'] = Factor('ab', repeat=2, tile=3)
    idx = np.arange(12)
    np.random.shuffle(idx)
    ds = ds[idx]
    ct = Celltable('Y', 'X', 'rm', ds=ds)
    assert_array_equal(ct.match, Factor('abc', tile=2))
    assert_array_equal(ct.Y, np.tile(np.arange(3.), 2))
    assert_array_equal(ct.X, Factor('ab', repeat=3))
def test_ols():
    "Test NDVar.ols() method"
    from rpy2.robjects import r

    # simulate data: inject a known effect of Y into a window of uts
    ds = datasets.get_uts(True)
    n_times = len(ds['uts'].time)
    x = np.zeros(n_times)
    x[20:40] = np.hanning(20)
    utsc = ds.eval("uts.copy()")
    utsc.x += ds['Y'].x[:, None] * x[None, :]
    ds_ = Dataset()
    ds_['x'] = Var(ds['Y'].x)
    ds_['x2'] = ds_['x'] + np.random.normal(0, 1, ds.n_cases)

    # ols regression with one and with two predictors
    m1 = ds_['x']
    b1 = utsc.ols(m1)
    res1 = utsc.residuals(m1)
    t1 = utsc.ols_t(m1)
    m2 = ds_.eval("x + x2")
    b2 = utsc.ols(m2)
    res2 = utsc.residuals(m2)
    t2 = utsc.ols_t(m2)

    # compare with R's lm(), one time point at a time
    for i in range(n_times):
        ds_['y'] = Var(utsc.x[:, i])
        ds_.to_r('ds')
        # 1 predictor
        r('lm1 <- lm(y ~ x, ds)')
        beta = r('coef(lm1)')[1]
        assert_almost_equal(b1.x[0, i], beta)
        res = r('residuals(lm1)')
        assert_array_almost_equal(res1.x[:, i], res)
        # index 5 of the flattened coefficient table is the t of 'x'
        t = r('coef(summary(lm1))')[5]
        assert_almost_equal(t1.x[0, i], t)
        # 2 predictors
        r('lm2 <- lm(y ~ x + x2, ds)')
        beta = r('coef(lm2)')[1:]
        assert_array_almost_equal(b2.x[:, i], beta)
        res = r('residuals(lm2)')
        assert_array_almost_equal(res2.x[:, i], res)
        # indices 7 and 8 are the t values of 'x' and 'x2'
        lm2_coefs = r('coef(summary(lm2))')
        t = [lm2_coefs[7], lm2_coefs[8]]
        assert_array_almost_equal(t2.x[:, i], t)

    # 3d: same comparison for sensor 1 of a 3-d NDVar
    utsnd = ds['utsnd']
    ds_['utsnd'] = utsnd
    b1 = ds_.eval("utsnd.ols(x)")
    res1 = ds_.eval("utsnd.residuals(x)")
    t1 = ds_.eval("utsnd.ols_t(x)")
    for i in range(len(b1.time)):
        ds_['y'] = Var(utsnd.x[:, 1, i])
        ds_.to_r('ds')
        # 1 predictor
        r('lm1 <- lm(y ~ x, ds)')
        beta = r('coef(lm1)')[1]
        assert_almost_equal(b1.x[0, 1, i], beta)
        res = r('residuals(lm1)')
        assert_array_almost_equal(res1.x[:, 1, i], res)
        t = r('coef(summary(lm1))')[5]
        assert_almost_equal(t1.x[0, 1, i], t)
def test_ttest_rel():
    "Test testnd.ttest_rel()"
    ds = datasets.get_uts(True)

    # basic invocation with a cell-tuple spec
    main = testnd.ttest_rel('uts', 'A%B', ('a1', 'b1'), ('a0', 'b0'), 'rm',
                            ds=ds, samples=100)
    assert repr(main) == "<ttest_rel 'uts', 'A x B', ('a1', 'b1'), ('a0', 'b0'), 'rm' (n=15), samples=100, p < .001>"
    diff_map = main.masked_difference()
    assert diff_map.x.mask.sum() == 84
    masked_c1 = main.masked_c1()
    assert masked_c1.x.mask.sum() == 84
    assert_array_equal(masked_c1.x.data, main.c1_mean.x)

    # alternate argspec: two explicit NDVar expressions
    alt = testnd.ttest_rel("uts[A%B == ('a1', 'b1')]",
                           "uts[A%B == ('a0', 'b0')]", ds=ds, samples=100)
    assert repr(alt) == "<ttest_rel 'uts', 'uts' (n=15), samples=100, p < .001>"
    assert_dataobj_equal(alt.t, main.t)

    # alternate argspec 2: two named columns in a wide dataset
    wide = Dataset()
    wide['a1b1'] = ds.eval("uts[A%B == ('a1', 'b1')]")
    wide['a0b0'] = ds.eval("uts[A%B == ('a0', 'b0')]")
    named = testnd.ttest_rel('a1b1', 'a0b0', ds=wide, samples=100)
    assert_dataobj_equal(named.t, main.t)
    assert repr(named) == "<ttest_rel 'a1b1', 'a0b0' (n=15), samples=100, p < .001>"

    # persistence: result survives a pickle round-trip
    payload = pickle.dumps(main, pickle.HIGHEST_PROTOCOL)
    restored = pickle.loads(payload)
    assert repr(restored) == repr(main)
    assert_dataobj_equal(main.p_uncorrected, restored.p_uncorrected)

    # collapsing cells
    collapsed = testnd.ttest_rel('uts', 'A', 'a1', 'a0', 'rm', ds=ds, samples=0)
    assert collapsed.p_uncorrected.min() < 0.05
    assert collapsed.n == main.n

    # reproducibility: identical clusters across repeated runs and worker modes
    rerun = testnd.ttest_rel('uts', 'A%B', ('a1', 'b1'), ('a0', 'b0'), 'rm',
                             ds=ds, samples=100)
    assert_dataset_equal(rerun.find_clusters(maps=True), main.clusters)
    configure(n_workers=0)
    serial = testnd.ttest_rel('uts', 'A%B', ('a1', 'b1'), ('a0', 'b0'), 'rm',
                              ds=ds, samples=100)
    assert_dataset_equal(serial.find_clusters(maps=True), main.clusters)
    configure(n_workers=True)

    sub = ds.sub("B=='b0'")
    # thresholded, UTS: serial and parallel runs must agree
    configure(n_workers=0)
    serial = testnd.ttest_rel('uts', 'A', 'a1', 'a0', 'rm', ds=sub,
                              pmin=0.1, samples=100)
    expected = serial.find_clusters()
    configure(n_workers=True)
    parallel = testnd.ttest_rel('uts', 'A', 'a1', 'a0', 'rm', ds=sub,
                                pmin=0.1, samples=100)
    assert_dataset_equal(parallel.find_clusters(), expected)
    # thresholded, UTSND
    configure(n_workers=0)
    serial = testnd.ttest_rel('utsnd', 'A', 'a1', 'a0', 'rm', ds=sub,
                              pmin=0.1, samples=100)
    expected = serial.find_clusters()
    configure(n_workers=True)
    parallel = testnd.ttest_rel('utsnd', 'A', 'a1', 'a0', 'rm', ds=sub,
                                pmin=0.1, samples=100)
    assert_dataset_equal(parallel.find_clusters(), expected)
    # TFCE, UTS
    configure(n_workers=0)
    serial = testnd.ttest_rel('uts', 'A', 'a1', 'a0', 'rm', ds=sub,
                              tfce=True, samples=10)
    expected = serial.compute_probability_map()
    configure(n_workers=True)
    parallel = testnd.ttest_rel('uts', 'A', 'a1', 'a0', 'rm', ds=sub,
                                tfce=True, samples=10)
    assert_dataobj_equal(parallel.compute_probability_map(), expected)

    # zero variance: constant data must yield t == 0 at that point
    ds['utsnd'].x[:, 1, 10] = 0.
    zero_var = testnd.ttest_rel('utsnd', 'A', match='rm', ds=ds)
    assert zero_var.t.x[1, 10] == 0

    # mismatched argument lengths are rejected
    with pytest.raises(ValueError):
        testnd.ttest_rel('utsnd', 'A[:-1]', match='rm', ds=ds)
    with pytest.raises(ValueError):
        testnd.ttest_rel('utsnd', 'A', match='rm[:-1]', ds=ds)
def test_ttest_rel():
    "Test testnd.ttest_rel()"
    # NOTE(review): a function with this same name is defined earlier in this
    # file; this later definition shadows it — confirm both versions are intended.
    ds = datasets.get_uts(True)

    # basic: cell-tuple spec for an A x B interaction contrast
    res = testnd.ttest_rel('uts', 'A%B', ('a1', 'b1'), ('a0', 'b0'), 'rm',
                           ds=ds, samples=100)
    eq_(repr(res),
        "<ttest_rel 'uts', 'A x B', ('a1', 'b1'), ('a0', 'b0'), "
        "'rm' (n=15), samples=100, p=.000>")

    # alternate argspec: the same contrast given as two named dataset columns
    ds1 = Dataset()
    ds1['a1b1'] = ds.eval("uts[A%B == ('a1', 'b1')]")
    ds1['a0b0'] = ds.eval("uts[A%B == ('a0', 'b0')]")
    res1 = testnd.ttest_rel('a1b1', 'a0b0', ds=ds1, samples=100)
    assert_dataobj_equal(res1.t, res.t)  # both argspecs yield the same t-map
    eq_(repr(res1), "<ttest_rel 'a1b1', 'a0b0' (n=15), samples=100, p=.000>")

    # persistence: result survives a pickle round-trip
    string = pickle.dumps(res, pickle.HIGHEST_PROTOCOL)
    res_ = pickle.loads(string)
    repr(res_)  # smoke-test that repr works on the unpickled result
    assert_equal(repr(res_), repr(res))
    assert_dataobj_equal(res.p_uncorrected, res_.p_uncorrected)

    # collapsing cells: contrast on factor A collapses over B
    res2 = testnd.ttest_rel('uts', 'A', 'a1', 'a0', 'rm', ds=ds)
    assert_less(res2.p_uncorrected.min(), 0.05)
    assert_equal(res2.n, res.n)

    # reproducibility: identical clusters across repeat runs and worker modes
    res3 = testnd.ttest_rel('uts', 'A%B', ('a1', 'b1'), ('a0', 'b0'), 'rm',
                            ds=ds, samples=100)
    assert_dataset_equal(res3.find_clusters(maps=True), res.clusters)
    configure(n_workers=0)  # force single-process execution
    res4 = testnd.ttest_rel('uts', 'A%B', ('a1', 'b1'), ('a0', 'b0'), 'rm',
                            ds=ds, samples=100)
    assert_dataset_equal(res4.find_clusters(maps=True), res.clusters)
    configure(n_workers=True)  # restore multiprocessing

    sds = ds.sub("B=='b0'")
    # thresholded, UTS: serial and parallel runs must find the same clusters
    configure(n_workers=0)
    res0 = testnd.ttest_rel('uts', 'A', 'a1', 'a0', 'rm', ds=sds,
                            pmin=0.1, samples=100)
    tgt = res0.find_clusters()
    configure(n_workers=True)
    res1 = testnd.ttest_rel('uts', 'A', 'a1', 'a0', 'rm', ds=sds,
                            pmin=0.1, samples=100)
    assert_dataset_equal(res1.find_clusters(), tgt)
    # thresholded, UTSND: same check on the multi-dimensional variable
    configure(n_workers=0)
    res0 = testnd.ttest_rel('utsnd', 'A', 'a1', 'a0', 'rm', ds=sds,
                            pmin=0.1, samples=100)
    tgt = res0.find_clusters()
    configure(n_workers=True)
    res1 = testnd.ttest_rel('utsnd', 'A', 'a1', 'a0', 'rm', ds=sds,
                            pmin=0.1, samples=100)
    assert_dataset_equal(res1.find_clusters(), tgt)
    # TFCE, UTS: probability maps must also match across worker modes
    configure(n_workers=0)
    res0 = testnd.ttest_rel('uts', 'A', 'a1', 'a0', 'rm', ds=sds,
                            tfce=True, samples=10)
    tgt = res0.compute_probability_map()
    configure(n_workers=True)
    res1 = testnd.ttest_rel('uts', 'A', 'a1', 'a0', 'rm', ds=sds,
                            tfce=True, samples=10)
    assert_dataobj_equal(res1.compute_probability_map(), tgt)

    # zero variance: constant data must yield t == 0 at that point
    ds['utsnd'].x[:, 1, 10] = 0.
    res = testnd.ttest_rel('utsnd', 'A', match='rm', ds=ds)
    eq_(res.t.x[1, 10], 0)