def _call(self, ds):
    # local binding
    generator = self._generator
    node = self._node
    ca = self.ca
    space = self.get_space()
    concat_as = self._concat_as

    if self.ca.is_enabled("stats") and (not node.ca.has_key("stats")
                                        or not node.ca.is_enabled("stats")):
        warning("'stats' conditional attribute was enabled, but "
                "the assigned node '%s' either doesn't support it, "
                "or it is disabled" % node)

    # precharge conditional attributes
    ca.datasets = []

    # run the node on all generated datasets
    results = []
    for i, sds in enumerate(generator.generate(ds)):
        if ca.is_enabled("datasets"):
            # store dataset in ca
            ca.datasets.append(sds)
        # run the beast
        result = node(sds)
        # callback
        if self._callback is not None:
            self._callback(data=sds, node=node, result=result)
        # subclass postprocessing
        result = self._repetition_postcall(sds, node, result)
        if space:
            # XXX maybe try to get something more informative from the
            # processing node (e.g. in 0.5 it used to be 'chunks'->'chunks'
            # to indicate what was trained and what was tested. Now it is
            # more tricky, because `node` could be anything
            result.set_attr(space, (i,))
        # store
        results.append(result)

        if ca.is_enabled("stats") and node.ca.has_key("stats") \
           and node.ca.is_enabled("stats"):
            if not ca.is_set('stats'):
                # create empty stats container of matching type
                ca.stats = node.ca['stats'].value.__class__()
            # harvest summary stats
            ca['stats'].value.__iadd__(node.ca['stats'].value)

    # charge conditional attribute
    self.ca.repetition_results = results

    # stack all results into a single Dataset
    if concat_as == 'samples':
        results = vstack(results)
    elif concat_as == 'features':
        results = hstack(results)
    else:
        raise ValueError("Unknown concatenation mode '%s'" % concat_as)
    # no need to store the raw results, since the Measure class will
    # automatically store them in a CA
    return results
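# Standalone sketch (plain numpy, not the PyMVPA API) of the pattern _call
# implements above: apply a node to every dataset a generator yields and stack
# the per-repetition results either sample-wise or feature-wise. All names
# below are illustrative, not part of the library.
import numpy as np

def repeated_measure(data, generate, node, concat_as='samples'):
    results = []
    for sds in generate(data):
        # keep each per-repetition result 2-d so it can be stacked later
        results.append(np.atleast_2d(node(sds)))
    if concat_as == 'samples':
        return np.vstack(results)
    elif concat_as == 'features':
        return np.hstack(results)
    raise ValueError("Unknown concatenation mode '%s'" % concat_as)

# usage: mean of each half of a toy signal, one repetition per half
data = np.arange(10.0)
halves = lambda d: (d[:5], d[5:])
print(repeated_measure(data, halves, np.mean))   # [[2.], [7.]]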
def test_resample():
    time = np.linspace(0, 2 * np.pi, 100)
    ds = Dataset(np.vstack((np.sin(time), np.cos(time))).T,
                 sa={'time': time,
                     'section': np.repeat(range(10), 10)})
    assert_equal(ds.shape, (100, 2))

    # downsample
    num = 10
    rm = FFTResampleMapper(num, window=('gauss', 50),
                           position_attr='time',
                           attr_strategy='sample')
    mds = rm(ds)
    assert_equal(mds.shape, (num, ds.nfeatures))
    # didn't change the orig
    assert_equal(len(ds), 100)

    # check position-based resampling
    ds_partial = ds[0::10]
    mds_partial = rm(ds_partial)
    # despite the different input sampling this should yield the same output
    # timepoints
    assert_array_almost_equal(mds.sa.time, mds_partial.sa.time)
    # exclude the first points to prevent edge effects, but the data should be
    # very similar too
    assert_array_almost_equal(mds.samples[2:], mds_partial.samples[2:],
                              decimal=2)
    # simple sample of sa's should give meaningful stuff
    assert_array_equal(mds.sa.section, range(10))

    # and now for a dataset with chunks
    cds = vstack([ds.copy(), ds.copy()])
    cds.sa['chunks'] = np.repeat([0, 1], len(ds))
    rm = FFTResampleMapper(num, attr_strategy='sample', chunks_attr='chunks',
                           window=('gauss', 50))
    mcds = rm(cds)
    assert_equal(mcds.shape, (20, 2))
    assert_array_equal(mcds.sa.section, np.tile(range(10), 2))
    # each individual chunk should be identical to the previous dataset
    assert_array_almost_equal(mds.samples, mcds.samples[:10])
    assert_array_almost_equal(mds.samples, mcds.samples[10:])
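# Standalone sketch of the effect the test above checks, under the assumption
# that FFTResampleMapper performs Fourier-domain resampling comparable to
# scipy.signal.resample: 100 samples of sin/cos are downsampled to 10, and the
# 'time' positions are resampled alongside the data.
import numpy as np
from scipy.signal import resample

time = np.linspace(0, 2 * np.pi, 100)
samples = np.vstack((np.sin(time), np.cos(time))).T      # shape (100, 2)
resampled, new_time = resample(samples, 10, t=time, axis=0,
                               window=('gaussian', 50))
assert resampled.shape == (10, 2)
assert len(new_time) == 10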
def _call(self, dataset):
    # local bindings
    analyzer = self.__analyzer
    insplit_index = self.__insplit_index

    sensitivities = []
    self.ca.splits = splits = []
    store_splits = self.ca.is_enabled("splits")

    for ind, split in enumerate(self.__splitter(dataset)):
        ds = split[insplit_index]
        if __debug__ and "SA" in debug.active:
            debug("SA", "Computing sensitivity for split %d on "
                  "dataset %s using %s" % (ind, ds, analyzer))
        sensitivity = analyzer(ds)
        sensitivities.append(sensitivity)
        if store_splits:
            splits.append(split)

    result = vstack(sensitivities)
    result.sa['splits'] = np.concatenate([[i] * len(s)
                                          for i, s in enumerate(sensitivities)])
    self.ca.sensitivities = sensitivities
    return result
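# Tiny illustration (plain numpy, not PyMVPA) of how the 'splits' sample
# attribute above is built: every row of the stacked result is labeled with
# the index of the split whose sensitivity produced it. Shapes are made up.
import numpy as np

sensitivities = [np.zeros((2, 4)), np.ones((3, 4))]   # 2 rows, then 3 rows
split_ids = np.concatenate([[i] * len(s)
                            for i, s in enumerate(sensitivities)])
# split_ids -> array([0, 0, 1, 1, 1])
stacked = np.vstack(sensitivities)                    # shape (5, 4)
assert len(split_ids) == len(stacked)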