def _call(self, dataset): """Extract weights from GPR """ clf = self.clf kernel = clf.kernel train_fv = clf._train_fv if isinstance(kernel, LinearKernel): Sigma_p = 1.0 else: Sigma_p = kernel.params.Sigma_p weights = Ndot(Sigma_p, Ndot(train_fv.T, clf._alpha)) if self.ca.is_enabled('variances'): # super ugly formulas that can be quite surely improved: tmp = np.linalg.inv(clf._L) Kyinv = Ndot(tmp.T, tmp) # XXX in such lengthy matrix manipulations you might better off # using np.matrix where * is a matrix product self.ca.variances = Ndiag( Sigma_p - Ndot(Sigma_p, Ndot(train_fv.T, Ndot(Kyinv, Ndot(train_fv, Sigma_p))))) return Dataset(np.atleast_2d(weights))
def _call(self, dataset):
    sensitivities = []
    for ind, analyzer in enumerate(self.__analyzers):
        if __debug__:
            debug("SA", "Computing sensitivity for SA#%d:%s" %
                  (ind, analyzer))
        sensitivity = analyzer(dataset)
        sensitivities.append(sensitivity)

    if __debug__:
        debug("SA",
              "Returning %d sensitivities from %s" %
              (len(sensitivities), self.__class__.__name__))

    sa_attr = self._sa_attr
    if isinstance(sensitivities[0], AttrDataset):
        smerged = None
        for i, s in enumerate(sensitivities):
            s.sa[sa_attr] = np.repeat(i, len(s))
            if smerged is None:
                smerged = s
            else:
                smerged.append(s)
        sensitivities = smerged
    else:
        sensitivities = \
            Dataset(sensitivities,
                    sa={sa_attr: np.arange(len(sensitivities))})

    self.ca.sensitivities = sensitivities
    return sensitivities
def _forward_dataset_helper(self, ds):
    # local binding
    num = self.__num

    pos = None
    if self.__position_attr is not None:
        # we know something about the sample positions
        pos = ds.sa[self.__position_attr].value
        rsamples, pos = resample(ds.samples, self.__num, t=pos,
                                 window=self.__window_args)
    else:
        # we know nothing about the sample positions
        rsamples = resample(ds.samples, self.__num, t=None,
                            window=self.__window_args)

    # new dataset that reuses the feature and dataset attributes of the
    # source
    mds = Dataset(rsamples, fa=ds.fa, a=ds.a)

    # the tricky part is what to do with the samples attributes, since their
    # number has changed
    if self.__attr_strategy == 'remove':
        # nothing to be done
        pass
    elif self.__attr_strategy == 'sample':
        step = int(len(ds) / num)
        sa = dict([(k, ds.sa[k].value[0::step][:num]) for k in ds.sa])
        mds.sa.update(sa)
    elif self.__attr_strategy == 'resample':
        # resample the attributes themselves
        sa = {}
        for k in ds.sa:
            v = ds.sa[k].value
            if pos is None:
                sa[k] = resample(v, self.__num, t=None,
                                 window=self.__window_args)
            else:
                if k == self.__position_attr:
                    # position attr will be handled separately at the end
                    continue
                sa[k] = resample(v, self.__num, t=pos,
                                 window=self.__window_args)[0]
        # inject them all
        mds.sa.update(sa)
    else:
        raise ValueError("Unknown attribute handling strategy '%s'."
                         % self.__attr_strategy)

    if pos is not None:
        # we got the new sample positions and can store them
        mds.sa[self.__position_attr] = pos

    return mds
def test_linear_kernel(self):
    """Simplistic testing of linear kernel"""
    d1 = Dataset(np.asarray([range(5)] * 10, dtype=float))
    lk = npK.LinearKernel()
    lk.compute(d1)
    self.failUnless(lk._k.shape == (10, 10),
                    "Failure computing LinearKernel (Size mismatch)")
    self.failUnless((lk._k == 30).all(),
                    "Failure computing LinearKernel")
def test_cached_kernel(self):
    nchunks = 5
    n = 50 * nchunks
    d = Dataset(np.random.randn(n, 132))
    d.sa.chunks = np.random.randint(nchunks, size=n)

    # We'll compare against an Rbf kernel just because it has a parameter
    # to change
    rk = npK.RbfKernel(sigma=1.5)

    # Assure that the two kernels are independent for this test
    ck = CachedKernel(kernel=npK.RbfKernel(sigma=1.5))
    ck.compute(d)  # Initial cache of all data
    self.failUnless(ck._recomputed,
                    'CachedKernel was not initially computed')

    # Try some splitting
    for chunk in [d[d.sa.chunks == i] for i in range(nchunks)]:
        rk.compute(chunk)
        ck.compute(chunk)
        self.kernel_equiv(rk, ck)  # , accuracy=1e-12)
        self.failIf(ck._recomputed,
                    "CachedKernel incorrectly recomputed its kernel")

    # Test what happens when a parameter changes
    ck.params.sigma = 3.5
    ck.compute(d)
    self.failUnless(ck._recomputed,
                    "CachedKernel doesn't recompute on kernel change")
    rk.params.sigma = 3.5
    rk.compute(d)
    self.failUnless(np.all(rk._k == ck._k),
                    'Cached and rbf kernels disagree after kernel change')

    # Now test handling of new data
    d2 = Dataset(np.random.randn(32, 43))
    ck.compute(d2)
    self.failUnless(
        ck._recomputed,
        "CachedKernel did not automatically recompute new data")
    ck.compute(d)
    self.failUnless(ck._recomputed,
                    "CachedKernel did not recompute old data which had\n"
                    "previously been computed, but had the cache overridden")
def _call(self, dataset): """Computes featurewise I-RELIEF weights.""" samples = dataset.samples NS, NF = samples.shape[:2] if self.w_guess == None: self.w = np.ones(NF, 'd') # do normalization in all cases to be safe :) self.w = self.w / (self.w**2).sum() M, H = self.compute_M_H(dataset.targets) while True: self.k = self.kernel(length_scale=self.kernel_width / self.w) d_w_k = self.k.computed(samples).as_raw_np() # set d_w_k to zero where distance=0 (i.e. kernel == # 1.0), otherwise I-RELIEF could not converge. # XXX Note that kernel==1 for distance=0 only for # exponential kernels!! IMPROVE d_w_k[np.abs(d_w_k - 1.0) < 1.0e-15] = 0.0 ni = np.zeros(NF, 'd') for n in range(NS): # d_w_k[n,n] could be omitted since == 0.0 gamma_n = 1.0 - np.nan_to_num(d_w_k[n, M[n]].sum() \ / (d_w_k[n, :].sum()-d_w_k[n, n])) alpha_n = np.nan_to_num(d_w_k[n, M[n]] / (d_w_k[n, M[n]].sum())) beta_n = np.nan_to_num(d_w_k[n, H[n]] / (d_w_k[n, H[n]].sum())) m_n = (np.abs(samples[n, :] - samples[M[n], :]) \ * alpha_n[:, None]).sum(0) h_n = (np.abs(samples[n, :] - samples[H[n], :]) \ * beta_n[:, None]).sum(0) ni += gamma_n * (m_n - h_n) ni = ni / NS ni_plus = np.clip(ni, 0.0, np.inf) # set all negative elements to zero w_new = np.nan_to_num(ni_plus / (np.sqrt((ni_plus**2).sum()))) change = np.abs(w_new - self.w).sum() if __debug__ and 'IRELIEF' in debug.active: debug( 'IRELIEF', "change=%.4f max=%f min=%.4f mean=%.4f std=%.4f #nan=%d" % (change, w_new.max(), w_new.min(), w_new.mean(), w_new.std(), np.isnan(w_new).sum())) # update weights: self.w = w_new if change < self.threshold: break return Dataset(self.w[np.newaxis])
def test_datasetmapping():
    # 6 samples, 4 features
    data = np.arange(24).reshape(6, 4)
    ds = Dataset(data,
                 sa={'timepoints': np.arange(6),
                     'multidim': data.copy()},
                 fa={'fid': np.arange(4)})
    # with overlapping and non-overlapping boxcars
    startpoints = [0, 1, 4]
    boxlength = 2
    bm = BoxcarMapper(startpoints, boxlength, inspace='boxy')
    # train is critical
    bm.train(ds)
    mds = bm.forward(ds)
    assert_equal(len(mds), len(startpoints))
    assert_equal(mds.nfeatures, boxlength)
    # all sample attributes remain, but they can be rotated/compressed into
    # multidimensional attributes
    assert_equal(sorted(mds.sa.keys()),
                 ['boxy_onsetidx'] + sorted(ds.sa.keys()))
    assert_equal(mds.sa.multidim.shape,
                 (len(startpoints), boxlength, ds.nfeatures))
    assert_equal(mds.sa.timepoints.shape, (len(startpoints), boxlength))
    assert_array_equal(mds.sa.timepoints.flatten(),
                       np.array([(s, s + 1) for s in startpoints]).flatten())
    assert_array_equal(mds.sa.boxy_onsetidx, startpoints)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx,
                       np.repeat(np.arange(boxlength), 4).reshape(2, -1))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    assert_equal(sorted(rds.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(rds.fa.keys()), sorted(ds.fa.keys()))
    # it is not possible to reconstruct the full samples array; some samples
    # might even show up multiple times (when there are overlapping boxcars)
    assert_array_equal(rds.samples,
                       np.array([[0, 1, 2, 3],
                                 [4, 5, 6, 7],
                                 [4, 5, 6, 7],
                                 [8, 9, 10, 11],
                                 [16, 17, 18, 19],
                                 [20, 21, 22, 23]]))
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim, ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)
def build_streamline_things(self):
    # Build a dataset having samples of different lengths. This is
    # trying to mimic a possible interface for streamlines
    # datasets, i.e., an iterable container of Mx3 points, where M
    # depends on each single streamline.

    # trying to pack it into an 'object' array to prevent conversion in the
    # Dataset
    self.streamline_samples = np.array([np.random.rand(3, 3),
                                        np.random.rand(5, 3),
                                        np.random.rand(7, 3)],
                                       dtype='object')
    self.dataset = Dataset(self.streamline_samples)
    self.similarities = [StreamlineSimilarity(distance=corouge)]
def _call(self, dataset): """Computes featurewise I-RELIEF weights.""" samples = dataset.samples NS, NF = samples.shape[:2] if self.w_guess == None: w = np.ones(NF, 'd') w /= (w**2).sum() # do normalization in all cases to be safe :) M, H = self.compute_M_H(dataset.targets) while True: d_w_k = self.k(pnorm_w(data1=samples, weight=w, p=1)) ni = np.zeros(NF, 'd') for n in range(NS): # d_w_k[n, n] could be omitted since == 0.0 gamma_n = 1.0 - np.nan_to_num(d_w_k[n, M[n]].sum() \ / (d_w_k[n, :].sum() - d_w_k[n, n])) alpha_n = np.nan_to_num(d_w_k[n, M[n]] / (d_w_k[n, M[n]].sum())) beta_n = np.nan_to_num(d_w_k[n, H[n]] / (d_w_k[n, H[n]].sum())) m_n = (np.abs(samples[n, :] - samples[M[n], :]) \ * alpha_n[:, None]).sum(0) h_n = (np.abs(samples[n, :] - samples[H[n], :]) \ * beta_n[:, None]).sum(0) ni += gamma_n * (m_n - h_n) ni = ni / NS ni_plus = np.clip(ni, 0.0, np.inf) # set all negative elements to zero w_new = np.nan_to_num(ni_plus / (np.sqrt((ni_plus**2).sum()))) change = np.abs(w_new - w).sum() if __debug__ and 'IRELIEF' in debug.active: debug('IRELIEF', "change=%.4f max=%f min=%.4f mean=%.4f std=%.4f #nan=%d" \ % (change, w_new.max(), w_new.min(), w_new.mean(), w_new.std(), np.isnan(w_new).sum())) # update weights: w = w_new if change < self.threshold: break self.w = w return Dataset(self.w[np.newaxis])
def test_resample():
    time = np.linspace(0, 2 * np.pi, 100)
    ds = Dataset(np.vstack((np.sin(time), np.cos(time))).T,
                 sa={'time': time,
                     'section': np.repeat(range(10), 10)})
    assert_equal(ds.shape, (100, 2))

    # downsample
    num = 10
    rm = FFTResampleMapper(num,
                           window=('gauss', 50),
                           position_attr='time',
                           attr_strategy='sample')
    mds = rm(ds)
    assert_equal(mds.shape, (num, ds.nfeatures))
    # didn't change the original
    assert_equal(len(ds), 100)

    # check position-based resampling
    ds_partial = ds[0::10]
    mds_partial = rm(ds_partial)
    # despite different input sampling it should yield the same output
    # timepoints
    assert_array_almost_equal(mds.sa.time, mds_partial.sa.time)
    # exclude the first points to prevent edge effects, but the data should
    # be very similar too
    assert_array_almost_equal(mds.samples[2:], mds_partial.samples[2:],
                              decimal=2)
    # simple sampling of sa's should give meaningful stuff
    assert_array_equal(mds.sa.section, range(10))

    # and now for a dataset with chunks
    cds = vstack([ds.copy(), ds.copy()])
    cds.sa['chunks'] = np.repeat([0, 1], len(ds))
    rm = FFTResampleMapper(num,
                           attr_strategy='sample',
                           chunks_attr='chunks',
                           window=('gauss', 50))
    mcds = rm(cds)
    assert_equal(mcds.shape, (20, 2))
    assert_array_equal(mcds.sa.section, np.tile(range(10), 2))
    # each individual chunk should be identical to the previous dataset
    assert_array_almost_equal(mds.samples, mcds.samples[:10])
    assert_array_almost_equal(mds.samples, mcds.samples[10:])
def bench_pymvpa(X, Y):
    """Bench with PyMVPA (by default uses a custom swig-generated wrapper
    around libsvm)"""
    from mvpa.datasets import Dataset
    from mvpa.clfs import svm
    gc.collect()

    # start time
    tstart = datetime.now()
    data = Dataset(samples=X, labels=Y)
    clf = svm.RbfCSVMC(C=1.)
    clf.train(data)
    Z = clf.predict(X)
    delta = (datetime.now() - tstart)
    # stop time

    mvpa_results.append(delta.seconds + delta.microseconds / mu_second)
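# A minimal driver sketch for the benchmark above (an assumption, not part of
# the original code): it supplies the module-level names the function relies
# on -- the `mvpa_results` list and `mu_second` (taken here to be 1e6, i.e.
# microseconds per second) -- plus the gc/datetime imports, and feeds it a
# small random two-class problem.
import gc
from datetime import datetime

import numpy as np

mvpa_results = []
mu_second = 1e6  # assumed: microseconds per second, for the delta conversion

if __name__ == '__main__':
    X = np.random.randn(200, 30)
    Y = np.random.randint(0, 2, 200)
    bench_pymvpa(X, Y)
    print "pymvpa RbfCSVMC fit+predict: %.3f s" % mvpa_results[-1]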
def test_1d_multispace_searchlight(self):
    ds = Dataset([np.arange(6)])
    ds.fa['coord1'] = np.repeat(np.arange(3), 2)
    # add a second space to the dataset
    ds.fa['coord2'] = np.tile(np.arange(2), 3)
    measure = lambda x: "+".join([str(x) for x in x.samples[0]])
    # simply select each feature once
    res = Searchlight(measure,
                      IndexQueryEngine(coord1=Sphere(0),
                                       coord2=Sphere(0)),
                      nproc=1)(ds)
    assert_array_equal(res.samples, [['0', '1', '2', '3', '4', '5']])
    res = Searchlight(measure,
                      IndexQueryEngine(coord1=Sphere(0),
                                       coord2=Sphere(1)),
                      nproc=1)(ds)
    assert_array_equal(res.samples,
                       [['0+1', '0+1', '2+3', '2+3', '4+5', '4+5']])
    res = Searchlight(measure,
                      IndexQueryEngine(coord1=Sphere(1),
                                       coord2=Sphere(0)),
                      nproc=1)(ds)
    assert_array_equal(res.samples,
                       [['0+2', '1+3', '0+2+4', '1+3+5', '2+4', '3+5']])
def _call(self, dataset): """Computes featurewise I-RELIEF-2 weights. Online version.""" # local bindings samples = dataset.samples NS, NF = samples.shape[:2] threshold = self.threshold a = self.a if self.w_guess == None: w = np.ones(NF, 'd') # do normalization in all cases to be safe :) w /= (w**2).sum() M, H = self.compute_M_H(dataset.targets) ni = np.zeros(NF, 'd') pi = np.zeros(NF, 'd') if self.permute: # indices to go through x in random order random_sequence = np.random.permutation(NS) else: random_sequence = np.arange(NS) change = threshold + 1.0 iteration = 0 counter = 0.0 while change > threshold and iteration < self.max_iter: if __debug__: debug('IRELIEF', "Iteration %d" % iteration) for t in range(NS): counter += 1.0 n = random_sequence[t] d_xn_x = np.abs(samples[n, :] - samples) d_w_k_xn_x = self.k((d_xn_x * w).sum(1)) d_w_k_xn_Mn = d_w_k_xn_x[M[n]] d_w_k_xn_Mn_sum = d_w_k_xn_Mn.sum() gamma_n = 1.0 - d_w_k_xn_Mn_sum / d_w_k_xn_x.sum() alpha_n = d_w_k_xn_Mn / d_w_k_xn_Mn_sum d_w_k_xn_Hn = d_w_k_xn_x[H[n]] beta_n = d_w_k_xn_Hn / d_w_k_xn_Hn.sum() m_n = (d_xn_x[M[n], :] * alpha_n[:, None]).sum(0) h_n = (d_xn_x[H[n], :] * beta_n[:, None]).sum(0) pi = gamma_n * (m_n - h_n) learning_rate = 1.0 / (counter * a + 1.0) ni_new = ni + learning_rate * (pi - ni) ni = ni_new # set all negative elements to zero ni_plus = np.clip(ni, 0.0, np.inf) w_new = np.nan_to_num(ni_plus / (np.sqrt((ni_plus**2).sum()))) change = np.abs(w_new - w).sum() if t % 10 == 0 and __debug__ and 'IRELIEF' in debug.active: debug( 'IRELIEF', "t=%d change=%.4f max=%f min=%.4f mean=%.4f std=%.4f" " #nan=%d" % (t, change, w_new.max(), w_new.min(), w_new.mean(), w_new.std(), np.isnan(w_new).sum())) w = w_new if change < threshold and iteration > 0: break iteration += 1 self.w = w return Dataset(self.w[np.newaxis])
def _call(self, dataset): """Computes featurewise I-RELIEF-2 weights. Online version.""" NS = dataset.samples.shape[0] NF = dataset.samples.shape[1] if self.w_guess == None: self.w = np.ones(NF, 'd') # do normalization in all cases to be safe :) self.w = self.w / (self.w**2).sum() M, H = self.compute_M_H(dataset.targets) ni = np.zeros(NF, 'd') pi = np.zeros(NF, 'd') if self.permute: # indices to go through samples in random order random_sequence = np.random.permutation(NS) else: random_sequence = np.arange(NS) change = self.threshold + 1.0 iteration = 0 counter = 0.0 while change > self.threshold and iteration < self.max_iter: if __debug__: debug('IRELIEF', "Iteration %d" % iteration) for t in range(NS): counter += 1.0 n = random_sequence[t] self.k = self.kernel(length_scale=self.kernel_width / self.w) d_w_k_xn_Mn = self.k.computed( dataset.samples[None, n, :], dataset.samples[M[n], :]).as_raw_np().squeeze() d_w_k_xn_Mn_sum = d_w_k_xn_Mn.sum() d_w_k_xn_x = self.k.computed( dataset.samples[None, n, :], dataset.samples).as_raw_np().squeeze() gamma_n = 1.0 - d_w_k_xn_Mn_sum / d_w_k_xn_x.sum() alpha_n = d_w_k_xn_Mn / d_w_k_xn_Mn_sum d_w_k_xn_Hn = self.k.computed( dataset.samples[None, n, :], dataset.samples[H[n], :]).as_raw_np().squeeze() beta_n = d_w_k_xn_Hn / d_w_k_xn_Hn.sum() m_n = (np.abs(dataset.samples[n, :] - dataset.samples[M[n], :]) \ * alpha_n[:, np.newaxis]).sum(0) h_n = (np.abs(dataset.samples[n, :] - dataset.samples[H[n], :]) \ * beta_n[:, np.newaxis]).sum(0) pi = gamma_n * (m_n - h_n) learning_rate = 1.0 / (counter * self.a + 1.0) ni_new = ni + learning_rate * (pi - ni) ni = ni_new # set all negative elements to zero ni_plus = np.clip(ni, 0.0, np.inf) w_new = np.nan_to_num(ni_plus / (np.sqrt((ni_plus**2).sum()))) change = np.abs(w_new - self.w).sum() if t % 10 == 0 and __debug__ and 'IRELIEF' in debug.active: debug( 'IRELIEF', "t=%d change=%.4f max=%f min=%.4f mean=%.4f std=%.4f" " #nan=%d" % (t, change, w_new.max(), w_new.min(), w_new.mean(), w_new.std(), np.isnan(w_new).sum())) self.w = w_new if change < self.threshold and iteration > 0: break iteration += 1 return Dataset(self.w[np.newaxis])
import numpy as np
from numpy import genfromtxt

from mvpa.datasets import Dataset  # pymvpa stuff
from mvpa.clfs.knn import kNN

f_handle = open("classdatafile.txt", 'r')
f_handle2 = open("classidfile.txt", 'r')
f_handle3 = open("predictdata.txt", 'r')

features = genfromtxt(f_handle, dtype=float)
classes = genfromtxt(f_handle2, dtype=int)
predictdata = genfromtxt(f_handle3, dtype=float)
predictdata = np.expand_dims(predictdata, axis=0)

print predictdata
print np.shape(features), features.ndim, features.dtype
print np.shape(classes), classes.ndim, classes.dtype
print np.shape(predictdata), predictdata.ndim, predictdata.dtype

f_handle.close()
f_handle2.close()
f_handle3.close()

training = Dataset(samples=features, labels=classes)

clf = kNN(k=2)
print "clf = ", clf
clf.train(training)
#print np.mean(clf.predict(training.samples) == training.labels)

classID = clf.predict(predictdata)
print "classID = ", classID
#print clf.trained_labels

if classID[0] == 1:
    print "Image is of class: GRASS"
if classID[0] == 2:
    print "Image is of class: DIRT/GRAVEL"
if classID[0] == 3:
    print "Image is of class: CEMENT/ASPHALT"
lbp = np.asarray(lbp)
i3_histo = np.asarray(i3_histo)
rgb_histo = np.asarray(rgb_histo)

id_index = 15
lbp_predictdata = lbp[[id_index]]
i3_histo_predictdata = i3_histo[[id_index]]
print
#print predictdata
print classID[id_index]
#print "len lbp:", len(lbp)
#print "shape:", lbp.shape

#mvpa
lbp_training = Dataset(samples=lbp, labels=classID)
i3_histo_training = Dataset(samples=i3_histo, labels=classID)

clf = kNN(k=1, voting='majority')
print "clf = ", clf

clf.train(lbp_training)
lbp_predicted_classID = clf.predict(lbp_predictdata)

clf.train(i3_histo_training)
i3_histo_predicted_classID = clf.predict(i3_histo_predictdata)

print "lbp_predicted_classID: ", lbp_predicted_classID
print "i3_histo_predicted_classID: ", i3_histo_predicted_classID

#if predicted_classID[0] == 1.0: print "Image is of class: GRASS"
#if predicted_classID[0] == 2.0: print "Image is of class: DIRT/GRAVEL"
#if predicted_classID[0] == 3.0: print "Image is of class: CEMENT/ASPHALT"
def test_polydetrend():
    samples_forwhole = np.array([[1.0, 2, 3, 4, 5, 6],
                                 [-2.0, -4, -6, -8, -10, -12]], ndmin=2).T
    samples_forchunks = np.array([[1.0, 2, 3, 3, 2, 1],
                                  [-2.0, -4, -6, -6, -4, -2]], ndmin=2).T
    chunks = [0, 0, 0, 1, 1, 1]
    chunks_bad = [0, 0, 1, 1, 1, 0]
    target_whole = np.array([[-3.0, -2, -1, 1, 2, 3],
                             [-6, -4, -2, 2, 4, 6]], ndmin=2).T
    target_chunked = np.array([[-1.0, 0, 1, 1, 0, -1],
                               [2, 0, -2, -2, 0, 2]], ndmin=2).T

    ds = Dataset(samples_forwhole)

    # this one will auto-train the mapper on first use
    dm = PolyDetrendMapper(polyord=1, inspace='police')
    mds = dm(ds)
    # features are linear trends, so detrending should remove them all
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # we get the information where each sample is assumed to be in the
    # space spanned by the polynomials
    assert_array_equal(mds.sa.police, np.arange(len(ds)))

    # hackish way to get the previous regressors into a dataset
    ds.sa['opt_reg_const'] = dm._regs[:, 0]
    ds.sa['opt_reg_lin'] = dm._regs[:, 1]
    # using these precomputed regressors, we should get the same result as
    # before, even if we do not generate a regressor for the linear term
    dm_optreg = PolyDetrendMapper(polyord=0,
                                  opt_regs=['opt_reg_const', 'opt_reg_lin'])
    mds_optreg = dm_optreg(ds)
    assert_array_almost_equal(mds_optreg, np.zeros(mds.shape))

    ds = Dataset(samples_forchunks)
    # 'constant' detrending removes the mean
    mds = PolyDetrendMapper(polyord=0)(ds)
    assert_array_almost_equal(
        mds.samples,
        samples_forchunks - np.mean(samples_forchunks, axis=0))
    # if there is no GLOBAL linear trend it should be identical to mean
    # removal, even when trying to remove a linear trend
    mds2 = PolyDetrendMapper(polyord=1)(ds)
    assert_array_almost_equal(mds, mds2)

    # chunk-wise detrending
    ds = dataset_wizard(samples_forchunks, chunks=chunks)
    dm = PolyDetrendMapper(chunks_attr='chunks', polyord=1, inspace='police')
    mds = dm(ds)
    # features are chunkwise linear trends, so detrending should remove them all
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # we get the information where each sample is assumed to be in the
    # space spanned by the polynomials, which is the identical linspace in
    # both chunks
    assert_array_equal(mds.sa.police, range(3) * 2)
    # a non-matching number of samples cannot be mapped
    assert_raises(ValueError, dm, ds[:-1])
    # however, if the dataset knows about the space it is possible
    ds.sa['police'] = mds.sa.police
    # XXX this should be
    #mds2 = dm(ds[1:-1])
    #assert_array_equal(mds[1:-1], mds2)
    # XXX but right now is
    assert_raises(NotImplementedError, dm, ds[1:-1])

    # Detrending must preserve the size of the dataset
    assert_equal(mds.shape, ds.shape)

    # small additional test for break points
    # although they are no longer there
    ds = dataset_wizard(np.array([[1.0, 2, 3, 1, 2, 3]], ndmin=2).T,
                        targets=chunks, chunks=chunks)
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=1)(ds)
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))

    # test of different polyord on each chunk
    target_mixed = np.array([[-1.0, 0, 1, 0, 0, 0],
                             [2.0, 0, -2, 0, 0, 0]], ndmin=2).T
    ds = dataset_wizard(samples_forchunks.copy(),
                        targets=chunks, chunks=chunks)
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=[0, 1])(ds)
    assert_array_almost_equal(mds, target_mixed)

    # test irregular spacing of samples, but with corrective time info
    samples_forwhole = np.array([[1.0, 4, 6, 8, 2, 9],
                                 [-2.0, -8, -12, -16, -4, -18]], ndmin=2).T
    ds = Dataset(samples_forwhole, sa={'time': samples_forwhole[:, 0]})
    # linear detrending that makes use of temporal info from the dataset
    dm = PolyDetrendMapper(polyord=1, inspace='time')
    mds = dm(ds)
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))

    # and now the same stuff, but with chunking and ordered by time
    samples_forchunks = np.array([[1.0, 3, 3, 2, 2, 1],
                                  [-2.0, -6, -6, -4, -4, -2]], ndmin=2).T
    chunks = [0, 1, 0, 1, 0, 1]
    time = [4, 4, 12, 8, 8, 12]
    ds = Dataset(samples_forchunks.copy(),
                 sa={'chunks': chunks, 'time': time})
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=1,
                            inspace='time')(ds)

    # the whole thing must not affect the source data
    assert_array_equal(ds, samples_forchunks)
    # but if done in place that is no longer true
    poly_detrend(ds, chunks_attr='chunks', polyord=1, inspace='time')
    assert_array_equal(ds, mds)