def test_correct_dimensions_order(self, clf):
    """Check that known/present classifiers work properly with samples
    being the first dimension. Started to worry about possible problems
    while looking at sg where samples are the 2nd dimension
    """
    # specially crafted dataset -- if dimensions are flipped over
    # the same storage, problem becomes unseparable. Like in this case
    # incorrect order of dimensions leads to equal samples [0, 1, 0]
    traindatas = [
        dataset_wizard(samples=np.array([[0, 0, 1.0],
                                         [1, 0, 0]]),
                       targets=[0, 1]),
        dataset_wizard(samples=np.array([[0, 0.0],
                                         [1, 1]]),
                       targets=[0, 1])]

    clf.ca.change_temporarily(enable_ca=['training_stats'])
    for traindata in traindatas:
        clf.train(traindata)
        self.assertEqual(
            clf.ca.training_stats.percent_correct, 100.0,
            "Classifier %s must have 100%% correct learning on %s. Has %f"
            % (repr(clf), traindata.samples,
               clf.ca.training_stats.percent_correct))

        # and we must be able to predict every original sample thus
        for i in xrange(traindata.nsamples):
            sample = traindata.samples[i, :]
            predicted = clf.predict([sample])
            self.assertEqual(
                [predicted], traindata.targets[i],
                "We must be able to predict sample %s using " % sample +
                "classifier %s" % repr(clf))
    clf.ca.reset_changed_temporarily()

def test_feature_selection_classifier(self):
    from mvpa2.featsel.base import SensitivityBasedFeatureSelection
    from mvpa2.featsel.helpers import FixedNElementTailSelector

    # should give lowest weight to the feature with lowest index
    sens_ana = SillySensitivityAnalyzer()
    # should give lowest weight to the feature with highest index
    sens_ana_rev = SillySensitivityAnalyzer(mult=-1)

    # corresponding feature selections
    feat_sel = SensitivityBasedFeatureSelection(
        sens_ana, FixedNElementTailSelector(1, mode='discard'))
    feat_sel_rev = SensitivityBasedFeatureSelection(
        sens_ana_rev, FixedNElementTailSelector(1))

    samples = np.array([[0, 0, -1], [1, 0, 1], [-1, -1, 1],
                        [-1, 0, 1], [1, -1, 1]])

    testdata3 = dataset_wizard(samples=samples, targets=1)
    # dummy train data so proper mapper gets created
    traindata = dataset_wizard(samples=np.array([[0, 0, -1], [1, 0, 1]]),
                               targets=[1, 2])

    # targets
    res110 = [1, 1, 1, -1, -1]
    res011 = [-1, 1, -1, 1, -1]

    # first classifier -- 0th feature should be discarded
    clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel,
                                        enable_ca=['feature_ids'])

    self.clf_sign.ca.change_temporarily(enable_ca=['estimates'])
    clf011.train(traindata)

    self.assertEqual(clf011.predict(testdata3.samples), res011)
    # just silly test if we get values assigned in the 'ProxyClassifier'
    self.assertTrue(len(clf011.ca.estimates) == len(res110),
                    msg="We need to pass values into ProxyClassifier")
    self.clf_sign.ca.reset_changed_temporarily()

    self.assertEqual(clf011.mapper._oshape, (2,),
                     "Feature selection classifier had to be trained "
                     "on 2 features")

    # second classifier -- last feature should be discarded
    clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel_rev)
    clf011.train(traindata)
    self.assertEqual(clf011.predict(testdata3.samples), res110)

def test_coarsen_chunks(self):
    """Just basic testing for now"""
    chunks = [1, 1, 2, 2, 3, 3, 4, 4]
    ds = dataset_wizard(samples=np.arange(len(chunks)).reshape(
        (len(chunks), 1)), targets=[1] * 8, chunks=chunks)
    coarsen_chunks(ds, nchunks=2)
    chunks1 = coarsen_chunks(chunks, nchunks=2)
    self.assertTrue((chunks1 == ds.chunks).all())
    self.assertTrue((chunks1 == np.asarray([0, 0, 0, 0, 1, 1, 1, 1])).all())

    ds2 = dataset_wizard(samples=np.arange(len(chunks)).reshape(
        (len(chunks), 1)), targets=[1] * 8,
        chunks=list(range(len(chunks))))
    coarsen_chunks(ds2, nchunks=2)
    # compare against ds2 -- the original compared against ds, which only
    # passed because both datasets coarsen to the same chunk layout
    self.assertTrue((chunks1 == ds2.chunks).all())

def test_mergeds2():
    """Test composition of new datasets by addition of existing ones
    """
    data = dataset_wizard([range(5)], targets=1, chunks=1)
    assert_array_equal(data.UT, [1])

    # simple sequence has to be a single pattern
    assert_equal(data.nsamples, 1)
    # check correct pattern layout (1x5)
    assert_array_equal(data.samples, [[0, 1, 2, 3, 4]])

    # check for single labels and origin
    assert_array_equal(data.targets, [1])
    assert_array_equal(data.chunks, [1])

    # now try adding pattern with wrong shape
    assert_raises(ValueError, vstack,
                  (data, dataset_wizard(np.ones((2, 3)), targets=1,
                                        chunks=1)))

    # now add two real patterns
    dss = datasets['uni2large'].samples
    data = vstack((data, dataset_wizard(dss[:2, :5], targets=2, chunks=2)))
    assert_equal(data.nfeatures, 5)
    assert_array_equal(data.targets, [1, 2, 2])
    assert_array_equal(data.chunks, [1, 2, 2])

    # test automatic origins
    data = vstack((data, (dataset_wizard(dss[3:5, :5], targets=3,
                                         chunks=[0, 1]))))
    assert_array_equal(data.chunks, [1, 2, 2, 0, 1])

    # test unique class labels
    assert_array_equal(data.UT, [1, 2, 3])

    # test wrong label length
    assert_raises(ValueError, dataset_wizard, dss[:4, :5],
                  targets=[1, 2, 3],
                  chunks=2)

    # test wrong origin length
    assert_raises(ValueError, dataset_wizard, dss[:4, :5],
                  targets=[1, 2, 3, 4],
                  chunks=[2, 2, 2])

def test_origid_handling():
    ds = dataset_wizard(np.atleast_2d(np.arange(35)).T)
    ds.init_origids('both')
    ok_(ds.nsamples == 35)
    assert_equal(len(np.unique(ds.sa.origids)), 35)
    assert_equal(len(np.unique(ds.fa.origids)), 1)
    selector = [3, 7, 10, 15]
    subds = ds[selector]
    assert_array_equal(subds.sa.origids, ds.sa.origids[selector])

    # Now if we request new origids if they are present we could
    # expect different behavior
    # an invalid mode spelling ('raises') must be rejected outright
    assert_raises(ValueError, subds.init_origids, 'both', mode='raises')
    sa_origids = subds.sa.origids.copy()
    fa_origids = subds.fa.origids.copy()
    for s in ('both', 'samples', 'features'):
        # with mode='raise', regenerating existing origids is an error
        assert_raises(RuntimeError, subds.init_origids, s, mode='raise')
        subds.init_origids(s, mode='existing')
        # we should have the same origids as before
        assert_array_equal(subds.sa.origids, sa_origids)
        assert_array_equal(subds.fa.origids, fa_origids)

    # Lets now change, which should be default behavior
    subds.init_origids('both')
    assert_equal(len(sa_origids), len(subds.sa.origids))
    assert_equal(len(fa_origids), len(subds.fa.origids))
    # values should change though
    ok_((sa_origids != subds.sa.origids).any())
    ok_((fa_origids != subds.fa.origids).any())

def test_idhash():
    ds = dataset_wizard(np.arange(12).reshape((4, 3)),
                        targets=1, chunks=1)
    origid = ds.idhash
    #XXX BUG -- no assurance that labels would become an array...
    #    for now -- do manually
    ds.targets = np.array([3, 1, 2, 3])   # change all labels
    ok_(origid != ds.idhash,
        msg="Changing all targets should alter dataset's idhash")

    origid = ds.idhash

    z = ds.targets[1]
    assert_equal(origid, ds.idhash,
                 msg="Accessing shouldn't change idhash")
    z = ds.chunks
    assert_equal(origid, ds.idhash,
                 msg="Accessing shouldn't change idhash")
    z[2] = 333
    ok_(origid != ds.idhash,
        msg="Changing value in attribute should change idhash")

    origid = ds.idhash
    ds.samples[1, 1] = 1000
    ok_(origid != ds.idhash,
        msg="Changing value in data should change idhash")

    origid = ds.idhash
    orig_labels = ds.targets  #.copy()
    ds.sa.targets = range(len(ds))
    ok_(origid != ds.idhash,
        msg="Changing attribute also changes idhash")
    ds.targets = orig_labels
    ok_(origid == ds.idhash,
        msg="idhash should be restored after reassigning orig targets")

def test_DissimilarityConsistencyMeasure():
    targets = np.tile(xrange(3), 2)
    chunks = np.repeat(np.array((0, 1)), 3)
    # correct results
    cres1 = 0.41894348
    cres2 = np.array([[0.16137995, 0.73062639, 0.59441713]])

    dc1 = data[0:3, :] - np.mean(data[0:3, :], 0)
    dc2 = data[3:6, :] - np.mean(data[3:6, :], 0)
    center = squareform(np.corrcoef(pdist(dc1, 'correlation'),
                                    pdist(dc2, 'correlation')),
                        checks=False).reshape((1, -1))

    dsm1 = stats.rankdata(pdist(data[0:3, :], 'correlation').reshape((1, -1)))
    dsm2 = stats.rankdata(pdist(data[3:6, :], 'correlation').reshape((1, -1)))
    spearman = squareform(np.corrcoef(np.vstack((dsm1, dsm2))),
                          checks=False).reshape((1, -1))

    ds = dataset_wizard(samples=data, targets=targets, chunks=chunks)
    dscm = DissimilarityConsistencyMeasure()
    res1 = dscm(ds)
    dscm_c = DissimilarityConsistencyMeasure(center_data=True)
    res2 = dscm_c(ds)
    dscm_sp = DissimilarityConsistencyMeasure(consistency_metric='spearman')
    res3 = dscm_sp(ds)
    ds.append(ds)
    chunks = np.repeat(np.array((0, 1, 2,)), 4)
    ds.sa['chunks'] = chunks
    res4 = dscm(ds)
    assert_almost_equal(np.mean(res1.samples), cres1)
    assert_array_almost_equal(res2.samples, center)
    assert_array_almost_equal(res3.samples, spearman)
    assert_array_almost_equal(res4.samples, cres2)

def dumb_feature_binary_dataset():
    """Very simple binary (2 labels) dataset
    """
    data = [
        [1, 0], [1, 1], [2, 0], [2, 1],
        [3, 0], [3, 1], [4, 0], [4, 1],
        [5, 0], [5, 1], [6, 0], [6, 1],
        [7, 0], [7, 1], [8, 0], [8, 1],
        [9, 0], [9, 1], [10, 0], [10, 1],
        [11, 0], [11, 1], [12, 0], [12, 1],
    ]
    regs = ([0] * 12) + ([1] * 12)

    return dataset_wizard(samples=np.array(data), targets=regs,
                          chunks=range(len(regs)))

def dumb_feature_dataset():
    """Create a very simple dataset with 2 features and 3 labels
    """
    data = [
        [1, 0], [1, 1], [2, 0], [2, 1],
        [3, 0], [3, 1], [4, 0], [4, 1],
        [5, 0], [5, 1], [6, 0], [6, 1],
        [7, 0], [7, 1], [8, 0], [8, 1],
        [9, 0], [9, 1], [10, 0], [10, 1],
        [11, 0], [11, 1], [12, 0], [12, 1],
    ]
    regs = ([1] * 8) + ([2] * 8) + ([3] * 8)

    return dataset_wizard(samples=np.array(data), targets=regs,
                          chunks=range(len(regs)))

def linear1d_gaussian_noise(size=100, slope=0.5, intercept=1.0,
                            x_min=-2.0, x_max=3.0, sigma=0.2):
    """A straight line with some Gaussian noise.
    """
    x = np.linspace(start=x_min, stop=x_max, num=size)
    noise = np.random.randn(size) * sigma
    y = x * slope + intercept + noise
    return dataset_wizard(samples=x[:, None], targets=y)

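# Usage sketch (added; not part of the original examples, and it assumes
# the same np/dataset_wizard imports as the generator above): an ordinary
# least-squares fit should recover the slope and intercept of
# linear1d_gaussian_noise. The tolerances are illustrative.
ds = linear1d_gaussian_noise(size=200, slope=0.5, intercept=1.0)
# ds.samples holds the (200, 1) x column, ds.targets the noisy y values
slope_est, intercept_est = np.polyfit(ds.samples[:, 0], ds.targets, 1)
assert abs(slope_est - 0.5) < 0.1
assert abs(intercept_est - 1.0) < 0.1
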
def pure_multivariate_signal(patterns, signal2noise=1.5, chunks=None,
                             targets=[0, 1]):
    """ Create a 2d dataset with a clear multivariate signal, but no
    univariate information.

    ::

      %%%%%%%%%
      % O % X %
      %%%%%%%%%
      % X % O %
      %%%%%%%%%
    """
    # start with noise
    data = np.random.normal(size=(4 * patterns, 2))

    # add signal
    data[:2 * patterns, 1] += signal2noise
    data[2 * patterns:4 * patterns, 1] -= signal2noise
    data[:patterns, 0] -= signal2noise
    data[2 * patterns:3 * patterns, 0] -= signal2noise
    data[patterns:2 * patterns, 0] += signal2noise
    data[3 * patterns:4 * patterns, 0] += signal2noise

    # two conditions
    regs = np.array((targets[0:1] * patterns) +
                    (targets[1:2] * 2 * patterns) +
                    (targets[0:1] * patterns))

    if chunks is None:
        chunks = range(len(data))
    return dataset_wizard(samples=data, targets=regs, chunks=chunks)

def test_feature_selection_classifier_with_regression(self):
    from mvpa2.featsel.base import SensitivityBasedFeatureSelection
    from mvpa2.featsel.helpers import FixedNElementTailSelector
    if sample_clf_reg is None:
        # no regression classifier was found, so nothing to test
        return
    # should give lowest weight to the feature with lowest index
    sens_ana = SillySensitivityAnalyzer()

    # corresponding feature selection
    feat_sel = SensitivityBasedFeatureSelection(
        sens_ana, FixedNElementTailSelector(1, mode='discard'))

    # now test with regression-based classifier. The problem is
    # that it is determining predictions twice from values and
    # then setting the values from the results, which the second
    # time is set to predictions. The final outcome is that the
    # values are actually predictions...
    dat = dataset_wizard(samples=np.random.randn(4, 10),
                         targets=[-1, -1, 1, 1])
    clf_reg = FeatureSelectionClassifier(sample_clf_reg, feat_sel)
    clf_reg.train(dat)
    _ = clf_reg.predict(dat.samples)
    self.assertFalse(
        (np.array(clf_reg.ca.estimates)
         - clf_reg.ca.predictions).sum() == 0,
        msg="Values were set to the predictions in %s." % sample_clf_reg)

def test_mapper_vs_zscore():
    """Test by comparing to results of elderly z-score function
    """
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    dss = [
        dataset_wizard(np.concatenate(
            [np.arange(40) for i in range(20)]).reshape(20, -1).T,
            targets=1, chunks=1),
        ] + datasets.values()

    for ds in dss:
        ds1 = deepcopy(ds)
        ds2 = deepcopy(ds)

        zsm = ZScoreMapper(chunks_attr=None)
        assert_raises(RuntimeError, zsm.forward, ds1.samples)
        idhashes = (idhash(ds1), idhash(ds1.samples))
        zsm.train(ds1)
        idhashes_train = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_train)

        # forward dataset
        ds1z_ds = zsm.forward(ds1)
        idhashes_forwardds = (idhash(ds1), idhash(ds1.samples))
        # must not modify samples in place!
        assert_equal(idhashes, idhashes_forwardds)

        # forward samples explicitly
        ds1z = zsm.forward(ds1.samples)
        idhashes_forward = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_forward)

        zscore(ds2, chunks_attr=None)
        assert_array_almost_equal(ds1z, ds2.samples)
        assert_array_equal(ds1.samples, ds.samples)

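# A minimal sketch (added for illustration; plain numpy, not from the test
# above) of what ZScoreMapper/zscore compute with chunks_attr=None: every
# feature is centered by its mean and scaled by its standard deviation
# across all samples.
X = np.random.randn(40, 20) * 3 + 5
Xz = (X - X.mean(axis=0)) / X.std(axis=0)
np.testing.assert_array_almost_equal(Xz.mean(axis=0), np.zeros(20))
np.testing.assert_array_almost_equal(Xz.std(axis=0), np.ones(20))
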
def test_PDist():
    targets = np.tile(xrange(3), 2)
    chunks = np.repeat(np.array((0, 1)), 3)
    ds = dataset_wizard(samples=data, targets=targets, chunks=chunks)
    data_c = data - np.mean(data, 0)
    # DSM matrix elements should come out as samples of one feature
    # to be in line with what e.g. a classifier returns -- facilitates
    # collection in a searchlight ...
    euc = pdist(data, 'euclidean')[None].T
    pear = pdist(data, 'correlation')[None].T
    city = pdist(data, 'cityblock')[None].T
    center_sq = squareform(pdist(data_c, 'correlation'))

    # Now center each chunk separately
    dsm1 = PDist()
    dsm2 = PDist(pairwise_metric='euclidean')
    dsm3 = PDist(pairwise_metric='cityblock')
    dsm4 = PDist(center_data=True, square=True)
    assert_array_almost_equal(dsm1(ds).samples, pear)
    assert_array_almost_equal(dsm2(ds).samples, euc)
    dsm_res = dsm3(ds)
    assert_array_almost_equal(dsm_res.samples, city)
    # length corresponds to a single triangular matrix
    assert_equal(len(dsm_res.sa.pairs), len(ds) * (len(ds) - 1) / 2)
    # generated label pairs actually reflect the vectorform generated by
    # squareform()
    dsm_res_square = squareform(dsm_res.samples.T[0])
    for i, p in enumerate(dsm_res.sa.pairs):
        assert_equal(dsm_res_square[p[0], p[1]], dsm_res.samples[i, 0])
    dsm_res = dsm4(ds)
    assert_array_almost_equal(dsm_res.samples, center_sq)
    # sample attributes are carried over
    assert_almost_equal(ds.sa.targets, dsm_res.sa.targets)

def load_fcmri_dataset(data, subjects, conditions, group, level, n_run=3):
    attributes = []
    samples = []

    for ic, c in enumerate(conditions):
        for isb, s in enumerate(subjects):
            for i in range(n_run):
                matrix = data[ic, isb, i, :]
                fmatrix = flatten_matrix(matrix)
                samples.append(fmatrix)
                attributes.append([c, s, i, group[isb], level[isb]])

    attributes = np.array(attributes)

    ds = dataset_wizard(np.array(samples),
                        targets=attributes.T[0],
                        chunks=attributes.T[1])
    ds.sa['run'] = attributes.T[2]
    ds.sa['group'] = attributes.T[3]
    ds.sa['level'] = np.int_(attributes.T[4])
    ds.sa['meditation'] = attributes.T[0]

    return ds

def _test_mcasey20120222():  # pragma: no cover
    # http://lists.alioth.debian.org/pipermail/pkg-exppsy-pymvpa/2012q1/002034.html
    # This one is conditioned on allowing # of samples to be changed
    # by the mapper provided to MappedClassifier. See
    # https://github.com/yarikoptic/PyMVPA/tree/_tent/allow_ch_nsamples

    import numpy as np
    from mvpa2.datasets.base import dataset_wizard
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.mappers.base import ChainMapper
    from mvpa2.mappers.svd import SVDMapper
    from mvpa2.mappers.fx import mean_group_sample
    from mvpa2.clfs.svm import LinearCSVMC
    from mvpa2.clfs.meta import MappedClassifier
    from mvpa2.measures.base import CrossValidation

    mapper = ChainMapper([mean_group_sample(['targets', 'chunks']),
                          SVDMapper()])
    clf = MappedClassifier(LinearCSVMC(), mapper)
    cvte = CrossValidation(clf, NFoldPartitioner(),
                           enable_ca=['repetition_results', 'stats'])

    ds = dataset_wizard(
        samples=np.arange(32).reshape((8, -1)),
        targets=[1, 1, 2, 2, 1, 1, 2, 2],
        chunks=[1, 1, 1, 1, 2, 2, 2, 2])

    errors = cvte(ds)

def test_aggregation(self):
    data = dataset_wizard(np.arange(20).reshape((4, 5)),
                          targets=1, chunks=1)
    ag_data = aggregate_features(data, np.mean)
    ok_(ag_data.nsamples == 4)
    ok_(ag_data.nfeatures == 1)
    assert_array_equal(ag_data.samples[:, 0], [2, 7, 12, 17])

def test_str():
    args = (np.arange(12, dtype=np.int8).reshape((4, 3)),
            range(4),
            [1, 1, 2, 2])
    for iargs in range(1, len(args)):
        ds = dataset_wizard(*(args[:iargs]))
        ds_s = str(ds)
        ok_(ds_s.startswith('<Dataset: 4x3@int8'))
        ok_(ds_s.endswith('>'))

def setUp(self):
    self.clf_sign = SameSignClassifier()
    self.clf_less1 = Less1Classifier()

    # simple binary dataset
    self.data_bin_1 = dataset_wizard(
        samples=[[0, 0], [-10, -1], [1, 0.1], [1, -1], [-1, 1]],
        targets=[1, 1, 1, -1, -1],    # labels
        chunks=[0, 1, 2, 2, 3])       # chunks

def test_nfold_random_counted_selection_partitioner_huge(self):
    # Just test that it completes in a reasonable time and does
    # not blow up as it would do if it was not limited by count
    kwargs = dict(count=10)
    ds = dataset_wizard(np.arange(1000).reshape((-1, 1)),
                        targets=range(1000),
                        chunks=range(500) * 2)
    split_partitions_random = [
        tuple(x.sa.partitions)
        for x in NFoldPartitioner(100, selection_strategy='random',
                                  **kwargs).generate(ds)]
    assert_equal(len(split_partitions_random), 10)  # we get just 10

def test_nonfinite_features_removal(self):
    r = np.random.normal(size=(4, 5))
    ds = dataset_wizard(r, targets=1, chunks=1)
    ds.samples[2, 0] = np.NaN
    ds.samples[3, 3] = np.Inf

    dsc = remove_nonfinite_features(ds)

    self.assertTrue(dsc.nfeatures == 3)
    assert_array_equal(ds[:, [1, 2, 4]].samples, dsc.samples)

def test_invar_features_removal(self):
    r = np.random.normal(size=(3, 1))
    ds = dataset_wizard(samples=np.hstack((np.zeros((3, 2)), r)),
                        targets=1)

    self.assertTrue(ds.nfeatures == 3)

    dsc = remove_invariant_features(ds)

    self.assertTrue(dsc.nfeatures == 1)
    self.assertTrue((dsc.samples == r).all())

def get_dataset(self):
    zresults = self.results
    new_shape = list(zresults.shape[-2:])
    new_shape.insert(0, -1)
    zreshaped = zresults.reshape(new_shape)

    upper_mask = np.ones_like(zreshaped[0])
    upper_mask[np.tril_indices(zreshaped[0].shape[0])] = 0
    upper_mask = np.bool_(upper_mask)

    # Reshape data to have samples x features
    ds_data = zreshaped[:, upper_mask]

    labels = []
    n_runs = zresults.shape[2]
    n_subj = zresults.shape[1]
    for l in self.conditions.keys():
        labels += [l for _ in range(n_runs * n_subj)]
    ds_labels = np.array(labels)

    ds_subjects = []
    for s in self.subjects:
        ds_subjects += [s for _ in range(n_runs)]
    ds_subjects = np.array(ds_subjects)

    ds_info = []
    for _ in self.conditions.keys():
        ds_info.append(ds_subjects)
    ds_info = np.vstack(ds_info)

    logger.debug(ds_info)
    logger.debug(ds_info.shape)
    logger.debug(ds_data.shape)

    self.ds = dataset_wizard(ds_data, targets=ds_labels,
                             chunks=np.int_(ds_info.T[5]))
    self.ds.sa['subjects'] = ds_info.T[0]
    self.ds.sa['groups'] = ds_info.T[1]
    self.ds.sa['chunks_1'] = ds_info.T[2]
    self.ds.sa['expertise'] = ds_info.T[3]
    self.ds.sa['age'] = ds_info.T[4]
    self.ds.sa['chunks_2'] = ds_info.T[5]
    self.ds.sa['meditation'] = ds_labels

    logger.debug(ds_info.T[4])
    logger.debug(self.ds.sa.keys())

    return self.ds

def test_mergeds2():
    """Test composition of new datasets by addition of existing ones
    """
    data = dataset_wizard([range(5)], targets=1, chunks=1)
    assert_array_equal(data.UT, [1])

    # simple sequence has to be a single pattern
    assert_equal(data.nsamples, 1)
    # check correct pattern layout (1x5)
    assert_array_equal(data.samples, [[0, 1, 2, 3, 4]])

    # check for single labels and origin
    assert_array_equal(data.targets, [1])
    assert_array_equal(data.chunks, [1])

    # now try adding pattern with wrong shape
    assert_raises(DatasetError, data.append,
                  dataset_wizard(np.ones((2, 3)), targets=1, chunks=1))

    # now add two real patterns
    dss = datasets['uni2large'].samples
    data.append(dataset_wizard(dss[:2, :5], targets=2, chunks=2))
    assert_equal(data.nfeatures, 5)
    assert_array_equal(data.targets, [1, 2, 2])
    assert_array_equal(data.chunks, [1, 2, 2])

    # test automatic origins
    data.append(dataset_wizard(dss[3:5, :5], targets=3, chunks=[0, 1]))
    assert_array_equal(data.chunks, [1, 2, 2, 0, 1])

    # test unique class labels
    assert_array_equal(data.UT, [1, 2, 3])

    # test wrong label length
    assert_raises(ValueError, dataset_wizard, dss[:4, :5],
                  targets=[1, 2, 3],
                  chunks=2)

    # test wrong origin length
    assert_raises(ValueError, dataset_wizard, dss[:4, :5],
                  targets=[1, 2, 3, 4],
                  chunks=[2, 2, 2])

def load_connectivity_ds(path, subjects, extra_sa_file=None):
    """
    Loads txt connectivity matrices in the form of n_roi x n_roi matrix

    Parameters
    ----------
    path : string
        Pathname of the data folder
    subjects : list
        List of subjects included in the analysis
        it is always a directory in the path
    extra_sa : dictionary
        Dictionary of extra fields to be included.
        The dictionary must be in the form of 'field': list of extra fields
        dictionary must include the field 'subject'

    Returns
    -------
    ds : pymvpa dataset
        The loaded dataset
    """

    data, attributes = load_txt_matrices(path, subjects)

    n_roi = data.shape[1]
    indices = np.triu_indices(n_roi, k=1)

    n_samples = data.shape[0]
    n_features = len(indices[0])

    samples = np.zeros((n_samples, n_features))
    for i in range(n_samples):
        samples[i] = data[i][indices]

    mask = np.isnan(samples).sum(0)

    ds = dataset_wizard(samples)
    for key, value in attributes.iteritems():
        ds.sa[key] = value

    ds.fa['nan_mask'] = np.bool_(mask)

    if extra_sa_file is not None:
        _, fextra_sa = load_subject_file(extra_sa_file)
        ds = add_subject_attributes(ds, fextra_sa)

    return ds

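# Sketch of the vectorization used by load_connectivity_ds (added for
# illustration; plain numpy, variable names are hypothetical):
# np.triu_indices(n, k=1) pulls the upper triangle of a symmetric
# n_roi x n_roi connectivity matrix into a flat feature vector, and the
# same index pair restores the square form.
n_roi = 4
rng = np.random.RandomState(0)
m = rng.rand(n_roi, n_roi)
m = (m + m.T) / 2.0           # symmetrize
np.fill_diagonal(m, 1.0)      # unit self-connectivity
idx = np.triu_indices(n_roi, k=1)
vec = m[idx]                  # n_roi * (n_roi - 1) / 2 features
back = np.eye(n_roi)
back[idx] = vec               # refill the upper triangle
back.T[idx] = vec             # mirror into the lower triangle
np.testing.assert_array_almost_equal(back, m)
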
def test_reflection(self, rep=10):
    for i in range(rep):
        from mvpa2.testing.datasets import get_random_rotation
        d = np.random.random((100, 2))
        T = get_random_rotation(d.shape[1])
        d2 = np.dot(d, T)
        # scale it up a bit
        d2 *= 1.2
        # add a reflection by flipping the first dimension
        d2[:, 0] *= -1

        ds = dataset_wizard(samples=d, targets=d2)
        norm0 = np.linalg.norm(d - d2)

        mapper = ProcrusteanMapper(scaling=False, reflection=False)
        mapper.train(ds)
        norm1 = np.linalg.norm(d2 - mapper.forward(ds).samples)
        eps = 1e-7
        self.assertLess(norm1, norm0 + eps,
                        msg='Procrustes should reduce difference, '
                            'but %f > %f' % (norm1, norm0))

        mapper = ProcrusteanMapper(scaling=True, reflection=False)
        mapper.train(ds)
        norm2 = np.linalg.norm(d2 - mapper.forward(ds).samples)
        self.assertLess(norm2, norm1 + eps,
                        msg='Procrustes with scaling should work better, '
                            'but %f > %f' % (norm2, norm1))

        mapper = ProcrusteanMapper(scaling=False, reflection=True)
        mapper.train(ds)
        norm3 = np.linalg.norm(d2 - mapper.forward(ds).samples)
        self.assertLess(norm3, norm1 + eps,
                        msg='Procrustes with reflection should work better, '
                            'but %f > %f' % (norm3, norm1))

        mapper = ProcrusteanMapper(scaling=True, reflection=True)
        mapper.train(ds)
        norm4 = np.linalg.norm(d2 - mapper.forward(ds).samples)
        self.assertLess(norm4, norm3 + eps,
                        msg='Procrustes with scaling should work better, '
                            'but %f > %f' % (norm4, norm3))
        self.assertLess(norm4, norm2 + eps,
                        msg='Procrustes with reflection should work better, '
                            'but %f > %f' % (norm4, norm2))

def test_surface_dset_h5py_io_with_unicode(self, fn):
    skip_if_no_external('h5py')
    from mvpa2.base.hdf5 import h5save, h5load

    ds = dataset_wizard(np.arange(20).reshape((4, 5)),
                        targets=1, chunks=1)
    ds.sa['unicode'] = [u'u1', u'uu2', u'uuu3', u'uuuu4']
    ds.sa['str'] = ['s1', 'ss2', 'sss3', 'ssss4']
    ds.fa['node_indices'] = np.arange(5)

    # test h5py I/O
    h5save(fn, ds)
    ds2 = h5load(fn)
    assert_datasets_equal(ds, ds2)

def sin_modulated(n_instances, n_features, flat=False, noise=0.4):
    """ Generate a (quite) complex multidimensional non-linear dataset

    Used for regression testing. The label is the rounded sin of the
    sum of squared features, plus uniform noise.
    """
    if flat:
        data = (np.arange(0.0, 1.0, 1.0 / n_instances) * np.pi)
        data.resize(n_instances, n_features)
    else:
        data = np.random.rand(n_instances, n_features) * np.pi
    label = np.sin((data ** 2).sum(1)).round()
    label += np.random.rand(label.size) * noise
    return dataset_wizard(samples=data, targets=label)

def pure_multivariate_signal(patterns, signal2noise=1.5, chunks=None,
                             targets=None):
    """Create a 2d dataset with a clear purely multivariate signal.

    This is known as the XOR problem.

    ::

      %%%%%%%%%
      % O % X %
      %%%%%%%%%
      % X % O %
      %%%%%%%%%

    Parameters
    ----------
    patterns: int
      Number of data points in each of the four dot clouds
    signal2noise: float, optional
      Univariate signal pedestal.
    chunks: array, optional
      Vector for chunk labels for all generated samples.
    targets: list, optional
      Length-2 sequence of target values for both classes. If None,
      [0, 1] is used.
    """
    if targets is None:
        targets = [0, 1]

    # start with noise
    data = np.random.normal(size=(4 * patterns, 2))

    # add signal
    data[:2 * patterns, 1] += signal2noise
    data[2 * patterns:4 * patterns, 1] -= signal2noise
    data[:patterns, 0] -= signal2noise
    data[2 * patterns:3 * patterns, 0] -= signal2noise
    data[patterns:2 * patterns, 0] += signal2noise
    data[3 * patterns:4 * patterns, 0] += signal2noise

    # two conditions
    regs = np.array((targets[0:1] * patterns) +
                    (targets[1:2] * 2 * patterns) +
                    (targets[0:1] * patterns))

    if chunks is None:
        chunks = range(len(data))
    return dataset_wizard(samples=data, targets=regs, chunks=chunks)

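# Usage sketch (added for illustration; the thresholds are loose,
# illustrative assumptions, not from the original code): in the XOR
# layout neither feature alone separates the classes, but the product
# of the two features does.
ds = pure_multivariate_signal(patterns=100, signal2noise=3.0)
for f in (0, 1):
    # per-feature class means nearly coincide -- no univariate signal
    m0 = ds.samples[ds.targets == 0, f].mean()
    m1 = ds.samples[ds.targets == 1, f].mean()
    assert abs(m0 - m1) < 1.0
# the feature product separates the classes cleanly
prod = ds.samples[:, 0] * ds.samples[:, 1]
assert prod[ds.targets == 0].mean() < 0 < prod[ds.targets == 1].mean()
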
def test_combined_samplesfeature_selection():
    data = dataset_wizard(np.arange(20).reshape((4, 5)).view(myarray),
                          targets=[1, 2, 3, 4],
                          chunks=[5, 6, 7, 8])
    # array subclass survives
    ok_(isinstance(data.samples, myarray))

    ok_(data.nsamples == 4)
    ok_(data.nfeatures == 5)
    sel = data[[0, 3], [1, 2]]
    ok_(sel.nsamples == 2)
    ok_(sel.nfeatures == 2)
    assert_array_equal(sel.targets, [1, 4])
    assert_array_equal(sel.chunks, [5, 8])
    assert_array_equal(sel.samples, [[1, 2], [16, 17]])
    # array subclass survives
    ok_(isinstance(sel.samples, myarray))

    # should yield the same result if done sequentially
    sel2 = data[:, [1, 2]]
    sel2 = sel2[[0, 3]]
    assert_array_equal(sel.samples, sel2.samples)
    ok_(sel2.nsamples == 2)
    ok_(sel2.nfeatures == 2)
    # array subclass survives
    ok_(isinstance(sel.samples, myarray))

    assert_raises(ValueError, data.__getitem__, (1, 2, 3))

    # test correct behavior when selecting just single rows/columns
    single = data[0]
    ok_(single.nsamples == 1)
    ok_(single.nfeatures == 5)
    assert_array_equal(single.samples, [[0, 1, 2, 3, 4]])
    single = data[:, 0]
    ok_(single.nsamples == 4)
    ok_(single.nfeatures == 1)
    assert_array_equal(single.samples, [[0], [5], [10], [15]])
    single = data[1, 1]
    ok_(single.nsamples == 1)
    ok_(single.nfeatures == 1)
    assert_array_equal(single.samples, [[6]])
    # array subclass survives
    ok_(isinstance(single.samples, myarray))

def chirp_linear(n_instances, n_features=4, n_nonbogus_features=2,
                 data_noise=0.4, noise=0.1):
    """ Generates simple dataset for linear regressions

    Generates chirp signal, populates n_nonbogus_features out of
    n_features with it with different noise level and then provides
    signal itself with additional noise as labels
    """
    x = np.linspace(0, 1, n_instances)
    y = np.sin((10 * np.pi * x ** 2))

    data = np.random.normal(size=(n_instances, n_features)) * data_noise
    for i in xrange(n_nonbogus_features):
        data[:, i] += y[:]

    labels = y + np.random.normal(size=(n_instances,)) * noise

    return dataset_wizard(samples=data, targets=labels)

def test_samplesgroup_mapper():
    data = np.arange(24).reshape(8, 3)
    labels = [0, 1] * 4
    chunks = np.repeat(np.array((0, 1)), 4)

    # correct results
    csamples = [[3, 4, 5], [6, 7, 8], [15, 16, 17], [18, 19, 20]]
    clabels = [0, 1, 0, 1]
    cchunks = [0, 0, 1, 1]

    ds = dataset_wizard(samples=data, targets=labels, chunks=chunks)
    # add some feature attribute -- just to check
    ds.fa['checker'] = np.arange(3)
    ds.init_origids('samples')

    m = mean_group_sample(['targets', 'chunks'])
    mds = m.forward(ds)
    assert_array_equal(mds.samples, csamples)
    # FAs should simply remain the same
    assert_array_equal(mds.fa.checker, np.arange(3))

    # now without grouping
    m = mean_sample()
    # forwarding just the samples should yield the same result
    assert_array_equal(m.forward(ds.samples),
                       m.forward(ds).samples)

    # directly apply to dataset
    # using untrained mapper
    m = mean_group_sample(['targets', 'chunks'])
    mapped = ds.get_mapped(m)

    assert_equal(mapped.nsamples, 4)
    assert_equal(mapped.nfeatures, 3)
    assert_array_equal(mapped.samples, csamples)
    assert_array_equal(mapped.targets, clabels)
    assert_array_equal(mapped.chunks, cchunks)
    # make sure origids get regenerated
    assert_array_equal([s.count('+') for s in mapped.sa.origids],
                       [1] * 4)

    # disbalanced dataset -- lets remove 0th sample so there is no target
    # 0 in 0th chunk
    ds_ = ds[[0, 1, 3, 5]]
    mapped = ds_.get_mapped(m)
    ok_(len(mapped) == 3)
    ok_(not None in mapped.sa.origids)

def test_surface_dset_niml_io_with_unicode(self, fn):
    ds = dataset_wizard(np.arange(20).reshape((4, 5)),
                        targets=1, chunks=1)
    ds.sa['unicode'] = [u'u1', u'uu2', u'uuu3', u'uuuu4']
    ds.sa['str'] = ['s1', 'ss2', 'sss3', 'ssss4']
    ds.fa['node_indices'] = np.arange(5)

    # ensure sample attributes are of String type (not array)
    niml_dict = afni_niml_dset.dset2rawniml(niml.to_niml(ds))
    expected_dtypes = dict(PYMVPA_SA_unicode='String',
                           PYMVPA_SA_str='String',
                           PYMVPA_SA_targets='1*int32')

    def assert_has_expected_datatype(name, expected_dtype, niml):
        """helper function"""
        nodes = niml_dict['nodes']

        for node in nodes:
            if node['name'] == name:
                assert_equal(node['ni_type'], expected_dtype)
                return

        raise ValueError('not found: %s' % name)

    for name, expected_dtype in expected_dtypes.iteritems():
        assert_has_expected_datatype(name, expected_dtype, niml)

    # test NIML I/O
    niml.write(fn, ds)

    # remove extra fields added when reading the file
    ds2 = niml.from_any(fn)
    ds2.a.pop('history')
    ds2.a.pop('filename')
    ds2.sa.pop('labels')
    ds2.sa.pop('stats')

    # NIML does not support int64, only int32;
    # compare equality of values in samples by setting the
    # datatype the same as in the input (int32 or int64 depending
    # on the platform)
    ds2.samples = np.asarray(ds2.samples, dtype=ds.samples.dtype)

    assert_datasets_equal(ds, ds2)

def test_classifier(self):
    clf = ParametrizedClassifier()
    self.assertEqual(len(clf.params.items()), 2)  # + retrainable
    self.assertEqual(len(clf.kernel_params.items()), 1)

    clfe = ParametrizedClassifierExtended()
    self.assertEqual(len(clfe.params.items()), 2)
    self.assertEqual(len(clfe.kernel_params.items()), 2)
    self.assertEqual(len(clfe.kernel_params.listing), 2)

    # check assignment once again
    self.assertEqual(clfe.kernel_params.kp2, 200.0)
    clfe.kernel_params.kp2 = 201.0
    self.assertEqual(clfe.kernel_params.kp2, 201.0)
    self.assertEqual(clfe.kernel_params.is_set("kp2"), True)
    clfe.train(dataset_wizard(samples=[[0, 0]], targets=[1], chunks=[1]))
    self.assertEqual(clfe.kernel_params.is_set("kp2"), False)
    self.assertEqual(clfe.kernel_params.is_set(), False)
    self.assertEqual(clfe.params.is_set(), False)