def test_slicing(self):
    hs = HalfPartitioner()
    spl = Splitter(attr="partitions")
    splits = list(hs.generate(self.data))
    for s in splits:
        # partitioned dataset shares the data
        assert_true(s.samples.base is self.data.samples)
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]

    # with numpy 1.7.0b1 "chaining" was deprecated so let's create
    # a check function appropriate for the given numpy version
    _a = np.arange(5)
    __a = _a[:4][:3]
    if __a.base is _a:
        # 1.7.0b1
        def is_the_same_base(x, base=self.data.samples):
            return x.base is base
    elif __a.base.base is _a:
        # prior 1.7.0b1
        def is_the_same_base(x, base=self.data.samples):
            return x.base.base is base
    else:
        raise RuntimeError("Unknown handling of .base by numpy")

    for s in splits:
        # we get slicing all the time
        assert_true(is_the_same_base(s[0].samples))
        assert_true(is_the_same_base(s[1].samples))
    spl = Splitter(attr="partitions", noslicing=True)
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
    for s in splits:
        # no slicing at all
        assert_false(s[0].samples.base is self.data.samples)
        assert_false(s[1].samples.base is self.data.samples)
    nfs = NFoldPartitioner()
    spl = Splitter(attr="partitions")
    splits = [list(spl.generate(p)) for p in nfs.generate(self.data)]
    for i, s in enumerate(splits):
        # training only first and last split
        if i == 0 or i == len(splits) - 1:
            assert_true(is_the_same_base(s[0].samples))
        else:
            assert_true(s[0].samples.base is None)
        # we get slicing all the time
        assert_true(is_the_same_base(s[1].samples))
    step_ds = Dataset(np.random.randn(20, 2),
                      sa={"chunks": np.tile([0, 1], 10)})
    oes = OddEvenPartitioner()
    spl = Splitter(attr="partitions")
    splits = list(oes.generate(step_ds))
    for s in splits:
        # partitioned dataset shares the data
        assert_true(s.samples.base is step_ds.samples)
    splits = [list(spl.generate(p)) for p in oes.generate(step_ds)]
    assert_equal(len(splits), 2)
    for s in splits:
        # we get slicing all the time
        assert_true(is_the_same_base(s[0].samples, step_ds.samples))
        assert_true(is_the_same_base(s[1].samples, step_ds.samples))
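# A standalone sketch of the numpy behavior probed above: before numpy
# 1.7.0b1, slicing a slice produced a chained view whose .base was the
# intermediate slice, so only .base.base reached the original array;
# 1.7.0b1 collapsed the chain so .base points at the original directly.
import numpy as np

a = np.arange(5)
v = a[:4][:3]  # a view of a view
if v.base is a:
    print "chained views are collapsed (numpy 1.7.0b1 and later)"
elif v.base.base is a:
    print "views chain through .base (numpy before 1.7.0b1)"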
def blocked_detection_n_equals_1(mech_vec_list, mech_nm_list):
    data, _ = mar.create_blocked_dataset_semantic_classes(mech_vec_list,
                                                          mech_nm_list,
                                                          append_robot=False)
    nfs = NFoldPartitioner(cvtype=1, attr='targets')  # 1-fold ?
    spl = splitters.Splitter(attr='partitions')
    splits = [list(spl.generate(x)) for x in nfs.generate(data)]
    ## splitter = NFoldSplitter(cvtype=1)
    ## label_splitter = NFoldSplitter(cvtype=1, attr='labels')
    mean_thresh_known_mech_dict = {}
    for l_wdata, l_vdata in splits:
        mean_thresh_known_mech_list = []
        Ms = mar.compute_Ms(data, l_vdata.targets[0], plot=True)
        break

    mechs = l_vdata.uniquechunks
    for m in mechs:
        n_std = 0.
        all_trials = l_vdata.samples[np.where(l_vdata.chunks == m)]
        le = all_trials.shape[1]
        for i in range(all_trials.shape[0]):
            one_trial = all_trials[i, :].reshape(1, le)
            mn_list, std_list = mar.estimate_theta(one_trial, Ms, plot=False)
            mn_arr, std_arr = np.array(mn_list), np.array(std_list)
            n_std = max(n_std, np.max(np.abs(all_trials - mn_arr) / std_arr))
        # store on a per mechanism granularity
        mean_thresh_known_mech_dict[m] = (Ms, n_std)
        print 'n_std for', m, ':', n_std
        print 'max error force for', m, ':', np.max(n_std * std_arr[2:])
def test_slicing(self):
    hs = HalfPartitioner()
    spl = Splitter(attr='partitions')
    splits = list(hs.generate(self.data))
    for s in splits:
        # partitioned dataset shares the data
        assert_true(s.samples.base is self.data.samples)
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]

    # with numpy 1.7.0b1 "chaining" was deprecated so let's create
    # a check function appropriate for the given numpy version
    _a = np.arange(5)
    __a = _a[:4][:3]
    if __a.base is _a:
        # 1.7.0b1
        def is_the_same_base(x, base=self.data.samples):
            return x.base is base
    elif __a.base.base is _a:
        # prior 1.7.0b1
        def is_the_same_base(x, base=self.data.samples):
            return x.base.base is base
    else:
        raise RuntimeError("Unknown handling of .base by numpy")

    for s in splits:
        # we get slicing all the time
        assert_true(is_the_same_base(s[0].samples))
        assert_true(is_the_same_base(s[1].samples))
    spl = Splitter(attr='partitions', noslicing=True)
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
    for s in splits:
        # no slicing at all
        assert_false(s[0].samples.base is self.data.samples)
        assert_false(s[1].samples.base is self.data.samples)
    nfs = NFoldPartitioner()
    spl = Splitter(attr='partitions')
    splits = [list(spl.generate(p)) for p in nfs.generate(self.data)]
    for i, s in enumerate(splits):
        # training only first and last split
        if i == 0 or i == len(splits) - 1:
            assert_true(is_the_same_base(s[0].samples))
        else:
            assert_true(s[0].samples.base is None)
        # we get slicing all the time
        assert_true(is_the_same_base(s[1].samples))
    step_ds = Dataset(np.random.randn(20, 2),
                      sa={'chunks': np.tile([0, 1], 10)})
    oes = OddEvenPartitioner()
    spl = Splitter(attr='partitions')
    splits = list(oes.generate(step_ds))
    for s in splits:
        # partitioned dataset shares the data
        assert_true(s.samples.base is step_ds.samples)
    splits = [list(spl.generate(p)) for p in oes.generate(step_ds)]
    assert_equal(len(splits), 2)
    for s in splits:
        # we get slicing all the time
        assert_true(is_the_same_base(s[0].samples, step_ds.samples))
        assert_true(is_the_same_base(s[1].samples, step_ds.samples))
def test_simplest_cv_pat_gen(self):
    # create the generator
    nfs = NFoldPartitioner(cvtype=1)
    spl = Splitter(attr='partitions')
    # now get the xval pattern sets (One-Fold CV)
    xvpat = [ list(spl.generate(p)) for p in nfs.generate(self.data) ]

    self.failUnless( len(xvpat) == 10 )

    for i, p in enumerate(xvpat):
        self.failUnless( len(p) == 2 )
        self.failUnless( p[0].nsamples == 90 )
        self.failUnless( p[1].nsamples == 10 )
        self.failUnless( p[1].chunks[0] == i )
def test_simplest_cv_pat_gen(self):
    # create the generator
    nfs = NFoldPartitioner(cvtype=1)
    spl = Splitter(attr='partitions')
    # now get the xval pattern sets (One-Fold CV)
    xvpat = [list(spl.generate(p)) for p in nfs.generate(self.data)]

    self.assertTrue(len(xvpat) == 10)

    for i, p in enumerate(xvpat):
        self.assertTrue(len(p) == 2)
        self.assertTrue(p[0].nsamples == 90)
        self.assertTrue(p[1].nsamples == 10)
        self.assertTrue(p[1].chunks[0] == i)
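# A minimal usage sketch of the partition-then-split idiom exercised by the
# tests above (import paths assume the PyMVPA2 layout): the partitioner only
# marks samples with sa['partitions'] (1 == training, 2 == testing by
# default), and the splitter then slices the marked dataset into the
# train/test pair.
import numpy as np
from mvpa2.datasets.base import Dataset
from mvpa2.generators.partition import NFoldPartitioner
from mvpa2.generators.splitters import Splitter

ds = Dataset(np.arange(20).reshape(10, 2),
             sa={'targets': [0, 1] * 5, 'chunks': range(10)})
for part in NFoldPartitioner(cvtype=1).generate(ds):
    train, test = list(Splitter(attr='partitions').generate(part))
    print train.nsamples, test.nsamples  # 9 and 1 with 10 single-sample chunks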
def test_counted_splitting(self):
    spl = Splitter(attr='partitions')
    # count > #chunks, should result in 10 splits
    nchunks = len(self.data.sa['chunks'].unique)
    for strategy in Partitioner._STRATEGIES:
        for count, target in [ (nchunks*2, nchunks),
                               (nchunks, nchunks),
                               (nchunks-1, nchunks-1),
                               (3, 3),
                               (0, 0),
                               (1, 1) ]:
            nfs = NFoldPartitioner(cvtype=1, count=count,
                                   selection_strategy=strategy)
            splits = [ list(spl.generate(p)) for p in nfs.generate(self.data) ]
            self.failUnless(len(splits) == target)
            chosenchunks = [int(s[1].uniquechunks) for s in splits]

            # Test if configuration matches as well
            nsplits_cfg = len(nfs.get_partition_specs(self.data))
            self.failUnlessEqual(nsplits_cfg, target)

            # Check if "lastsplit" dsattr was assigned appropriately
            nsplits = len(splits)
            if nsplits > 0:
                # dummy-proof testing of last split
                for ds_ in splits[-1]:
                    self.failUnless(ds_.a.lastpartitionset)
                # test all now
                for isplit, split in enumerate(splits):
                    for ds_ in split:
                        self.failUnless(ds_.a.lastpartitionset ==
                                        (isplit == nsplits - 1))

            # Check results of different strategies
            if strategy == 'first':
                self.failUnlessEqual(chosenchunks, range(target))
            elif strategy == 'equidistant':
                if target == 3:
                    self.failUnlessEqual(chosenchunks, [0, 3, 7])
            elif strategy == 'random':
                # none is selected twice
                self.failUnless(len(set(chosenchunks)) == len(chosenchunks))
                self.failUnless(target == len(chosenchunks))
            else:
                raise RuntimeError, "Add unittest for strategy %s" \
                      % strategy
def test_counted_splitting(self):
    spl = Splitter(attr='partitions')
    # count > #chunks, should result in 10 splits
    nchunks = len(self.data.sa['chunks'].unique)
    for strategy in Partitioner._STRATEGIES:
        for count, target in [(nchunks * 2, nchunks), (nchunks, nchunks),
                              (nchunks - 1, nchunks - 1), (3, 3), (0, 0),
                              (1, 1)]:
            nfs = NFoldPartitioner(cvtype=1,
                                   count=count,
                                   selection_strategy=strategy)
            splits = [
                list(spl.generate(p)) for p in nfs.generate(self.data)
            ]
            self.assertTrue(len(splits) == target)
            chosenchunks = [int(s[1].uniquechunks) for s in splits]

            # Test if configuration matches as well
            nsplits_cfg = len(nfs.get_partition_specs(self.data))
            self.assertEqual(nsplits_cfg, target)

            # Check if "lastsplit" dsattr was assigned appropriately
            nsplits = len(splits)
            if nsplits > 0:
                # dummy-proof testing of last split
                for ds_ in splits[-1]:
                    self.assertTrue(ds_.a.lastpartitionset)
                # test all now
                for isplit, split in enumerate(splits):
                    for ds_ in split:
                        self.assertTrue(ds_.a.lastpartitionset ==
                                        (isplit == nsplits - 1))

            # Check results of different strategies
            if strategy == 'first':
                self.assertEqual(chosenchunks, range(target))
            elif strategy == 'equidistant':
                if target == 3:
                    self.assertEqual(chosenchunks, [0, 3, 7])
            elif strategy == 'random':
                # none is selected twice
                self.assertTrue(
                    len(set(chosenchunks)) == len(chosenchunks))
                self.assertTrue(target == len(chosenchunks))
            else:
                raise RuntimeError, "Add unittest for strategy %s" \
                      % strategy
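# A hedged illustration of where the expected [0, 3, 7] for the
# 'equidistant' strategy could come from when picking 3 of 10 chunks:
# rounding evenly spaced positions across the range reproduces it.  This
# helper is illustrative only, not necessarily the library's exact formula.
def equidistant_indices(n, count):
    # spread `count` picks evenly over `n` items, rounding each position
    return [int(round(i * n / float(count))) for i in range(count)]

print equidistant_indices(10, 3)  # -> [0, 3, 7]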
def test_slicing(self):
    hs = HalfPartitioner()
    spl = Splitter(attr='partitions')
    splits = list(hs.generate(self.data))
    for s in splits:
        # partitioned dataset shares the data
        assert_true(s.samples.base is self.data.samples)
    splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
    for s in splits:
        # we get slicing all the time
        assert_true(s[0].samples.base.base is self.data.samples)
        assert_true(s[1].samples.base.base is self.data.samples)
    spl = Splitter(attr='partitions', noslicing=True)
    splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
    for s in splits:
        # no slicing at all
        assert_false(s[0].samples.base is self.data.samples)
        assert_false(s[1].samples.base is self.data.samples)
    nfs = NFoldPartitioner()
    spl = Splitter(attr='partitions')
    splits = [ list(spl.generate(p)) for p in nfs.generate(self.data) ]
    for i, s in enumerate(splits):
        # training only first and last split
        if i == 0 or i == len(splits) - 1:
            assert_true(s[0].samples.base.base is self.data.samples)
        else:
            assert_true(s[0].samples.base is None)
        # we get slicing all the time
        assert_true(s[1].samples.base.base is self.data.samples)
    step_ds = Dataset(np.random.randn(20, 2),
                      sa={'chunks': np.tile([0, 1], 10)})
    oes = OddEvenPartitioner()
    spl = Splitter(attr='partitions')
    splits = list(oes.generate(step_ds))
    for s in splits:
        # partitioned dataset shares the data
        assert_true(s.samples.base is step_ds.samples)
    splits = [ list(spl.generate(p)) for p in oes.generate(step_ds) ]
    assert_equal(len(splits), 2)
    for s in splits:
        # we get slicing all the time
        assert_true(s[0].samples.base.base is step_ds.samples)
        assert_true(s[1].samples.base.base is step_ds.samples)
def _test_gideon_weird_case(self):
    """'The utter collapse' -- communicated by Peter J. Kohler

    Desire to collapse all samples per each category in training
    and testing sets, thus resulting only in a single
    sample/category per training and per testing.

    As it is now, CrossValidation on MappedClassifier would not work.

    observations: chance distribution obviously gets wide, but
    also gets skewed to anti-learning on nfolds like 4.
    """
    from mvpa2.mappers.fx import mean_group_sample
    from mvpa2.clfs.knn import kNN
    clf = kNN()
    ds = datasets['uni2large'].copy()
    ds = ds[ds.sa.chunks < 9]
    accs = []
    for i in xrange(10):          # # of random samples
        ds.samples = np.random.randn(*ds.shape)
        if False: # this would have been a native way IF we allowed change of number of samples
            clf2 = MappedClassifier(clf=kNN(), #clf,
                        mapper=mean_group_sample(['targets', 'partitions']))
            cv = CrossValidation(clf2, NFoldPartitioner(4), postproc=None,
                                 enable_ca=['stats'])
            print cv(ds)
        else:
            from mvpa2.clfs.transerror import ConfusionMatrix
            partitioner = NFoldPartitioner(6)
            meaner = mean_group_sample(['targets', 'partitions'])
            cm = ConfusionMatrix()
            te = TransferMeasure(clf, Splitter('partitions'),
                                 postproc=BinaryFxNode(mean_mismatch_error,
                                                       'targets'),
                                 enable_ca=['stats'])
            for part in partitioner.generate(ds):
                ds_meaned = meaner(part)
                error = np.asscalar(te(ds_meaned))
                cm += te.ca.stats
            print i, cm.stats['ACC']
            accs.append(cm.stats['ACC'])
def test_gideon_weird_case(self):
    """Test if MappedClassifier could handle a mapper altering the number of samples

    'The utter collapse' -- communicated by Peter J. Kohler

    Desire to collapse all samples per each category in training
    and testing sets, thus resulting only in a single
    sample/category per training and per testing.

    It is a peculiar scenario which pinpoints the problem that, so
    far, mappers were assumed not to change the number of samples
    """
    from mvpa2.mappers.fx import mean_group_sample
    from mvpa2.clfs.knn import kNN
    from mvpa2.mappers.base import ChainMapper
    ds = datasets['uni2large'].copy()
    #ds = ds[ds.sa.chunks < 9]
    accs = []
    k = 1                           # for kNN
    nf = 1                          # for NFoldPartitioner
    for i in xrange(1):             # # of random runs
        ds.samples = np.random.randn(*ds.shape)
        #
        # There are 3 ways to accomplish needed goal
        #

        # 0. Hard way: overcome the problem by manually
        #    pre-splitting/meaning in a loop
        from mvpa2.clfs.transerror import ConfusionMatrix
        partitioner = NFoldPartitioner(nf)
        meaner = mean_group_sample(['targets', 'partitions'])
        cm = ConfusionMatrix()
        te = TransferMeasure(kNN(k), Splitter('partitions'),
                             postproc=BinaryFxNode(mean_mismatch_error,
                                                   'targets'),
                             enable_ca = ['stats'])
        errors = []
        for part in partitioner.generate(ds):
            ds_meaned = meaner(part)
            errors.append(np.asscalar(te(ds_meaned)))
            cm += te.ca.stats
        #print i, cm.stats['ACC']
        accs.append(cm.stats['ACC'])

        if False: # not yet working -- see _tent/allow_ch_nsamples
                  # branch for attempt to make it work
            # 1. This is a "native way" IF we allow change of number
            #    of samples via _call to be done by MappedClassifier
            #    while operating solely on the mapped dataset
            clf2 = MappedClassifier(clf=kNN(k), #clf,
                        mapper=mean_group_sample(['targets', 'partitions']))
            cv = CrossValidation(clf2, NFoldPartitioner(nf), postproc=None,
                                 enable_ca=['stats'])
            # meaning all should be ok since we should have balanced
            # sets across all chunks here
            errors_native = cv(ds)

            self.assertEqual(np.max(np.abs(errors_native.samples[:,0]
                                           - errors)),
                             0)

        # 2. Work without fixes to MappedClassifier allowing
        #    change of # of samples
        #
        # CrossValidation will operate on a chain mapper which
        # would perform necessary meaning first before dealing with
        # kNN cons: .stats would not be exposed since ChainMapper
        # doesn't expose them from ChainMapper (yet)
        if __debug__ and 'ENFORCE_CA_ENABLED' in debug.active:
            raise SkipTest("Known to fail while trying to enable "
                           "training_stats for the ChainMapper")
        cv2 = CrossValidation(ChainMapper([mean_group_sample(['targets', 'partitions']),
                                           kNN(k)],
                                          space='targets'),
                              NFoldPartitioner(nf),
                              postproc=None)
        errors_native2 = cv2(ds)

        self.assertEqual(np.max(np.abs(errors_native2.samples[:,0]
                                       - errors)),
                         0)

        # All of the ways should provide the same results
        #print i, np.max(np.abs(errors_native.samples[:,0] - errors)), \
        #      np.max(np.abs(errors_native2.samples[:,0] - errors))

    if False: # just to investigate the distribution if we have enough iterations
        import pylab as pl
        uaccs = np.unique(accs)
        step = np.asscalar(np.unique(np.round(uaccs[1:] - uaccs[:-1], 4)))
        bins = np.linspace(0., 1., np.round(1./step + 1))
        xx = pl.hist(accs, bins=bins, align='left')
        pl.xlim((0. - step/2, 1. + step/2))
def test_gideon_weird_case(self):
    """Test if MappedClassifier could handle a mapper altering the number of samples

    'The utter collapse' -- communicated by Peter J. Kohler

    Desire to collapse all samples per each category in training
    and testing sets, thus resulting only in a single
    sample/category per training and per testing.

    It is a peculiar scenario which pinpoints the problem that, so
    far, mappers were assumed not to change the number of samples
    """
    from mvpa2.mappers.fx import mean_group_sample
    from mvpa2.clfs.knn import kNN
    from mvpa2.mappers.base import ChainMapper
    ds = datasets['uni2large'].copy()
    #ds = ds[ds.sa.chunks < 9]
    accs = []
    k = 1  # for kNN
    nf = 1  # for NFoldPartitioner
    for i in xrange(1):  # # of random runs
        ds.samples = np.random.randn(*ds.shape)
        #
        # There are 3 ways to accomplish needed goal
        #

        # 0. Hard way: overcome the problem by manually
        #    pre-splitting/meaning in a loop
        from mvpa2.clfs.transerror import ConfusionMatrix
        partitioner = NFoldPartitioner(nf)
        meaner = mean_group_sample(['targets', 'partitions'])
        cm = ConfusionMatrix()
        te = TransferMeasure(kNN(k),
                             Splitter('partitions'),
                             postproc=BinaryFxNode(mean_mismatch_error,
                                                   'targets'),
                             enable_ca=['stats'])
        errors = []
        for part in partitioner.generate(ds):
            ds_meaned = meaner(part)
            errors.append(np.asscalar(te(ds_meaned)))
            cm += te.ca.stats
        #print i, cm.stats['ACC']
        accs.append(cm.stats['ACC'])

        if False:
            # not yet working -- see _tent/allow_ch_nsamples
            # branch for attempt to make it work
            # 1. This is a "native way" IF we allow change of number
            #    of samples via _call to be done by MappedClassifier
            #    while operating solely on the mapped dataset
            clf2 = MappedClassifier(
                clf=kNN(k),  #clf,
                mapper=mean_group_sample(['targets', 'partitions']))
            cv = CrossValidation(clf2,
                                 NFoldPartitioner(nf),
                                 postproc=None,
                                 enable_ca=['stats'])
            # meaning all should be ok since we should have balanced
            # sets across all chunks here
            errors_native = cv(ds)

            self.assertEqual(
                np.max(np.abs(errors_native.samples[:, 0] - errors)), 0)

        # 2. Work without fixes to MappedClassifier allowing
        #    change of # of samples
        #
        # CrossValidation will operate on a chain mapper which
        # would perform necessary meaning first before dealing with
        # kNN cons: .stats would not be exposed since ChainMapper
        # doesn't expose them from ChainMapper (yet)
        if __debug__ and 'ENFORCE_CA_ENABLED' in debug.active:
            raise SkipTest("Known to fail while trying to enable "
                           "training_stats for the ChainMapper")
        cv2 = CrossValidation(ChainMapper(
            [mean_group_sample(['targets', 'partitions']),
             kNN(k)],
            space='targets'),
                              NFoldPartitioner(nf),
                              postproc=None)
        errors_native2 = cv2(ds)

        self.assertEqual(
            np.max(np.abs(errors_native2.samples[:, 0] - errors)), 0)

        # All of the ways should provide the same results
        #print i, np.max(np.abs(errors_native.samples[:,0] - errors)), \
        #      np.max(np.abs(errors_native2.samples[:,0] - errors))

    if False:
        # just to investigate the distribution if we have enough iterations
        import pylab as pl
        uaccs = np.unique(accs)
        step = np.asscalar(np.unique(np.round(uaccs[1:] - uaccs[:-1], 4)))
        bins = np.linspace(0., 1., np.round(1. / step + 1))
        xx = pl.hist(accs, bins=bins, align='left')
        pl.xlim((0. - step / 2, 1. + step / 2))
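# A small sketch of the collapsing step both variants above rely on:
# mean_group_sample(['targets', 'partitions']) averages all samples sharing
# a (target, partition) combination, so each partition ends up with exactly
# one sample per category.  The toy dataset here is made up for illustration.
import numpy as np
from mvpa2.datasets.base import Dataset
from mvpa2.mappers.fx import mean_group_sample

ds = Dataset(np.arange(16, dtype=float).reshape(8, 2),
             sa={'targets': ['a', 'b'] * 4,
                 'partitions': [1, 1, 1, 1, 2, 2, 2, 2]})
ds_meaned = mean_group_sample(['targets', 'partitions'])(ds)
print ds_meaned.nsamples  # 4: one sample per target per partition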
def test_analyzer_with_split_classifier(self, clfds):
    """Test analyzers in split classifier
    """
    clf, ds = clfds             # unroll the tuple
    # We need to skip some LARSes here
    _sclf = str(clf)
    if 'LARS(' in _sclf and "type='stepwise'" in _sclf:
        # ADD KnownToFail thingie from NiPy
        return

    # To avoid wasting too much time testing, let's limit to 3 splits
    nsplits = 3
    partitioner = NFoldPartitioner(count=nsplits)
    mclf = SplitClassifier(clf=clf,
                           partitioner=partitioner,
                           enable_ca=['training_stats', 'stats'])
    sana = mclf.get_sensitivity_analyzer(# postproc=absolute_features(),
                                         pass_attr=['fa.nonbogus_targets'],
                                         enable_ca=["sensitivities"])

    ulabels = ds.uniquetargets
    nlabels = len(ulabels)
    # Can't rely on splitcfg since count-limit is done in __call__
    assert(nsplits == len(list(partitioner.generate(ds))))
    sens = sana(ds)
    assert('nonbogus_targets' in sens.fa) # were they passed?
    # TODO: those few do not expose biases
    if not len(set(clf.__tags__).intersection(('lars', 'glmnet', 'gpr'))):
        assert('biases' in sens.sa)
        # print sens.sa.biases
    # It should return either ...
    #  nlabels * nsplits
    req_nsamples = [ nlabels * nsplits ]
    if nlabels == 2:
        # A single sensitivity in case of binary
        req_nsamples += [ nsplits ]
    else:
        # and for pairs in case of multiclass
        req_nsamples += [ (nlabels * (nlabels - 1) / 2) * nsplits ]
        # and for 1-vs-1 embedded within Multiclass operating on
        # pairs (e.g. SMLR)
        req_nsamples += [req_nsamples[-1] * 2]

        # Also for regression_based -- they can do multiclass
        # but only 1 sensitivity is provided
        if 'regression_based' in clf.__tags__:
            req_nsamples += [ nsplits ]

    # # of features should correspond
    self.assertEqual(sens.shape[1], ds.nfeatures)
    # # of samples/sensitivities should also be reasonable
    self.assertTrue(sens.shape[0] in req_nsamples)

    # Check if labels are present
    self.assertTrue('splits' in sens.sa)
    self.assertTrue('targets' in sens.sa)
    # should be 1D -- otherwise dtype object
    self.assertTrue(sens.sa.targets.ndim == 1)

    sens_ulabels = sens.sa['targets'].unique
    # Some labels might be pairs(tuples) so ndarray would be of
    # dtype object and we would need to get them all
    if sens_ulabels.dtype is np.dtype('object'):
        sens_ulabels = np.unique(
            reduce(lambda x, y: x + y, [list(x) for x in sens_ulabels]))

    assert_array_equal(sens_ulabels, ds.sa['targets'].unique)

    errors = [x.percent_correct
              for x in sana.clf.ca.stats.matrices]

    # let's go through all sensitivities and see if we selected the right
    # features
    #if 'meta' in clf.__tags__ and len(sens.samples[0].nonzero()[0])<2:
    if '5%' in clf.descr \
           or (nlabels > 2 and 'regression_based' in clf.__tags__):
        # Some meta classifiers (5% of ANOVA) are too harsh ;-)
        # if we get less than 2 features with non-zero sensitivities we
        # cannot really test
        # Also -- regression based classifiers performance for multiclass
        # is expected to suck in general
        return

    if cfg.getboolean('tests', 'labile', default='yes'):
        for conf_matrix in [sana.clf.ca.training_stats] \
                          + sana.clf.ca.stats.matrices:
            self.assertTrue(
                conf_matrix.percent_correct >= 70,
                msg="We must have trained on each one more or "
                    "less correctly. Got %f%% correct on %d labels"
                    % (conf_matrix.percent_correct, nlabels))

    # Since now we have per split and possibly per label -- let's just find
    # mean per each feature per label across splits
    sensm = FxMapper('samples', lambda x: np.sum(x),
                     uattrs=['targets']).forward(sens)
    sensgm = maxofabs_sample().forward(sensm)    # global max of abs of means

    assert_equal(sensgm.shape[0], 1)
    assert_equal(sensgm.shape[1], ds.nfeatures)

    selected = FixedNElementTailSelector(
        len(ds.a.bogus_features))(sensgm.samples[0])

    if cfg.getboolean('tests', 'labile', default='yes'):

        self.assertEqual(
            set(selected), set(ds.a.nonbogus_features),
            msg="At the end we should have selected the right features. "
            "Chose %s whenever nonbogus are %s"
            % (selected, ds.a.nonbogus_features))

        # Now test each one per label
        # TODO: collect all failures and spit them out at once --
        #       that would make it easy to see if the sensitivity
        #       just has incorrect order of labels assigned
        for sens1 in sensm:
            labels1 = sens1.targets  # labels (1) for this sensitivity
            lndim = labels1.ndim
            label = labels1[0]      # current label

            # XXX whole lndim comparison should be gone after
            #     things get fixed and we arrive here with a tuple!
            if lndim == 1: # just a single label
                self.assertTrue(label in ulabels)

                ilabel_all = np.where(ds.fa.nonbogus_targets == label)[0]
                # should have just 1 feature for the label
                self.assertEqual(len(ilabel_all), 1)
                ilabel = ilabel_all[0]

                maxsensi = np.argmax(sens1) # index of max sensitivity
                self.assertEqual(maxsensi, ilabel,
                    "Maximal sensitivity for %s was found in %i whenever"
                    " original feature was %i for nonbogus features %s"
                    % (labels1, maxsensi, ilabel, ds.a.nonbogus_features))
            elif lndim == 2 and labels1.shape[1] == 2: # pair of labels
                # we should have highest (in abs) coefficients in
                # those two labels
                maxsensi2 = np.argsort(np.abs(sens1))[0][-2:]
                ilabel2 = [np.where(ds.fa.nonbogus_targets == l)[0][0]
                           for l in label]
                self.assertEqual(
                    set(maxsensi2), set(ilabel2),
                    "Maximal sensitivity for %s was found in %s whenever"
                    " original features were %s for nonbogus features %s"
                    % (labels1, maxsensi2, ilabel2, ds.a.nonbogus_features))
                """
                # Now test for the sign of each one in pair ;) in
                # all binary problems L1 (-1) -> L2(+1), then
                # weights for L2 should be positive.  to test for
                # L1 -- invert the sign
                # We already know (if we haven't failed in previous test),
                # that those 2 were the strongest -- so check only signs
                """
                self.assertTrue(
                    sens1.samples[0, ilabel2[0]] < 0,
                    "With %i classes in pair %s got feature %i for %r >= 0"
                    % (nlabels, label, ilabel2[0], label[0]))
                self.assertTrue(
                    sens1.samples[0, ilabel2[1]] > 0,
                    "With %i classes in pair %s got feature %i for %r <= 0"
                    % (nlabels, label, ilabel2[1], label[1]))
            else:
                # yoh could be wrong at this assumption... time will show
                self.fail("Got unknown number labels per sensitivity: %s."
                          " Should be either a single label or a pair"
                          % labels1)
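# A hedged plain-numpy sketch of the feature-selection logic verified at the
# end of the test above: per-label sensitivities are aggregated, the global
# max of absolute values is taken per feature, and the strongest features
# should be the known signal (nonbogus) ones.  All numbers are made up.
import numpy as np

sens = np.array([[0.9, -0.1, 0.05],   # rows: per-label sensitivities
                 [-0.1, 0.8, 0.02]])  # cols: features
sensgm = np.max(np.abs(sens), axis=0)  # global max of abs per feature
nbogus = 1                             # pretend one feature is noise
selected = sorted(np.argsort(sensgm)[nbogus:])  # keep the strongest ones
print selected  # -> [0, 1], the signal features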
def test_factorialpartitioner():
    # Test against sifter and chainmap implemented in test_usecases
    # -- code below copied from test_usecases --

    # Let's simulate the beast -- 6 categories total grouped into 3
    # super-ordinate, and actually without any 'superordinate' effect
    # since subordinate categories independent
    ds = normal_feature_dataset(
        nlabels=6, snr=100, perlabel=30, nfeatures=6,
        nonbogus_features=range(6),
        nchunks=5  # pure signal! ;)
    )
    ds.sa["subord"] = ds.sa.targets.copy()
    ds.sa["superord"] = ["super%d" % (int(i[1]) % 3,)
                         for i in ds.targets]  # 3 superord categories
    # let's override original targets just to be sure that we aren't
    # relying on them
    ds.targets[:] = 0

    # let's make two other datasets to test later
    # one superordinate category only
    ds_1super = ds.copy()
    ds_1super.sa["superord"] = ["super1" for i in ds_1super.targets]

    # one superordinate category has only one subordinate
    # ds_unbalanced = ds.copy()
    # nsuper1 = np.sum(ds_unbalanced.sa.superord == 'super1')
    # mask_superord = ds_unbalanced.sa.superord == 'super1'
    # uniq_subord = np.unique(ds_unbalanced.sa.subord[mask_superord])
    # ds_unbalanced.sa.subord[mask_superord] = [uniq_subord[0]
    #                                           for i in range(nsuper1)]
    ds_unbalanced = Dataset(range(4), sa={"subord": [0, 0, 1, 2],
                                          "superord": [1, 1, 2, 2]})

    npart = ChainNode(
        [
            ## so we split based on superord
            NFoldPartitioner(len(ds.sa["superord"].unique), attr="subord"),
            ## so it should select only those splits where we took 1 from
            ## each of the superord categories leaving things in balance
            Sifter([("partitions", 2),
                    ("superord", {"uvalues": ds.sa["superord"].unique,
                                  "balanced": True})]),
        ],
        space="partitions",
    )

    # now the new implementation
    factpart = FactorialPartitioner(NFoldPartitioner(attr="subord"),
                                    attr="superord")

    partitions_npart = [p.sa.partitions for p in npart.generate(ds)]
    partitions_factpart = [p.sa.partitions for p in factpart.generate(ds)]

    assert_array_equal(np.sort(partitions_npart),
                       np.sort(partitions_factpart))

    # now let's check it behaves correctly if we have only one superord class
    nfold = NFoldPartitioner(attr="subord")
    partitions_nfold = [p.sa.partitions for p in nfold.generate(ds_1super)]
    partitions_factpart = [p.sa.partitions
                           for p in factpart.generate(ds_1super)]
    assert_array_equal(np.sort(partitions_nfold),
                       np.sort(partitions_factpart))

    # smoke test for unbalanced subord classes
    warning_msg = (
        "One or more superordinate attributes do not have the same "
        "number of subordinate attributes. This could yield to "
        "unbalanced partitions."
    )
    with assert_warnings([(RuntimeWarning, warning_msg)]):
        partitions_factpart = [p.sa.partitions
                               for p in factpart.generate(ds_unbalanced)]

    partitions_unbalanced = [np.array([2, 2, 2, 1]), np.array([2, 2, 1, 2])]
    superord_unbalanced = [([2], [1, 1, 2]), ([2], [1, 1, 2])]
    subord_unbalanced = [([2], [0, 0, 1]), ([1], [0, 0, 2])]

    for out_part, true_part, super_out, sub_out in zip(
            partitions_factpart, partitions_unbalanced,
            superord_unbalanced, subord_unbalanced):
        assert_array_equal(out_part, true_part)
        assert_array_equal(
            (ds_unbalanced[out_part == 1].sa.superord.tolist(),
             ds_unbalanced[out_part == 2].sa.superord.tolist()),
            super_out,
        )
        assert_array_equal(
            (ds_unbalanced[out_part == 1].sa.subord.tolist(),
             ds_unbalanced[out_part == 2].sa.subord.tolist()),
            sub_out,
        )

    # now let's test on a dummy dataset
    ds_dummy = Dataset(range(4), sa={"subord": range(4),
                                     "superord": [1, 2] * 2})
    partitions_factpart = [p.sa.partitions
                           for p in factpart.generate(ds_dummy)]
    assert_array_equal(partitions_factpart,
                       [[2, 2, 1, 1], [2, 1, 1, 2],
                        [1, 2, 2, 1], [1, 1, 2, 2]])
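# A pure-Python sketch of what FactorialPartitioner does on ds_dummy above:
# within each superord group the wrapped NFoldPartitioner holds one subord
# out, and the emitted partitionings are the Cartesian product of those
# per-group choices (2 == testing, 1 == training).
from itertools import product

superord = [1, 2, 1, 2]  # ds_dummy.sa.superord; each subord is unique here
groups = [[i for i, g in enumerate(superord) if g == s]
          for s in sorted(set(superord))]
partitionings = [[2 if i in test_idxs else 1 for i in range(len(superord))]
                 for test_idxs in product(*groups)]
print partitionings
# -> [[2, 2, 1, 1], [2, 1, 1, 2], [1, 2, 2, 1], [1, 1, 2, 2]]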
def generate_roc_curve(mech_vec_list, mech_nm_list,
                       semantic_range=np.arange(0.2, 2.7, 0.3),
                       mech_range=np.arange(0.2, 6.5, 0.7),
                       n_prev_trials=1, prev_c='r',
                       plot_prev=True, sem_c='b', sem_m='+',
                       plot_semantic=True,
                       semantic_label='operating 1st time and \n known mechanism class'):

    t_nm_list, t_mech_vec_list = [], []
    for i, nm in enumerate(mech_nm_list):
        ## print 'nm:', nm
        if 'known' in nm:
            continue
        t_nm_list.append(nm)
        t_mech_vec_list.append(mech_vec_list[i])

    data, _ = mar.create_blocked_dataset_semantic_classes(t_mech_vec_list,
                                                          t_nm_list,
                                                          append_robot=False)
    ## label_splitter = NFoldSplitter(cvtype=1, attr='labels')
    thresh_dict = ut.load_pickle('blocked_thresh_dict.pkl')  # human + robot data
    mean_charlie_dict = thresh_dict['mean_charlie']
    mean_known_mech_dict = thresh_dict['mean_known_mech']

    #---------------- semantic class prior -------------
    if plot_semantic:
        fp_l_l = []
        mn_l_l = []
        err_l_l = []
        mech_fp_l_l = []
        mech_mn_l_l = []
        mech_err_l_l = []

        nfs = NFoldPartitioner(cvtype=1, attr='targets')  # 1-fold ?
        label_splitter = splitters.Splitter(attr='partitions')
        splits = [list(label_splitter.generate(x)) for x in nfs.generate(data)]

        # Grouping by labels
        for l_wdata, l_vdata in splits:  #label_splitter(data):
            print "Number of data: ", len(l_vdata.chunks)

            # Why zero??? Do we want specific chunk? -> changed into 10
            lab = l_vdata.targets[0]    # all same label
            chunk = l_vdata.chunks[0]   # chunk should be independent!!
            trials = l_vdata.samples

            if lab == 'Refrigerator':
                lab = 'Fridge'

            ## tot_mean = None
            ## tot_std = None
            ## for chunk in l_vdata.chunks:
            ##     _, mean, std = mean_charlie_dict[chunk] # mean except the specified chunk in same class
            ##     if tot_mean is None:
            ##         tot_mean = mean
            ##         tot_std = std
            ##     else:
            ##         tot_mean += mean
            ##         tot_std += std
            ##     print chunk, mean[0], tot_mean[0]

            ## mean = tot_mean/float(len(l_vdata.chunks))
            ## std = tot_std/float(len(l_vdata.chunks))
            ## print mean[0], tot_mean[0], float(len(l_vdata.chunks))
            ## sys.exit()

            # Select evaluation chunk for the ROC ?
            ## _, mean, std = mean_charlie_dict[lab]
            _, mean, std = mean_charlie_dict[chunk]

            # cutting into the same length
            min_len = min(len(mean), trials.shape[1])
            trials = trials[:, :min_len]
            mean = mean[:min_len]
            std = std[:min_len]  #???

            mn_list = []
            fp_list, err_list = [], []
            for n in semantic_range:
                err = (mean + n*std) - trials
                #false_pos = np.sum(np.any(err<0, 1))
                #tot = trials.shape[0]
                false_pos = np.sum(err<0)  # Count false cases
                tot = trials.shape[0] * trials.shape[1]
                fp_list.append(false_pos/(tot*0.01))
                err = err[np.where(err>0)]
                err_list.append(err.flatten())
                mn_list.append(np.mean(err))
            err_l_l.append(err_list)
            fp_l_l.append(fp_list)
            mn_l_l.append(mn_list)

        ll = [[] for i in err_l_l[0]]  # why 0?
        for i, e in enumerate(err_l_l):   # labels
            for j, l in enumerate(ll):    # multiplier range
                l.append(e[j])

        std_list = []
        for l in ll:
            std_list.append(np.std(np.concatenate(l).flatten()))

        mn_list = np.mean(np.row_stack(mn_l_l), 0).tolist()  # means into a row
        fp_list = np.mean(np.row_stack(fp_l_l), 0).tolist()

        #pp.errorbar(fp_list, mn_list, std_list)

        ## mn_list = np.array(mn_l_l).flatten()
        ## fp_list = np.array(fp_l_l).flatten()
        pp.plot(fp_list, mn_list, '--'+sem_m+sem_c, label=semantic_label,
                mec=sem_c, ms=8, mew=2)
        #pp.plot(fp_list, mn_list, '-ob', label='with prior')

    #---------------- mechanism knowledge prior -------------
    if plot_prev:
        t_nm_list, t_mech_vec_list = [], []
        for i, nm in enumerate(mech_nm_list):
            ## print 'nm:', nm
            if 'known' in nm:
                t_nm_list.append(nm)
                t_mech_vec_list.append(mech_vec_list[i])
        if t_nm_list == []:
            t_mech_vec_list = mech_vec_list
            t_nm_list = mech_nm_list

        data, _ = mar.create_blocked_dataset_semantic_classes(t_mech_vec_list,
                                                              t_nm_list,
                                                              append_robot=False)

        ## chunk_splitter = NFoldSplitter(cvtype=1, attr='chunks')
        nfs = NFoldPartitioner(cvtype=1, attr='chunks')  # 1-fold ?
        chunk_splitter = splitters.Splitter(attr='partitions')
        splits = [list(chunk_splitter.generate(x)) for x in nfs.generate(data)]

        err_mean_list = []
        err_std_list = []
        fp_list = []
        for n in mech_range:
            false_pos = 0
            n_trials = 0
            err_list = []
            for _, l_vdata in splits:  #chunk_splitter(data):
                lab = l_vdata.targets[0]
                trials = l_vdata.samples
                m = l_vdata.chunks[0]
                #one_trial = trials[0].reshape(1, len(trials[0]))
                one_trial = trials[0:n_prev_trials]
                ## print n, ": ", lab, chunk

                Ms, n_std = mean_known_mech_dict[m]
                mn_list, std_list = mar.estimate_theta(one_trial, Ms,
                                                       plot=False, add_var=0.0)
                mn_mech_arr = np.array(mn_list)
                std_mech_arr = np.array(std_list)

                # trials = trials[:,:len(mn_mech_arr)]
                min_len = min(len(mn_mech_arr), trials.shape[1])
                trials = trials[:, :min_len]
                mn_mech_arr = mn_mech_arr[:min_len]
                std_mech_arr = std_mech_arr[:min_len]

                for t in trials:
                    err = (mn_mech_arr + n*std_mech_arr) - t
                    #false_pos += np.any(err<0)
                    #n_trials += 1
                    false_pos += np.sum(err<0)
                    n_trials += len(err)
                    err = err[np.where(err>0)]
                    err_list.append(err)

            e_all = np.concatenate(err_list)
            err_mean_list.append(np.mean(e_all))
            err_std_list.append(np.std(e_all))
            fp_list.append(false_pos/(n_trials*0.01))

        #pp.plot(fp_list, err_mean_list, '-o'+prev_c,
        #        label='knowledge of mechanism and \n opened earlier %d times'%n_prev_trials)
        pp.plot(fp_list, err_mean_list, '-o'+prev_c, mec=prev_c, ms=5,
                label='operating 2nd time and \n known mechanism identity')
        #pp.plot(fp_list, err_mean_list, '-or', label='with prior')

    pp.xlabel('False positive rate (percentage)', fontsize=22)
    pp.ylabel('Mean excess force (Newtons)', fontsize=22)
    pp.xlim(-0.5, 45)
    mpu.legend()
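# A plain-numpy sketch of the ROC point computed inside both loops above:
# err is the slack between the threshold profile mean + n*std and each
# observed trial; negative entries are threshold crossings (false
# positives), positive entries average into the "mean excess force".
# The arrays here are made up for illustration.
import numpy as np

mean = np.array([1.0, 1.2, 1.1])      # expected force profile
std = np.array([0.1, 0.2, 0.15])      # per-timestep spread
trials = np.array([[1.05, 1.1, 1.4],  # observed force trials
                   [0.9, 1.6, 1.0]])
n = 1.0                               # std multiplier being swept
err = (mean + n * std) - trials
fp_rate = np.sum(err < 0) / (err.size * 0.01)  # percentage, as in fp_list
mean_excess = np.mean(err[err > 0])
print fp_rate, mean_excess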
def test_factorialpartitioner():
    # Test against sifter and chainmap implemented in test_usecases
    # -- code below copied from test_usecases --

    # Let's simulate the beast -- 6 categories total grouped into 3
    # super-ordinate, and actually without any 'superordinate' effect
    # since subordinate categories independent
    ds = normal_feature_dataset(
        nlabels=6,
        snr=100,  # pure signal! ;)
        perlabel=30,
        nfeatures=6,
        nonbogus_features=range(6),
        nchunks=5)
    ds.sa['subord'] = ds.sa.targets.copy()
    ds.sa['superord'] = ['super%d' % (int(i[1]) % 3, )
                         for i in ds.targets]  # 3 superord categories
    # let's override original targets just to be sure that we aren't
    # relying on them
    ds.targets[:] = 0

    # let's make two other datasets to test later
    # one superordinate category only
    ds_1super = ds.copy()
    ds_1super.sa['superord'] = ['super1' for i in ds_1super.targets]

    # one superordinate category has only one subordinate
    #ds_unbalanced = ds.copy()
    #nsuper1 = np.sum(ds_unbalanced.sa.superord == 'super1')
    #mask_superord = ds_unbalanced.sa.superord == 'super1'
    #uniq_subord = np.unique(ds_unbalanced.sa.subord[mask_superord])
    #ds_unbalanced.sa.subord[mask_superord] = [uniq_subord[0]
    #                                          for i in range(nsuper1)]
    ds_unbalanced = Dataset(range(4), sa={
        'subord': [0, 0, 1, 2],
        'superord': [1, 1, 2, 2]
    })

    npart = ChainNode(
        [
            ## so we split based on superord
            NFoldPartitioner(len(ds.sa['superord'].unique), attr='subord'),
            ## so it should select only those splits where we took 1 from
            ## each of the superord categories leaving things in balance
            Sifter([('partitions', 2), ('superord', {
                'uvalues': ds.sa['superord'].unique,
                'balanced': True
            })]),
        ],
        space='partitions')

    # now the new implementation
    factpart = FactorialPartitioner(NFoldPartitioner(attr='subord'),
                                    attr='superord')

    partitions_npart = [p.sa.partitions for p in npart.generate(ds)]
    partitions_factpart = [p.sa.partitions for p in factpart.generate(ds)]

    assert_array_equal(np.sort(partitions_npart),
                       np.sort(partitions_factpart))

    # now let's check it behaves correctly if we have only one superord class
    nfold = NFoldPartitioner(attr='subord')
    partitions_nfold = [p.sa.partitions for p in nfold.generate(ds_1super)]
    partitions_factpart = [
        p.sa.partitions for p in factpart.generate(ds_1super)
    ]
    assert_array_equal(np.sort(partitions_nfold),
                       np.sort(partitions_factpart))

    # smoke test for unbalanced subord classes
    warning_msg = 'One or more superordinate attributes do not have the same '\
                  'number of subordinate attributes. This could yield to '\
                  'unbalanced partitions.'
    with assert_warnings([(RuntimeWarning, warning_msg)]):
        partitions_factpart = [
            p.sa.partitions for p in factpart.generate(ds_unbalanced)
        ]

    partitions_unbalanced = [np.array([2, 2, 2, 1]), np.array([2, 2, 1, 2])]
    superord_unbalanced = [([2], [1, 1, 2]), ([2], [1, 1, 2])]
    subord_unbalanced = [([2], [0, 0, 1]), ([1], [0, 0, 2])]

    for out_part, true_part, super_out, sub_out in \
            zip(partitions_factpart, partitions_unbalanced,
                superord_unbalanced, subord_unbalanced):
        assert_array_equal(out_part, true_part)
        assert_array_equal((ds_unbalanced[out_part == 1].sa.superord.tolist(),
                            ds_unbalanced[out_part == 2].sa.superord.tolist()),
                           super_out)
        assert_array_equal((ds_unbalanced[out_part == 1].sa.subord.tolist(),
                            ds_unbalanced[out_part == 2].sa.subord.tolist()),
                           sub_out)

    # now let's test on a dummy dataset
    ds_dummy = Dataset(range(4), sa={
        'subord': range(4),
        'superord': [1, 2] * 2
    })
    partitions_factpart = [
        p.sa.partitions for p in factpart.generate(ds_dummy)
    ]
    assert_array_equal(
        partitions_factpart,
        [[2, 2, 1, 1], [2, 1, 1, 2], [1, 2, 2, 1], [1, 1, 2, 2]])
def test_analyzer_with_split_classifier(self, clfds):
    """Test analyzers in split classifier
    """
    clf, ds = clfds  # unroll the tuple
    # We need to skip some LARSes here
    _sclf = str(clf)
    if 'LARS(' in _sclf and "type='stepwise'" in _sclf:
        # ADD KnownToFail thingie from NiPy
        return

    # To avoid wasting too much time testing, let's limit to 3 splits
    nsplits = 3
    partitioner = NFoldPartitioner(count=nsplits)
    mclf = SplitClassifier(clf=clf,
                           partitioner=partitioner,
                           enable_ca=['training_stats', 'stats'])
    sana = mclf.get_sensitivity_analyzer(
        # postproc=absolute_features(),
        pass_attr=['fa.nonbogus_targets'],
        enable_ca=["sensitivities"])

    ulabels = ds.uniquetargets
    nlabels = len(ulabels)
    # Can't rely on splitcfg since count-limit is done in __call__
    assert (nsplits == len(list(partitioner.generate(ds))))
    sens = sana(ds)
    assert ('nonbogus_targets' in sens.fa)  # were they passed?
    # TODO: those few do not expose biases
    if not len(set(clf.__tags__).intersection(('lars', 'glmnet', 'gpr'))):
        assert ('biases' in sens.sa)
        # print sens.sa.biases
    # It should return either ...
    #  nlabels * nsplits
    req_nsamples = [nlabels * nsplits]
    if nlabels == 2:
        # A single sensitivity in case of binary
        req_nsamples += [nsplits]
    else:
        # and for pairs in case of multiclass
        req_nsamples += [(nlabels * (nlabels - 1) / 2) * nsplits]
        # and for 1-vs-1 embedded within Multiclass operating on
        # pairs (e.g. SMLR)
        req_nsamples += [req_nsamples[-1] * 2]

        # Also for regression_based -- they can do multiclass
        # but only 1 sensitivity is provided
        if 'regression_based' in clf.__tags__:
            req_nsamples += [nsplits]

    # # of features should correspond
    self.assertEqual(sens.shape[1], ds.nfeatures)
    # # of samples/sensitivities should also be reasonable
    self.assertTrue(sens.shape[0] in req_nsamples)

    # Check if labels are present
    self.assertTrue('splits' in sens.sa)
    self.assertTrue('targets' in sens.sa)
    # should be 1D -- otherwise dtype object
    self.assertTrue(sens.sa.targets.ndim == 1)

    sens_ulabels = sens.sa['targets'].unique
    # Some labels might be pairs(tuples) so ndarray would be of
    # dtype object and we would need to get them all
    if sens_ulabels.dtype is np.dtype('object'):
        sens_ulabels = np.unique(
            reduce(lambda x, y: x + y, [list(x) for x in sens_ulabels]))

    assert_array_equal(sens_ulabels, ds.sa['targets'].unique)

    errors = [x.percent_correct for x in sana.clf.ca.stats.matrices]

    # let's go through all sensitivities and see if we selected the right
    # features
    #if 'meta' in clf.__tags__ and len(sens.samples[0].nonzero()[0])<2:
    if '5%' in clf.descr \
           or (nlabels > 2 and 'regression_based' in clf.__tags__):
        # Some meta classifiers (5% of ANOVA) are too harsh ;-)
        # if we get less than 2 features with non-zero sensitivities we
        # cannot really test
        # Also -- regression based classifiers performance for multiclass
        # is expected to suck in general
        return

    if cfg.getboolean('tests', 'labile', default='yes'):
        for conf_matrix in [sana.clf.ca.training_stats] \
                          + sana.clf.ca.stats.matrices:
            self.assertTrue(
                conf_matrix.percent_correct >= 70,
                msg="We must have trained on each one more or "
                    "less correctly. Got %f%% correct on %d labels"
                    % (conf_matrix.percent_correct, nlabels))

    # Since now we have per split and possibly per label -- let's just find
    # mean per each feature per label across splits
    sensm = FxMapper('samples', lambda x: np.sum(x),
                     uattrs=['targets']).forward(sens)
    sensgm = maxofabs_sample().forward(sensm)  # global max of abs of means

    assert_equal(sensgm.shape[0], 1)
    assert_equal(sensgm.shape[1], ds.nfeatures)

    selected = FixedNElementTailSelector(len(ds.a.bogus_features))(
        sensgm.samples[0])

    if cfg.getboolean('tests', 'labile', default='yes'):

        self.assertEqual(
            set(selected),
            set(ds.a.nonbogus_features),
            msg="At the end we should have selected the right features. "
            "Chose %s whenever nonbogus are %s" %
            (selected, ds.a.nonbogus_features))

        # Now test each one per label
        # TODO: collect all failures and spit them out at once --
        #       that would make it easy to see if the sensitivity
        #       just has incorrect order of labels assigned
        for sens1 in sensm:
            labels1 = sens1.targets  # labels (1) for this sensitivity
            lndim = labels1.ndim
            label = labels1[0]  # current label

            # XXX whole lndim comparison should be gone after
            #     things get fixed and we arrive here with a tuple!
            if lndim == 1:  # just a single label
                self.assertTrue(label in ulabels)

                ilabel_all = np.where(ds.fa.nonbogus_targets == label)[0]
                # should have just 1 feature for the label
                self.assertEqual(len(ilabel_all), 1)
                ilabel = ilabel_all[0]

                maxsensi = np.argmax(sens1)  # index of max sensitivity
                self.assertEqual(
                    maxsensi, ilabel,
                    "Maximal sensitivity for %s was found in %i whenever"
                    " original feature was %i for nonbogus features %s" %
                    (labels1, maxsensi, ilabel, ds.a.nonbogus_features))
            elif lndim == 2 and labels1.shape[1] == 2:  # pair of labels
                # we should have highest (in abs) coefficients in
                # those two labels
                maxsensi2 = np.argsort(np.abs(sens1))[0][-2:]
                ilabel2 = [
                    np.where(ds.fa.nonbogus_targets == l)[0][0]
                    for l in label
                ]
                self.assertEqual(
                    set(maxsensi2), set(ilabel2),
                    "Maximal sensitivity for %s was found in %s whenever"
                    " original features were %s for nonbogus features %s" %
                    (labels1, maxsensi2, ilabel2, ds.a.nonbogus_features))
                """
                # Now test for the sign of each one in pair ;) in
                # all binary problems L1 (-1) -> L2(+1), then
                # weights for L2 should be positive.  to test for
                # L1 -- invert the sign
                # We already know (if we haven't failed in previous test),
                # that those 2 were the strongest -- so check only signs
                """
                self.assertTrue(
                    sens1.samples[0, ilabel2[0]] < 0,
                    "With %i classes in pair %s got feature %i for %r >= 0"
                    % (nlabels, label, ilabel2[0], label[0]))
                self.assertTrue(
                    sens1.samples[0, ilabel2[1]] > 0,
                    "With %i classes in pair %s got feature %i for %r <= 0"
                    % (nlabels, label, ilabel2[1], label[1]))
            else:
                # yoh could be wrong at this assumption... time will show
                self.fail("Got unknown number labels per sensitivity: %s."
                          " Should be either a single label or a pair" %
                          labels1)
def timesegments_classification(dss,
                                window_size=6,
                                overlapping_windows=True,
                                distance='correlation',
                                do_zscore=True):
    """Time-segment classification across subjects using Hyperalignment

    Parameters
    ----------
    dss : list of datasets
       Datasets to benchmark on.  Usually a single dataset per subject.
    window_size : int, optional
       How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
       Strategy for how to create and classify "samples" for
       classification.  If True -- `window_size` samples from each time
       point (except the trailing ones) constitute a sample, and upon
       "predict" `window_size` of samples around each test point is not
       considered.  If False -- samples are just taken (with training and
       testing splits) at `window_size` step from one to another.
    distance : str, optional
       Distance measure for matching test to training time segments;
       only 'correlation' is implemented.
    do_zscore : bool, optional
       Perform zscoring (overall, not per-chunk) for each dataset
       upon partitioning with part1
    ...
    """
    part2 = NFoldPartitioner(attr='subjects')
    # Check if input list contains Datasets, ndarrays
    dss = [Dataset(ds) if not type(ds) == Dataset else ds for ds in dss]
    # TODO: allow for doing feature selection
    if do_zscore:
        for ds in dss:
            zscore(ds, chunks_attr=None)

    # assign .sa.subjects to those datasets
    for i, ds in enumerate(dss):
        # part2.attr is by default "subjects"
        ds.sa[part2.attr] = [i]

    dss_test_bc = []
    for ds in dss:
        if overlapping_windows:
            startpoints = range(len(ds) - window_size + 1)
        else:
            startpoints = _get_nonoverlapping_startpoints(len(ds), window_size)
        bm = BoxcarMapper(startpoints, window_size)
        bm.train(ds)
        ds_ = bm.forward(ds)
        ds_.sa['startpoints'] = startpoints

        # reassign subjects so they are not arrays
        def assign_unique(ds, sa):
            ds.sa[sa] = [np.asscalar(np.unique(x)) for x in ds.sa[sa].value]
        assign_unique(ds_, part2.attr)

        fm = FlattenMapper()
        fm.train(ds_)
        dss_test_bc.append(ds_.get_mapped(fm))

    ds_test = vstack(dss_test_bc)
    # Perform classification across subjects comparing against mean
    # spatio-temporal pattern of other subjects
    errors_across_subjects = []
    for ds_test_part in part2.generate(ds_test):
        ds_train_, ds_test_ = list(
            Splitter("partitions").generate(ds_test_part))
        # average across subjects to get a representative pattern per timepoint
        ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
        assert (ds_train_.shape == ds_test_.shape)

        if distance == 'correlation':
            # TODO: redo more efficiently since now we are creating full
            # corrcoef matrix.  Also we might better just take a name for
            # the pdist measure but then implement them efficiently
            # (i.e. without hstacking both pieces together first)
            dist = 1 - np.corrcoef(ds_train_,
                                   ds_test_)[len(ds_test_):, :len(ds_test_)]
        else:
            raise NotImplementedError

        if overlapping_windows:
            dist = wipe_out_offdiag(dist, window_size)

        winners = np.argmin(dist, axis=1)
        error = np.mean(winners != np.arange(len(winners)))
        errors_across_subjects.append(error)

    errors_across_subjects = np.asarray(errors_across_subjects)
    if __debug__:
        debug("BM",
              "Finished with %s array of errors. Mean error %.2f"
              % (errors_across_subjects.shape, np.mean(errors_across_subjects)))
    return errors_across_subjects
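# A plain-numpy sketch of the matching step at the heart of the benchmark
# above: each test time segment is assigned to the training segment with the
# smallest correlation distance, and the error is the fraction of segments
# not matched to their own time point.  Data here is synthetic.
import numpy as np

n_seg, n_feat = 5, 20
train = np.random.randn(n_seg, n_feat)
test = train + 0.1 * np.random.randn(n_seg, n_feat)  # noisy copies
# cross-block of the full corrcoef matrix: rows index test, cols train
dist = 1 - np.corrcoef(train, test)[n_seg:, :n_seg]
winners = np.argmin(dist, axis=1)  # best-matching training segment
print np.mean(winners != np.arange(n_seg))  # ~0.0 for low noise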