def test_cached_kernel(self):
    """CachedKernel must serve cached results for seen data, and recompute
    when a kernel parameter changes or unseen data arrives."""
    nchunks = 5
    n = 50 * nchunks
    d = Dataset(np.random.randn(n, 132))
    d.sa.chunks = np.random.randint(nchunks, size=n)

    # We'll compare against an Rbf just because it has a parameter to change
    rk = npK.RbfKernel(sigma=1.5)
    # Assure two kernels are independent for this test
    ck = CachedKernel(kernel=npK.RbfKernel(sigma=1.5))
    ck.compute(d)                   # initial cache of all data
    self.failUnless(ck._recomputed,
                    'CachedKernel was not initially computed')

    # Per-chunk computations must come out of the cache, matching the
    # directly-computed Rbf kernel
    for chunk in [d[d.sa.chunks == i] for i in range(nchunks)]:
        rk.compute(chunk)
        ck.compute(chunk)
        self.kernel_equiv(rk, ck)  #, accuracy=1e-12)
        self.failIf(ck._recomputed,
                    "CachedKernel incorrectly recomputed it's kernel")

    # Changing a parameter must invalidate the cache
    ck.params.sigma = 3.5
    ck.compute(d)
    self.failUnless(ck._recomputed,
                    "CachedKernel doesn't recompute on kernel change")
    rk.params.sigma = 3.5
    rk.compute(d)
    self.failUnless(np.all(rk._k == ck._k),
                    'Cached and rbf kernels disagree after kernel change')

    # Unseen data must also trigger a recompute
    d2 = Dataset(np.random.randn(32, 43))
    ck.compute(d2)
    self.failUnless(ck._recomputed,
                    "CachedKernel did not automatically recompute new data")
    # ... and the original data is no longer cached after the override
    ck.compute(d)
    self.failUnless(ck._recomputed,
                    "CachedKernel did not recompute old data which had\n" +
                    "previously been computed, but had the cache overriden")
def test_cached_kernel(self):
    """Exercise CachedKernel: initial caching, cache hits on chunk subsets,
    recompute on parameter change, and recompute on novel data."""
    n_chunks = 5
    n_samples = 50 * n_chunks
    data = Dataset(np.random.randn(n_samples, 132))
    data.sa.chunks = np.random.randint(n_chunks, size=n_samples)

    # We'll compare against an Rbf just because it has a parameter to change
    plain = npK.RbfKernel(sigma=1.5)
    # Assure two kernels are independent for this test
    cached = CachedKernel(kernel=npK.RbfKernel(sigma=1.5))

    # Initial cache of all data
    cached.compute(data)
    self.failUnless(cached._recomputed,
                    'CachedKernel was not initially computed')

    # Try some splitting: every chunk should be answered from the cache
    for i in range(n_chunks):
        subset = data[data.sa.chunks == i]
        plain.compute(subset)
        cached.compute(subset)
        self.kernel_equiv(plain, cached)  #, accuracy=1e-12)
        self.failIf(cached._recomputed,
                    "CachedKernel incorrectly recomputed it's kernel")

    # Test what happens when a parameter changes
    cached.params.sigma = 3.5
    cached.compute(data)
    self.failUnless(cached._recomputed,
                    "CachedKernel doesn't recompute on kernel change")
    plain.params.sigma = 3.5
    plain.compute(data)
    self.failUnless(np.all(plain._k == cached._k),
                    'Cached and rbf kernels disagree after kernel change')

    # Now test handling new data
    novel = Dataset(np.random.randn(32, 43))
    cached.compute(novel)
    self.failUnless(cached._recomputed,
                    "CachedKernel did not automatically recompute new data")
    cached.compute(data)
    self.failUnless(cached._recomputed,
                    "CachedKernel did not recompute old data which had\n" +
                    "previously been computed, but had the cache overriden")
def test_cached_kernel_different_datasets(self):
    """Cached and plain shogun linear kernels must yield identical CV errors
    across several different datasets."""
    skip_if_no_external('shogun', ver_dep='shogun:rev', min_version=4455)

    # Inspired by the problem Swaroop ran into
    k = LinearSGKernel(normalizer_cls=False)
    k_ = LinearSGKernel(normalizer_cls=False)   # to be cached
    ck = CachedKernel(k_)

    clf = sgSVM(svm_impl='libsvm', kernel=k, C=-1)
    clf_ = sgSVM(svm_impl='libsvm', kernel=ck, C=-1)

    cvte = CrossValidatedTransferError(TransferError(clf), NFoldSplitter())
    cvte_ = CrossValidatedTransferError(TransferError(clf_), NFoldSplitter())

    te = TransferError(clf)
    te_ = TransferError(clf_)

    for r in xrange(2):
        ds1 = datasets['uni2medium']
        errs1 = cvte(ds1)
        ck.compute(ds1)
        ok_(ck._recomputed)
        errs1_ = cvte_(ds1)
        # BUG FIX: `~` is bitwise NOT; on a plain Python bool it yields
        # -2 or -1, both truthy, so `ok_(~ck._recomputed)` could never
        # fail.  `not` is the correct (and numpy-bool-safe) negation.
        ok_(not ck._recomputed)
        assert_array_equal(errs1, errs1_)

        ds2 = datasets['uni3small']
        errs2 = cvte(ds2)
        ck.compute(ds2)
        ok_(ck._recomputed)
        errs2_ = cvte_(ds2)
        ok_(not ck._recomputed)
        assert_array_equal(errs2, errs2_)

        ssel = np.round(datasets['uni2large'].samples[:5, 0]).astype(int)
        terr = te(datasets['uni3small_test'][ssel],
                  datasets['uni3small_train'][::2])
        terr_ = te_(datasets['uni3small_test'][ssel],
                    datasets['uni3small_train'][::2])
        ok_(not ck._recomputed)
        ok_(terr == terr_)
def test_cached_kernel_different_datasets(self):
    """Cached and plain shogun linear kernels must yield identical CV errors
    across several different datasets (CrossValidation-API variant)."""
    skip_if_no_external('shogun', ver_dep='shogun:rev', min_version=4455)

    # Inspired by the problem Swaroop ran into
    k = LinearSGKernel(normalizer_cls=False)
    k_ = LinearSGKernel(normalizer_cls=False)   # to be cached
    ck = CachedKernel(k_)

    clf = sgSVM(svm_impl='libsvm', kernel=k, C=-1)
    clf_ = sgSVM(svm_impl='libsvm', kernel=ck, C=-1)

    cvte = CrossValidation(clf, NFoldPartitioner())
    cvte_ = CrossValidation(clf_, NFoldPartitioner())

    # NOTE(review): both measures share this single postproc node instance;
    # looks intentional here since they are invoked sequentially, but
    # confirm BinaryFxNode carries no state between calls.
    postproc = BinaryFxNode(mean_mismatch_error, 'targets')
    te = ProxyMeasure(clf, postproc=postproc)
    te_ = ProxyMeasure(clf_, postproc=postproc)

    for r in xrange(2):
        ds1 = datasets['uni2medium']
        errs1 = cvte(ds1)
        ck.compute(ds1)
        ok_(ck._recomputed)
        errs1_ = cvte_(ds1)
        # BUG FIX: `~` is bitwise NOT; on a plain Python bool it yields
        # -2 or -1, both truthy, so `ok_(~ck._recomputed)` could never
        # fail.  `not` is the correct (and numpy-bool-safe) negation.
        ok_(not ck._recomputed)
        assert_array_equal(errs1, errs1_)

        ds2 = datasets['uni3small']
        errs2 = cvte(ds2)
        ck.compute(ds2)
        ok_(ck._recomputed)
        errs2_ = cvte_(ds2)
        ok_(not ck._recomputed)
        assert_array_equal(errs2, errs2_)

        ssel = np.round(datasets['uni2large'].samples[:5, 0]).astype(int)
        te.train(datasets['uni3small'][::2])
        terr = np.asscalar(te(datasets['uni3small'][ssel]))
        te_.train(datasets['uni3small'][::2])
        terr_ = np.asscalar(te_(datasets['uni3small'][ssel]))
        ok_(not ck._recomputed)
        ok_(terr == terr_)
def test_cached_kernel_different_datasets(self):
    """Cached and plain shogun linear kernels must yield identical CV errors
    across several different datasets."""
    skip_if_no_external('shogun', ver_dep='shogun:rev', min_version=4455)

    # Inspired by the problem Swaroop ran into
    k = LinearSGKernel(normalizer_cls=False)
    k_ = LinearSGKernel(normalizer_cls=False)   # to be cached
    ck = CachedKernel(k_)

    clf = sgSVM(svm_impl='libsvm', kernel=k, C=-1)
    clf_ = sgSVM(svm_impl='libsvm', kernel=ck, C=-1)

    cvte = CrossValidatedTransferError(
        TransferError(clf), NFoldSplitter())
    cvte_ = CrossValidatedTransferError(
        TransferError(clf_), NFoldSplitter())

    te = TransferError(clf)
    te_ = TransferError(clf_)

    for r in xrange(2):
        ds1 = datasets['uni2medium']
        errs1 = cvte(ds1)
        ck.compute(ds1)
        ok_(ck._recomputed)
        errs1_ = cvte_(ds1)
        # BUG FIX: `~` is bitwise NOT; on a plain Python bool it yields
        # -2 or -1, both truthy, so `ok_(~ck._recomputed)` could never
        # fail.  `not` is the correct (and numpy-bool-safe) negation.
        ok_(not ck._recomputed)
        assert_array_equal(errs1, errs1_)

        ds2 = datasets['uni3small']
        errs2 = cvte(ds2)
        ck.compute(ds2)
        ok_(ck._recomputed)
        errs2_ = cvte_(ds2)
        ok_(not ck._recomputed)
        assert_array_equal(errs2, errs2_)

        ssel = np.round(datasets['uni2large'].samples[:5, 0]).astype(int)
        terr = te(datasets['uni3small_test'][ssel],
                  datasets['uni3small_train'][::2])
        terr_ = te_(datasets['uni3small_test'][ssel],
                    datasets['uni3small_train'][::2])
        ok_(not ck._recomputed)
        ok_(terr == terr_)
def test_vstack_and_origids_issue(self):
    """vstack-ed datasets with duplicate sample origids must be rejected by
    CachedKernel, and work after re-initializing origids."""
    # That is actually what swaroop hit
    skip_if_no_external('shogun', ver_dep='shogun:rev', min_version=4455)

    # Inspired by the problem Swaroop ran into
    k = LinearSGKernel(normalizer_cls=False)
    k_ = LinearSGKernel(normalizer_cls=False)   # to be cached
    ck = CachedKernel(k_)

    clf = sgSVM(svm_impl='libsvm', kernel=k, C=-1)
    clf_ = sgSVM(svm_impl='libsvm', kernel=ck, C=-1)

    cvte = CrossValidatedTransferError(
        TransferError(clf), NFoldSplitter())
    cvte_ = CrossValidatedTransferError(
        TransferError(clf_), NFoldSplitter())

    ds = datasets['uni2large_test'].copy(deep=True)
    # BUG FIX: the original `ok_(~('orig_ids' in ds.sa))` used bitwise NOT
    # on a Python bool (-1/-2, always truthy), so it could never fail.
    # NOTE(review): the key 'orig_ids' also looks like a typo for
    # 'origids' (cf. the check after compute below) -- TODO confirm.
    ok_('orig_ids' not in ds.sa)    # assure that there are None
    ck.compute(ds)                  # so we initialize origids
    ok_('origids' in ds.sa)

    ds2 = ds.copy(deep=True)
    ds2.samples = np.zeros(ds2.shape)
    from mvpa.base.dataset import vstack
    ds_vstacked = vstack((ds2, ds))
    # should complaint now since there would not be unique
    # samples' origids
    if __debug__:
        assert_raises(ValueError, ck.compute, ds_vstacked)

    ds_vstacked.init_origids('samples')  # reset origids
    ck.compute(ds_vstacked)

    errs = cvte(ds_vstacked)
    errs_ = cvte_(ds_vstacked)
    # Following test would have failed since origids
    # were just ints, and then non-unique after vstack
    assert_array_equal(errs.samples, errs_.samples)
def test_vstack_and_origids_issue(self):
    """vstack-ed datasets with duplicate sample origids must be rejected by
    CachedKernel, and work after re-initializing origids."""
    # That is actually what swaroop hit
    skip_if_no_external('shogun', ver_dep='shogun:rev', min_version=4455)

    # Inspired by the problem Swaroop ran into
    k = LinearSGKernel(normalizer_cls=False)
    k_ = LinearSGKernel(normalizer_cls=False)   # to be cached
    ck = CachedKernel(k_)

    clf = sgSVM(svm_impl='libsvm', kernel=k, C=-1)
    clf_ = sgSVM(svm_impl='libsvm', kernel=ck, C=-1)

    cvte = CrossValidatedTransferError(TransferError(clf), NFoldSplitter())
    cvte_ = CrossValidatedTransferError(TransferError(clf_), NFoldSplitter())

    ds = datasets['uni2large_test'].copy(deep=True)
    # BUG FIX: the original `ok_(~('orig_ids' in ds.sa))` used bitwise NOT
    # on a Python bool (-1/-2, always truthy), so it could never fail.
    # NOTE(review): the key 'orig_ids' also looks like a typo for
    # 'origids' (cf. the check after compute below) -- TODO confirm.
    ok_('orig_ids' not in ds.sa)    # assure that there are None
    ck.compute(ds)                  # so we initialize origids
    ok_('origids' in ds.sa)

    ds2 = ds.copy(deep=True)
    ds2.samples = np.zeros(ds2.shape)
    from mvpa.base.dataset import vstack
    ds_vstacked = vstack((ds2, ds))
    # should complaint now since there would not be unique
    # samples' origids
    if __debug__:
        assert_raises(ValueError, ck.compute, ds_vstacked)

    ds_vstacked.init_origids('samples')  # reset origids
    ck.compute(ds_vstacked)

    errs = cvte(ds_vstacked)
    errs_ = cvte_(ds_vstacked)
    # Following test would have failed since origids
    # were just ints, and then non-unique after vstack
    assert_array_equal(errs.samples, errs_.samples)