def testSFANode_rank_deficit():
    def test_for_data(dat, dat0, dfc, out, eq_pr_dict=None,
                      rk_thr_dict=None, check_data=None, check_dfc=None):
        if eq_pr_dict is None:
            eq_pr_dict = {'reg': 5, 'pca': 5, 'svd': 5, 'ldl': 5}
        if check_data is None:
            check_data = {'reg': True, 'pca': True, 'svd': True, 'ldl': True}
        if check_dfc is None:
            check_dfc = {'reg': True, 'pca': True, 'svd': True, 'ldl': True}
        if rk_thr_dict is None:
            rk_thr_dict = {
                'reg': 1e-10, 'pca': 1e-10, 'svd': 1e-10, 'ldl': 1e-10}

        sfa0 = mdp.nodes.SFANode(output_dim=out)
        sfa0.train(dat0)
        sfa0.stop_training()
        sdat0 = sfa0.execute(dat0)

        sfa2_reg = mdp.nodes.SFANode(output_dim=out, rank_deficit_method='reg')
        sfa2_reg.rank_threshold = rk_thr_dict['reg']
        # This is equivalent to
        # sfa2._sfa_solver = sfa2._rank_deficit_solver_reg
        sfa2_reg.train(dat)
        sfa2_reg.stop_training()
        sdat_reg = sfa2_reg.execute(dat)

        sfa2_pca = mdp.nodes.SFANode(output_dim=out)
        # For this test we add the rank_deficit_solver later, so we can
        # assert that ordinary SFA would actually fail on the data.
        sfa2_pca.train(dat)
        try:
            sfa2_pca.stop_training()
            # Assert that with dfc > 0 ordinary SFA wouldn't reach this line.
            assert dfc == 0
        except mdp.NodeException:
            sfa2_pca.set_rank_deficit_method('pca')
            sfa2_pca.rank_threshold = rk_thr_dict['pca']
            sfa2_pca.stop_training()
        sdat_pca = sfa2_pca.execute(dat)

        sfa2_svd = mdp.nodes.SFANode(output_dim=out, rank_deficit_method='svd')
        sfa2_svd.rank_threshold = rk_thr_dict['svd']
        sfa2_svd.train(dat)
        sfa2_svd.stop_training()
        sdat_svd = sfa2_svd.execute(dat)

        def matrix_cmp(A, B):
            assert_array_almost_equal(abs(A), abs(B))
            return True

        if check_data['reg']:
            assert_array_almost_equal(abs(sdat_reg), abs(sdat0),
                                      eq_pr_dict['reg'])
        if check_data['pca']:
            assert_array_almost_equal(abs(sdat_pca), abs(sdat0),
                                      eq_pr_dict['pca'])
        if check_data['svd']:
            assert_array_almost_equal(abs(sdat_svd), abs(sdat0),
                                      eq_pr_dict['svd'])

        reg_dfc = sfa2_reg.rank_deficit == dfc
        pca_dfc = sfa2_pca.rank_deficit == dfc
        svd_dfc = sfa2_svd.rank_deficit == dfc
        if reg_dfc:
            assert_array_almost_equal(sfa2_reg.d, sfa0.d, eq_pr_dict['reg'])
        if pca_dfc:
            assert_array_almost_equal(sfa2_pca.d, sfa0.d, eq_pr_dict['pca'])
        if svd_dfc:
            assert_array_almost_equal(sfa2_svd.d, sfa0.d, eq_pr_dict['svd'])

        # check that constraints are met
        idn = numx.identity(out)
        d_diag = numx.diag(sfa0.d)
        # reg ok?
        assert_array_almost_equal(
            mult(sdat_reg.T, sdat_reg) / (len(sdat_reg) - 1),
            idn, eq_pr_dict['reg'])
        sdat_reg_d = sdat_reg[1:] - sdat_reg[:-1]
        assert_array_almost_equal(
            mult(sdat_reg_d.T, sdat_reg_d) / (len(sdat_reg_d) - 1),
            d_diag, eq_pr_dict['reg'])
        # pca ok?
        assert_array_almost_equal(
            mult(sdat_pca.T, sdat_pca) / (len(sdat_pca) - 1),
            idn, eq_pr_dict['pca'])
        sdat_pca_d = sdat_pca[1:] - sdat_pca[:-1]
        assert_array_almost_equal(
            mult(sdat_pca_d.T, sdat_pca_d) / (len(sdat_pca_d) - 1),
            d_diag, eq_pr_dict['pca'])
        # svd ok?
        assert_array_almost_equal(
            mult(sdat_svd.T, sdat_svd) / (len(sdat_svd) - 1),
            idn, eq_pr_dict['svd'])
        sdat_svd_d = sdat_svd[1:] - sdat_svd[:-1]
        assert_array_almost_equal(
            mult(sdat_svd_d.T, sdat_svd_d) / (len(sdat_svd_d) - 1),
            d_diag, eq_pr_dict['svd'])

        try:
            # test ldl separately due to its requirement of SciPy >= 1.0
            sfa2_ldl = mdp.nodes.SFANode(output_dim=out,
                                         rank_deficit_method='ldl')
            sfa2_ldl.rank_threshold = rk_thr_dict['ldl']
            have_ldl = True
        except NodeException:
            # No SciPy >= 1.0 available.
            have_ldl = False

        if have_ldl:
            sfa2_ldl.train(dat)
            sfa2_ldl.stop_training()
            sdat_ldl = sfa2_ldl.execute(dat)
            if check_data['ldl']:
                assert_array_almost_equal(abs(sdat_ldl), abs(sdat0),
                                          eq_pr_dict['ldl'])
            ldl_dfc = sfa2_ldl.rank_deficit == dfc
            if ldl_dfc:
                assert_array_almost_equal(sfa2_ldl.d, sfa0.d,
                                          eq_pr_dict['ldl'])
            # check that constraints are met
            # ldl ok?
            assert_array_almost_equal(
                mult(sdat_ldl.T, sdat_ldl) / (len(sdat_ldl) - 1),
                idn, eq_pr_dict['ldl'])
            sdat_ldl_d = sdat_ldl[1:] - sdat_ldl[:-1]
            assert_array_almost_equal(
                mult(sdat_ldl_d.T, sdat_ldl_d) / (len(sdat_ldl_d) - 1),
                d_diag, eq_pr_dict['ldl'])
        else:
            ldl_dfc = None
        ldl_dfc2 = ldl_dfc is True or (ldl_dfc is None and not have_ldl)
        assert all((reg_dfc or not check_dfc['reg'],
                    pca_dfc or not check_dfc['pca'],
                    svd_dfc or not check_dfc['svd'],
                    ldl_dfc2 or not check_dfc['ldl'])), \
            "Rank deficit ok? reg: %s, pca: %s, svd: %s, ldl: %s" % \
            (reg_dfc, pca_dfc, svd_dfc, ldl_dfc)
        return sfa2_pca.d

    # ============ test with random data:
    dfc_max = 200
    dat_dim = 500
    dat_smpl = 10000
    dat = numx.random.rand(dat_smpl, dat_dim)  # test data
    dfc = numx.random.randint(0, dfc_max)  # rank deficit
    out = numx.random.randint(4, dat_dim - 50 - dfc)  # output dim

    # We add some linear redundancy to the data...
    if dfc > 0:
        # dfc is how many dimensions we overwrite with duplicates
        # This should yield an overall rank deficit of dfc
        dat0 = dat[:, :-dfc]  # for use by ordinary SFA
        dat[:, -dfc:] = dat[:, :dfc]
    else:
        dat0 = dat
    test_for_data(dat, dat0, dfc, out)

    # We mix the redundancy a bit more with the other data and test again...
    if dfc > 0:
        # This should yield an overall rank deficit of dfc
        ovl = numx.random.randint(0, dat_dim - max(out, dfc_max))
        # We generate a random, yet orthogonal matrix M for mixing:
        M = numx.random.rand(dfc + ovl, dfc + ovl)
        _, M = symeig(M + M.T)
        dat[:, -(dfc + ovl):] = dat[:, -(dfc + ovl):].dot(M)
        # We test again with mixing matrix applied
        test_for_data(dat, dat0, dfc, out)

    # ============ test with nasty data:
    # Create another set of data...
    dat = numx.random.rand(dat_smpl, dat_dim)  # test data
    dfc = numx.random.randint(0, dfc_max)  # rank deficit
    out = numx.random.randint(4, dat_dim - 50 - dfc)  # output dim

    # We add some linear redundancy to the data...
    if dfc > 0:
        # dfc is how many dimensions we overwrite with duplicates
        # This should yield an overall rank deficit of dfc
        dat0 = dat[:, :-dfc]  # for use by ordinary SFA
        dat[:, -dfc:] = dat[:, :dfc]
    else:
        dat0 = dat
    # And additionally add a very slow actual feature...
    dat[:, dfc] = numx.arange(dat_smpl)

    # We mute some checks here because they sometimes fail
    check_data = {'reg': False, 'pca': False, 'svd': False, 'ldl': False}
    check_dfc = {'reg': False, 'pca': False, 'svd': False, 'ldl': False}
    # Note: In most cases accuracy is much higher than checked here.
    eq_pr_dict = {'reg': 2, 'pca': 2, 'svd': 2, 'ldl': 2}
    rk_thr_dict = {'reg': 1e-8, 'pca': 1e-7, 'svd': 1e-7, 'ldl': 1e-6}

    # Here we assert the very slow but actual feature is not filtered out:
    assert test_for_data(dat, dat0, dfc, out, eq_pr_dict, rk_thr_dict,
                         check_data, check_dfc)[0] < 1e-5

    # We mix the redundancy a bit more with the other data and test again...
    if dfc > 0:
        # This should yield an overall rank deficit of dfc
        ovl = numx.random.randint(0, dat_dim - max(out, dfc_max))
        # We generate a random, yet orthogonal matrix M for mixing:
        M = numx.random.rand(dfc + ovl, dfc + ovl)
        _, M = symeig(M + M.T)
        dat[:, -(dfc + ovl):] = dat[:, -(dfc + ovl):].dot(M)
        # We test again with mixing matrix applied.
        # Again we assert the very slow but actual feature is not filtered out:
        assert test_for_data(dat, dat0, dfc, out, eq_pr_dict, rk_thr_dict,
                             check_data, check_dfc)[0] < 1e-5
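
# The following is a minimal, illustrative usage sketch distilled from the test
# above (not part of the test suite). The helper name, the data shape, the
# deficit of 5 and the choice of rank_deficit_method='svd' are arbitrary; it
# only relies on the SFANode options already exercised by test_for_data.
def _example_sfa_rank_deficit_usage():
    x = numx.random.rand(1000, 20)
    x[:, -5:] = x[:, :5]  # duplicate columns -> covariance rank deficit of 5
    sfa = mdp.nodes.SFANode(output_dim=10, rank_deficit_method='svd')
    sfa.rank_threshold = 1e-10
    sfa.train(x)
    sfa.stop_training()  # the chosen solver handles the singular covariance
    y = sfa.execute(x)
    # sfa.rank_deficit should equal 5 for this construction
    return sfa.rank_deficit, y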
def _stop_training(self):
    Cumulator._stop_training(self)

    if self.verbose:
        msg = ('training LLE on %i points'
               ' in %i dimensions...' % (self.data.shape[0],
                                         self.data.shape[1]))
        print msg

    # some useful quantities
    M = self.data
    N = M.shape[0]
    k = self.k
    r = self.r

    # indices of diagonal elements
    W_diag_idx = numx.arange(N)
    Q_diag_idx = numx.arange(k)

    if k > N:
        err = ('k=%i must be less than or '
               'equal to number of training points N=%i' % (k, N))
        raise TrainingException(err)

    # determines number of output dimensions: if desired_variance
    # is specified, we need to learn it from the data. Otherwise,
    # it's easy
    learn_outdim = False
    if self.output_dim is None:
        if self.desired_variance is None:
            self.output_dim = self.input_dim
        else:
            learn_outdim = True

    # do we need to automatically determine the regularization term?
    auto_reg = r is None

    # determine number of output dims, precalculate useful stuff
    if learn_outdim:
        Qs, sig2s, nbrss = self._adjust_output_dim()

    # build the weight matrix
    # XXX future work:
    # XXX for faster implementation, W should be a sparse matrix
    W = numx.zeros((N, N), dtype=self.dtype)

    if self.verbose:
        print ' - constructing [%i x %i] weight matrix...' % W.shape

    for row in range(N):
        if learn_outdim:
            Q = Qs[row, :, :]
            nbrs = nbrss[row, :]
        else:
            # -----------------------------------------------
            #  find k nearest neighbors
            # -----------------------------------------------
            M_Mi = M - M[row]
            nbrs = numx.argsort((M_Mi**2).sum(1))[1:k + 1]
            M_Mi = M_Mi[nbrs]
            # compute covariance matrix of distances
            Q = mult(M_Mi, M_Mi.T)

        # -----------------------------------------------
        #  compute weight vector based on neighbors
        # -----------------------------------------------

        # Covariance matrix may be nearly singular:
        # add a diagonal correction to prevent numerical errors
        if auto_reg:
            # automatic mode: correction is equal to the sum of
            # the (d_in-d_out) unused variances (as in deRidder &
            # Duin)
            if learn_outdim:
                sig2 = sig2s[row, :]
            else:
                sig2 = svd(M_Mi, compute_uv=0)**2
            r = numx.sum(sig2[self.output_dim:])
            Q[Q_diag_idx, Q_diag_idx] += r
        else:
            # Roweis et al instead use "a correction that
            # is small compared to the trace", e.g.:
            #     r = 0.001 * float(Q.trace())
            # this is equivalent to assuming 0.1% of the variance is unused
            Q[Q_diag_idx, Q_diag_idx] += r * Q.trace()

        # solve for weight
        # weight is w such that sum(Q_ij * w_j) = 1 for all i
        # XXX refcast is due to numpy bug: floats become double
        w = self._refcast(numx_linalg.solve(Q, numx.ones(k)))
        w /= w.sum()

        # update row of the weight matrix
        W[nbrs, row] = w

    if self.verbose:
        msg = (' - finding [%i x %i] null space of weight matrix\n'
               '     (may take a while)...' % (self.output_dim, N))
        print msg

    self.W = W.copy()
    # to find the null space, we need the bottom d+1
    # eigenvectors of (W-I).T*(W-I)
    # Compute this using the svd of (W-I):
    W[W_diag_idx, W_diag_idx] -= 1.

    # XXX future work:
    # XXX use of upcoming ARPACK interface for bottom few eigenvectors
    # XXX of a sparse matrix will significantly increase the speed
    # XXX of the next step

    if self.svd:
        sig, U = nongeneral_svd(W.T, range=(2, self.output_dim + 1))
    else:
        # the following code does the same computation, but uses
        # symeig, which computes only the required eigenvectors, and
        # is much faster. However, it could also be more unstable...
        WW = mult(W, W.T)
        # regularizes the eigenvalues, does not change the eigenvectors:
        WW[W_diag_idx, W_diag_idx] += 0.1
        sig, U = symeig(WW, range=(2, self.output_dim + 1),
                        overwrite=True)

    self.training_projection = U
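
# A sketch of the per-row weight computation in the loop above, pulled out as a
# standalone helper for clarity (illustrative only; the helper name and argument
# list are assumptions, and the regularization follows the non-automatic branch):
def _example_lle_weights(M, row, k, r):
    M_Mi = M - M[row]
    nbrs = numx.argsort((M_Mi**2).sum(1))[1:k + 1]       # k nearest neighbors
    G = mult(M_Mi[nbrs], M_Mi[nbrs].T)                   # local covariance matrix
    G[numx.arange(k), numx.arange(k)] += r * G.trace()   # diagonal regularization
    w = numx_linalg.solve(G, numx.ones(k))               # solve sum_j G_ij w_j = 1
    return nbrs, w / w.sum()                             # weights normalized to sum 1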
def _stop_training(self):
    Cumulator._stop_training(self)

    k = self.k
    M = self.data
    N = M.shape[0]

    if k > N:
        err = ('k=%i must be less than'
               ' or equal to number of training points N=%i' % (k, N))
        raise TrainingException(err)

    if self.verbose:
        print 'performing HLLE on %i points in %i dimensions...' % M.shape

    # determines number of output dimensions: if desired_variance
    # is specified, we need to learn it from the data. Otherwise,
    # it's easy
    learn_outdim = False
    if self.output_dim is None:
        if self.desired_variance is None:
            self.output_dim = self.input_dim
        else:
            learn_outdim = True

    # determine number of output dims, precalculate useful stuff
    if learn_outdim:
        Qs, sig2s, nbrss = self._adjust_output_dim()

    d_out = self.output_dim
    # dp = d_out + (d_out-1) + (d_out-2) + ...
    dp = d_out * (d_out + 1) / 2

    if min(k, N) <= d_out:
        err = ('k=%i and n=%i (number of input data points) must be'
               ' larger than output_dim=%i' % (k, N, d_out))
        raise TrainingException(err)

    if k < 1 + d_out + dp:
        wrn = ('The number of neighbours, k=%i, is smaller than'
               ' 1 + output_dim + output_dim*(output_dim+1)/2 = %i,'
               ' which might lead to unstable results.'
               % (k, 1 + d_out + dp))
        _warnings.warn(wrn, MDPWarning)

    # build the weight matrix
    # XXX for faster implementation, W should be a sparse matrix
    W = numx.zeros((N, dp * N), dtype=self.dtype)

    if self.verbose:
        print ' - constructing [%i x %i] weight matrix...' % W.shape

    for row in range(N):
        if learn_outdim:
            nbrs = nbrss[row, :]
        else:
            # -----------------------------------------------
            #  find k nearest neighbors
            # -----------------------------------------------
            M_Mi = M - M[row]
            nbrs = numx.argsort((M_Mi**2).sum(1))[1:k + 1]

        # -----------------------------------------------
        #  center the neighborhood using the mean
        # -----------------------------------------------
        nbrhd = M[nbrs]  # this makes a copy
        nbrhd -= nbrhd.mean(0)

        # -----------------------------------------------
        #  compute local coordinates
        #  using a singular value decomposition
        # -----------------------------------------------
        U, sig, VT = svd(nbrhd)
        nbrhd = U.T[:d_out]
        del VT

        # -----------------------------------------------
        #  build Hessian estimator
        # -----------------------------------------------
        Yi = numx.zeros((dp, k), dtype=self.dtype)
        ct = 0
        for i in range(d_out):
            Yi[ct:ct + d_out - i, :] = nbrhd[i] * nbrhd[i:, :]
            ct += d_out - i
        Yi = numx.concatenate(
            [numx.ones((1, k), dtype=self.dtype), nbrhd, Yi], 0)

        # -----------------------------------------------
        #  orthogonalize linear and quadratic forms
        #  with QR factorization
        #  and make the weights sum to 1
        # -----------------------------------------------
        if k >= 1 + d_out + dp:
            Q, R = numx_linalg.qr(Yi.T)
            w = Q[:, d_out + 1:d_out + 1 + dp]
        else:
            q, r = _mgs(Yi.T)
            w = q[:, -dp:]

        S = w.sum(0)  # sum along columns
        # if S[i] is too small, set it equal to 1.0
        # this prevents weights from blowing up
        S[numx.where(numx.absolute(S) < 1E-4)] = 1.0

        # print w.shape, S.shape, (w/S).shape
        # print W[nbrs, row*dp:(row+1)*dp].shape
        W[nbrs, row * dp:(row + 1) * dp] = w / S

    # -----------------------------------------------
    # To find the null space, we want the
    # first d+1 eigenvectors of W.T*W
    # Compute this using an svd of W
    # -----------------------------------------------

    if self.verbose:
        msg = (' - finding [%i x %i] '
               'null space of weight matrix...' % (d_out, N))
        print msg

    # XXX future work:
    # XXX use of upcoming ARPACK interface for bottom few eigenvectors
    # XXX of a sparse matrix will significantly increase the speed
    # XXX of the next step

    if self.svd:
        sig, U = nongeneral_svd(W.T, range=(2, d_out + 1))
        Y = U * numx.sqrt(N)
    else:
        WW = mult(W, W.T)
        # regularizes the eigenvalues, does not change the eigenvectors:
        W_diag_idx = numx.arange(N)
        WW[W_diag_idx, W_diag_idx] += 0.01
        sig, U = symeig(WW, range=(2, self.output_dim + 1),
                        overwrite=True)
        Y = U * numx.sqrt(N)
        del WW
    del W

    # -----------------------------------------------
    # Normalize Y
    #
    # Alternative way to do it:
    #   we need R = (Y.T*Y)^(-1/2)
    #   do this with an SVD of Y (del VT)
    #      Y = U*sig*V.T
    #      Y.T*Y = (V*sig.T*U.T) * (U*sig*V.T)
    #            = V * (sig*sig.T) * V.T
    #            = V * sig^2 * V.T
    #   so
    #      R = V * sig^-1 * V.T
    # The code is:
    #    U, sig, VT = svd(Y)
    #    del U
    #    S = numx.diag(sig**-1)
    #    self.training_projection = mult(Y, mult(VT.T, mult(S, VT)))
    # -----------------------------------------------
    if self.verbose:
        print ' - normalizing null space...'

    C = sqrtm(mult(Y.T, Y))
    self.training_projection = mult(Y, C)
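
# A sketch of the alternative SVD-based normalization that the comment block
# above describes (illustrative only; the helper name is an assumption, and
# whether it should replace the sqrtm-based line is left open here). It forms
# Y * (Y.T*Y)^(-1/2) directly from an SVD of Y:
def _example_hlle_normalize(Y):
    U, sig, VT = svd(Y)                      # Y = U * diag(sig) * V.T
    S = numx.diag(sig**-1)                   # diag(sig)^-1
    return mult(Y, mult(VT.T, mult(S, VT)))  # Y * V * diag(sig)^-1 * V.T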