def test_mult_diag():
    """Compare utils.mult_diag with an explicit diagonal-matrix product.

    A random vector is expanded to a full diagonal matrix and multiplied
    with a random square matrix from the left and from the right; both
    products must agree with utils.mult_diag to 10 decimal places.
    """
    size = 20
    diag_vals = numx_rand.random(size=(size,))
    diag_mtx = numx.diag(diag_vals)
    dense = numx_rand.random(size=(size, size))

    # left=True checks D*M, left=False checks M*D
    for on_left in (True, False):
        if on_left:
            expected = utils.mult(diag_mtx, dense)
        else:
            expected = utils.mult(dense, diag_mtx)
        actual = utils.mult_diag(diag_vals, dense, left=on_left)
        assert_array_almost_equal(expected, actual, 10)
def _stop_training(self, debug=False):
    """Stop the training phase.

    Fixes the accumulated covariance matrix, stores the data average as a
    row matrix and computes the eigen-decomposition of the covariance
    matrix.

    :param debug: If True, the covariance matrix is not overwritten by the
        eigensolver (``overwrite=(not debug)``), so ``self.cov_mtx`` can be
        examined after a failure. A warning is also emitted when there are
        fewer observations than input variables.
    :type debug: bool

    :raises mdp.NodeException: If negative eigenvalues occur while none of
        ``reduce``, ``svd`` or ``desired_variance`` is set, or if the
        eigensolver raises ``SymeigException``.
    """
    # NOTE(review): the visible body ends after the eigen-decomposition;
    # d, v and vartot are not stored here. Confirm against the full method.

    # request the covariance matrix and clean up
    self.cov_mtx, avg, self.tlen = self._cov_mtx.fix()
    del self._cov_mtx

    # this is a bit counterintuitive, as it reshapes the average vector to
    # be a matrix. in this way, however, we spare the reshape
    # operation every time that 'execute' is called.
    self.avg = avg.reshape(1, avg.shape[0])

    # range for the eigenvalues
    rng = self._adjust_output_dim()

    # if we have more variables than observations we are bound to fail here
    # suggest to use the NIPALSNode instead.
    if debug and self.tlen < self.input_dim:
        wrn = (
            "The number of observations (%d) "
            "is smaller than the number of input variables "
            "(%d). You may want to use "
            "the NIPALSNode instead." % (self.tlen, self.input_dim)
        )
        _warnings.warn(wrn, mdp.MDPWarning)

    # total variance can be computed at this point:
    # note that vartot == d.sum()
    vartot = numx.diag(self.cov_mtx).sum()

    ## compute and sort the eigenvalues
    # compute the eigenvectors of the covariance matrix (inplace)
    # (eigenvalues sorted in ascending order)
    try:
        d, v = self._symeig(self.cov_mtx, range=rng, overwrite=(not debug))
        # if reduce=False and svd=False. we should check for
        # negative eigenvalues and fail
        if not (self.reduce or self.svd
                or (self.desired_variance is not None)):
            if d.min() < 0:
                raise mdp.NodeException(
                    "Got negative eigenvalues: %s.\n"
                    "You may either set output_dim to be smaller, "
                    "or set reduce=True and/or svd=True" % str(d)
                )
    except SymeigException as exception:
        # 'as' form is valid on Python >= 2.6 and on Python 3
        err = str(exception) + ("\nCovariance matrix may be singular. "
                                "Try setting svd=True.")
        raise mdp.NodeException(err)
def _stop_training(self, debug=False):
    """Stop the training phase.

    Fixes the accumulated covariance matrix, stores the data average as a
    row matrix and computes the eigen-decomposition of the covariance
    matrix.

    :param debug: If True, the covariance matrix is not overwritten by the
        eigensolver (``overwrite=(not debug)``), so ``self.cov_mtx`` can be
        examined after a failure. A warning is also emitted when there are
        fewer observations than input variables.
    :type debug: bool

    :raises mdp.NodeException: If negative eigenvalues occur while none of
        ``reduce``, ``svd`` or ``desired_variance`` is set, or if the
        eigensolver raises ``SymeigException``.
    """
    # NOTE(review): the visible body ends after the eigen-decomposition;
    # d, v and vartot are not stored here. Confirm against the full method.

    # request the covariance matrix and clean up
    self.cov_mtx, avg, self.tlen = self._cov_mtx.fix()
    del self._cov_mtx

    # this is a bit counterintuitive, as it reshapes the average vector to
    # be a matrix. in this way, however, we spare the reshape
    # operation every time that 'execute' is called.
    self.avg = avg.reshape(1, avg.shape[0])

    # range for the eigenvalues
    rng = self._adjust_output_dim()

    # if we have more variables than observations we are bound to fail here
    # suggest to use the NIPALSNode instead.
    if debug and self.tlen < self.input_dim:
        wrn = ('The number of observations (%d) '
               'is smaller than the number of input variables '
               '(%d). You may want to use '
               'the NIPALSNode instead.' % (self.tlen, self.input_dim))
        _warnings.warn(wrn, mdp.MDPWarning)

    # total variance can be computed at this point:
    # note that vartot == d.sum()
    vartot = numx.diag(self.cov_mtx).sum()

    ## compute and sort the eigenvalues
    # compute the eigenvectors of the covariance matrix (inplace)
    # (eigenvalues sorted in ascending order)
    try:
        d, v = self._symeig(self.cov_mtx, range=rng, overwrite=(not debug))
        # if reduce=False and svd=False. we should check for
        # negative eigenvalues and fail
        if not (self.reduce or self.svd
                or (self.desired_variance is not None)):
            if d.min() < 0:
                raise mdp.NodeException(
                    "Got negative eigenvalues: %s.\n"
                    "You may either set output_dim to be smaller, "
                    "or set reduce=True and/or svd=True" % str(d))
    except SymeigException as exception:
        # 'as' form is valid on Python >= 2.6 and on Python 3
        err = str(exception) + ("\nCovariance matrix may be singular. "
                                "Try setting svd=True.")
        raise mdp.NodeException(err)
def symeig_semidefinite_svd(A, B=None, eigenvectors=True, turbo="on",
                            range=None, type=1, overwrite=False,
                            rank_threshold=1e-12, dfc_out=None):
    """
    SVD-based routine to solve generalized symmetric positive semidefinite
    eigenvalue problems.
    This can be used if the normal ``symeig()`` call in ``_stop_training()``
    throws ``SymeigException('Covariance matrices may be singular')``.

    This solver's computational cost depends on the underlying SVD
    implementation. Its dominant cost factor consists of two SVD runs.

    For details on the used algorithm see:
    http://www.geo.tuwien.ac.at/downloads/tm/svd.pdf (section 0.3.2)

    .. note::
        The parameters eigenvectors, turbo, overwrite are not used and
        type must be 1. They only exist to provide a symeig-compatible
        signature (which is also why ``range`` and ``type`` shadow
        builtins here).

    The signature of this function equals that of ``mdp.utils.symeig``,
    but has two additional parameters:

    :param A: Left-hand matrix of the problem.
    :param B: Right-hand matrix of the problem; it is the one that is
        SVD-decomposed and checked for rank deficit.
    :param range: Optional 1-based inclusive pair ``(lo, hi)`` selecting
        which eigenvalues/eigenvectors to return.
    :param rank_threshold: A threshold to determine if an eigenvalue
        counts as zero.
    :type rank_threshold: float

    :param dfc_out: If ``dfc_out`` is not ``None``, ``dfc_out.rank_deficit``
        will be set to an integer indicating how many zero-eigenvalues
        were detected.

    :return: Tuple ``(e, E)`` of eigenvalues and a matrix whose columns
        are the corresponding eigenvectors, in the reversed SVD order.

    :raises ValueError: If ``type != 1``.
    :raises SymeigException: If no singular value of B reaches
        ``rank_threshold``.
    """
    if type != 1:
        raise ValueError('Only type=1 is supported.')
    mult = mdp.utils.mult

    # SVD-based method appears to be particularly unstable if blank lines
    # and columns exist in B. So we circumvent this case:
    nonzero_idx = _find_blank_data_idx(B, rank_threshold)
    if nonzero_idx is not None:
        orig_shape = B.shape
        B = B[nonzero_idx, :][:, nonzero_idx]
        A = A[nonzero_idx, :][:, nonzero_idx]

    dcov_mtx = A
    cov_mtx = B
    U, s, _ = mdp.utils.svd(cov_mtx)

    # Count trailing singular values below the threshold. The scan is
    # bounded so a fully degenerate B gives a clear error, not IndexError.
    off = 0
    while off < len(s) and s[-1 - off] < rank_threshold:
        off += 1
    if off >= len(s):
        raise SymeigException(
            'No singular value of B reaches rank_threshold=%e.'
            % rank_threshold)

    # Tracked locally and written to dfc_out once at the end, so the
    # attribute is always assigned (even when it is 0) and never relies
    # on a pre-existing value.
    rank_deficit = off
    if off > 0:
        s = s[:-off]
        U = U[:, :-off]

    # scale the retained singular vectors by 1/sqrt(s)
    X1 = mult(U, numx.diag(1.0 / s ** 0.5))
    X2, _, _ = mdp.utils.svd(mult(X1.T, mult(dcov_mtx, X1)))
    E = mult(X1, X2)
    e = mult(E.T, mult(dcov_mtx, E)).diagonal()

    # SVD delivers the eigenvalues sorted in reverse (compared to symeig).
    # Thus we manually reverse the array/matrix storing the
    # eigenvalues/vectors.
    e = e[::-1]
    E = E[:, ::-1]

    if range is not None:
        e = e[range[0] - 1:range[1]]
        E = E[:, range[0] - 1:range[1]]

    if nonzero_idx is not None:
        # restore ev to original size
        rank_deficit += orig_shape[0] - len(nonzero_idx)
        E_tmp = E
        E = numx.zeros((orig_shape[0], E.shape[1]))
        E[nonzero_idx, :] = E_tmp

    if dfc_out is not None:
        dfc_out.rank_deficit = rank_deficit
    return e, E
def symeig_semidefinite_ldl(A, B=None, eigenvectors=True, turbo="on", rng=None,
                            type=1, overwrite=False, rank_threshold=1e-12,
                            dfc_out=None):
    """LDL-based routine to solve generalized symmetric positive semidefinite
    eigenvalue problems.

    This can be used if the normal ``symeig()`` call in ``_stop_training()``
    throws ``SymeigException('Covariance matrices may be singular')``.

    This solver uses SciPy's raw LAPACK interface to access LDL decomposition.
    http://www.netlib.org/lapack/lug/node54.html describes how to solve a
    generalized eigenvalue problem with positive definite B using Cholesky/LL
    decomposition. We extend this method to solve for positive semidefinite B
    using LDL decomposition, which is a variant of Cholesky/LL decomposition
    for indefinite Matrices.

    Accessing raw LAPACK's LDL decomposition (sytrf) is challenging. This code
    is partly based on code for SciPy 1.1:
    http://github.com/scipy/scipy/pull/7941/files#diff-9bf9b4b2f0f40415bc0e72143584c889
    We optimized and shortened that code for the real-valued positive
    semidefinite case.

    This procedure is almost as efficient as the ordinary eigh implementation.
    This is because implementations for symmetric generalized eigenvalue
    problems usually perform the Cholesky approach mentioned above. The more
    general LDL decomposition is only slightly more expensive than Cholesky,
    due to pivotization.

    .. note:: This method requires SciPy >= 1.0.

    The signature of this function equals that of ``mdp.utils.symeig``,
    but has two additional parameters:

    :param A: Left-hand matrix of the problem. It is permuted in place when
        ``overwrite`` is true, otherwise a copy is used.
    :param B: Right-hand matrix of the problem; it is the one that is
        LDL-decomposed.
    :param eigenvectors: Not read by this function.
    :param turbo: Forwarded to ``mdp.utils.symeig``.
    :param rng: Forwarded to ``mdp.utils.symeig``.
    :param type: Must be 1.
    :param overwrite: Passed to the LAPACK solver as ``overwrite_a`` and
        decides whether A is copied before being modified.
    :param rank_threshold: A threshold to determine if an eigenvalue
        counts as zero.
    :type rank_threshold: float

    :param dfc_out: If ``dfc_out`` is not ``None``, ``dfc_out.rank_deficit``
        will be set to an integer indicating how many zero-eigenvalues
        were detected.

    :return: Tuple ``(eg, ev)`` as returned by ``mdp.utils.symeig`` for the
        reduced problem, with ``ev`` mapped back to the original size.

    :raises ValueError: If ``type != 1``.
    :raises SymeigException: If the required LAPACK/BLAS routines cannot be
        obtained from SciPy.
    """
    if type != 1:
        raise ValueError('Only type=1 is supported.')

    # LDL-based method appears to be particularly unstable if blank lines
    # and columns exist in B. So we circumvent this case:
    nonzero_idx = _find_blank_data_idx(B, rank_threshold)
    if not nonzero_idx is None:
        orig_shape = B.shape
        B = B[nonzero_idx, :][:, nonzero_idx]
        A = A[nonzero_idx, :][:, nonzero_idx]

    # This method has special requirements, which is why we import here
    # rather than module wide.
    from scipy.linalg.lapack import get_lapack_funcs, _compute_lwork
    from scipy.linalg.blas import get_blas_funcs
    try:
        # trtri: triangular inverse, sytrf: LDL factorization,
        # trmm: triangular matrix product
        inv_tri, solver, solver_lwork = get_lapack_funcs(
            ('trtri', 'sytrf', 'sytrf_lwork'), (B, ))
        mult_tri, = get_blas_funcs(('trmm', ), (B, ))
    except ValueError:
        err_msg = ("ldl method for solving symeig with rank deficit B "
                   "requires at least SciPy 1.0.")
        raise SymeigException(err_msg)

    n = B.shape[0]
    arng = numx.arange(n)
    lwork = _compute_lwork(solver_lwork, n, lower=1)
    lu, piv, _ = solver(B, lwork=lwork, lower=1, overwrite_a=overwrite)

    # using piv properly requires some postprocessing:
    # swap_ collects the row swaps, pivs the block size (1 or 2) per index
    swap_ = numx.arange(n)
    pivs = numx.zeros(swap_.shape, dtype=int)
    skip_2x2 = False
    for ind in range(n):
        # If previous spin belonged already to a 2x2 block
        if skip_2x2:
            skip_2x2 = False
            continue

        cur_val = piv[ind]
        # do we have a 1x1 block or not?
        if cur_val > 0:
            if cur_val != ind + 1:
                # Index value != array value --> permutation required
                swap_[ind] = swap_[cur_val - 1]
            pivs[ind] = 1
        # Not.
        # NOTE(review): piv[ind + 1] assumes negative entries come in
        # pairs; an unpaired negative last entry would index past the end.
        elif cur_val < 0 and cur_val == piv[ind + 1]:
            # first neg entry of 2x2 block identifier
            if -cur_val != ind + 2:
                # Index value != array value --> permutation required
                swap_[ind + 1] = swap_[-cur_val - 1]
            pivs[ind] = 2
            skip_2x2 = True

    # replay the swaps from the last index down to the first, on lu and on
    # the index vector full_perm
    full_perm = numx.arange(n)
    for ind in range(n - 1, -1, -1):
        s_ind = swap_[ind]
        if s_ind != ind:
            col_s = ind if pivs[ind] else ind - 1  # 2x2 block
            lu[[s_ind, ind], col_s:] = lu[[ind, s_ind], col_s:]
            full_perm[[s_ind, ind]] = full_perm[[ind, s_ind]]
    # usually only a few indices actually permute, so we reduce perm:
    perm = (full_perm - arng).nonzero()[0]
    perm_idx = full_perm[perm]
    # end of ldl postprocessing
    # perm_idx and perm now describe a permutation as dest and source indexes

    lu[perm_idx, :] = lu[perm, :]

    # diagonal magnitudes; entries above the threshold are kept (dnz)
    dgd = abs(numx.diag(lu))
    dnz = (dgd > rank_threshold).nonzero()[0]
    dgd_sqrt_I = numx.sqrt(1.0 / dgd[dnz])
    rank_deficit = len(dgd) - len(dnz)  # later used

    # c, lower, unitdiag, overwrite_c
    LI, _ = inv_tri(lu, 1, 1, 1)  # invert triangular
    # we mainly apply tril here, because we need to make a
    # copy of LI anyway, because original result from
    # dtrtri seems to be read-only regarding some operations
    LI = numx.tril(LI, -1)
    LI[arng, arng] = 1
    # scale the kept rows by 1/sqrt of their diagonal entry
    LI[dnz, :] *= dgd_sqrt_I.reshape((dgd_sqrt_I.shape[0], 1))

    # apply the same permutation to rows and columns of A
    A2 = A if overwrite else A.copy()
    A2[perm_idx, :] = A2[perm, :]
    A2[:, perm_idx] = A2[:, perm]
    # alpha, a, b, side 0=left 1=right, lower, trans_a, diag 1=unitdiag,
    # overwrite_b
    A2 = mult_tri(1.0, LI, A2, 1, 1, 1, 0, 1)  # A2 = mult(A2, LI.T)
    A2 = mult_tri(1.0, LI, A2, 0, 1, 0, 0, 1)  # A2 = mult(LI, A2)
    # drop the rows/columns that belong to (near-)zero diagonal entries
    A2 = A2[dnz, :]
    A2 = A2[:, dnz]

    # overwrite=True is okay here, because at this point A2 is a copy anyway
    eg, ev = mdp.utils.symeig(A2, None, True, turbo, rng, overwrite=True)
    # map the eigenvectors of the reduced problem back
    ev = mdp.utils.mult(LI[dnz].T, ev) if rank_deficit \
        else mult_tri(1.0, LI, ev, 0, 1, 1, 0, 1)
    # undo the permutation on the eigenvector rows
    ev[perm] = ev[perm_idx]

    if not nonzero_idx is None:
        # restore ev to original size
        rank_deficit += orig_shape[0] - len(nonzero_idx)
        ev_tmp = ev
        ev = numx.zeros((orig_shape[0], ev.shape[1]))
        ev[nonzero_idx, :] = ev_tmp

    if not dfc_out is None:
        dfc_out.rank_deficit = rank_deficit
    return eg, ev
def _stop_training(self, debug=False):
    """Stop the training phase.

    Fixes the accumulated covariance matrix, computes its eigenvalues and
    eigenvectors, sorts them in descending order, optionally truncates
    them (by ``desired_variance`` and/or the ``reduce`` thresholds) and
    stores the results in ``self.d``, ``self.v``, ``self.avg``,
    ``self.explained_variance`` and ``self.total_variance``.

    :param debug: If True, the covariance matrix is not overwritten by the
        eigensolver and is kept in ``self.cov_mtx`` so it can be examined
        when stop_training fails because of a singular covariance matrix.
        A warning is also emitted when there are fewer observations than
        input variables. Default is False.
    :type debug: bool

    :raises mdp.NodeException: If negative eigenvalues occur, the
        covariance matrix may be singular or no component amounts to
        variation exceeding var_abs.
    """
    # request the covariance matrix and clean up
    self.cov_mtx, avg, self.tlen = self._cov_mtx.fix()
    del self._cov_mtx

    # this is a bit counterintuitive, as it reshapes the average vector to
    # be a matrix. in this way, however, we spare the reshape
    # operation every time that 'execute' is called.
    self.avg = avg.reshape(1, avg.shape[0])

    # range for the eigenvalues
    rng = self._adjust_output_dim()

    # if we have more variables than observations we are bound to fail here
    # suggest to use the NIPALSNode instead.
    if debug and self.tlen < self.input_dim:
        wrn = ('The number of observations (%d) '
               'is smaller than the number of input variables '
               '(%d). You may want to use '
               'the NIPALSNode instead.' % (self.tlen, self.input_dim))
        _warnings.warn(wrn, mdp.MDPWarning)

    # total variance can be computed at this point:
    # note that vartot == d.sum()
    vartot = numx.diag(self.cov_mtx).sum()

    ## compute and sort the eigenvalues
    # compute the eigenvectors of the covariance matrix (inplace)
    # (eigenvalues sorted in ascending order)
    try:
        d, v = self._symeig(self.cov_mtx, range=rng, overwrite=(not debug))
        # if reduce=False and svd=False. we should check for
        # negative eigenvalues and fail
        if not (self.reduce or self.svd
                or (self.desired_variance is not None)):
            if d.min() < 0:
                raise mdp.NodeException(
                    "Got negative eigenvalues: %s.\n"
                    "You may either set output_dim to be smaller, "
                    "or set reduce=True and/or svd=True" % str(d))
    except SymeigException as exception:
        err = str(exception) + ("\nCovariance matrix may be singular. "
                                "Try setting svd=True.")
        raise mdp.NodeException(err)

    # delete covariance matrix if no exception occurred
    if not debug:
        del self.cov_mtx

    # sort by descending order
    d = numx.take(d, list(range(d.shape[0] - 1, -1, -1)))
    v = v[:, ::-1]

    if self.desired_variance is not None:
        # throw away immediately negative eigenvalues
        d = d[d > 0]
        # the number of principal components to keep has
        # been specified by the fraction of variance to be explained
        varcum = (old_div(d, vartot)).cumsum(axis=0)
        # select only the relevant eigenvalues
        # number of relevant eigenvalues
        neigval = int(varcum.searchsorted(self.desired_variance) + 1.)
        # cut
        d = d[0:neigval]
        v = v[:, 0:neigval]
        # define the new output dimension
        self.output_dim = int(neigval)

    # automatic dimensionality reduction
    if self.reduce:
        # remove entries that are smaller than var_abs and
        # smaller than var_rel relative to the maximum
        d = d[d > self.var_abs]
        # check that we did not throw away everything
        if len(d) == 0:
            raise mdp.NodeException('No eigenvalues larger than'
                                    ' var_abs=%e!' % self.var_abs)
        d = d[old_div(d, d.max()) > self.var_rel]

        # filter for variance relative to total variance
        if self.var_part:
            d = d[old_div(d, vartot) > self.var_part]

        # d is sorted descending, so the kept values are the leading ones
        v = v[:, 0:d.shape[0]]
        self._output_dim = d.shape[0]

    # set explained variance
    self.explained_variance = old_div(d.sum(), vartot)

    # store the eigenvalues
    self.d = d

    # store the eigenvectors
    self.v = v

    # store the total variance
    self.total_variance = vartot
def symeig_semidefinite_svd(
        A, B=None, eigenvectors=True, turbo="on", range=None,
        type=1, overwrite=False, rank_threshold=1e-12, dfc_out=None):
    """
    SVD-based routine to solve generalized symmetric positive semidefinite
    eigenvalue problems.
    This can be used in case the normal symeig() call in _stop_training()
    throws SymeigException ('Covariance matrices may be singular').

    This solver's computational cost depends on the underlying SVD
    implementation. Its dominant cost factor consists of two SVD runs.

    For details on the used algorithm see:
        http://www.geo.tuwien.ac.at/downloads/tm/svd.pdf (section 0.3.2)

    The signature of this function equals that of mdp.utils.symeig, but
    has two additional parameters:

    rank_threshold: A threshold to determine if an eigenvalue counts as
        zero.

    dfc_out: If dfc_out is not None dfc_out.rank_deficit will be set to an
        integer indicating how many zero-eigenvalues were detected.

    Other parameters as used here:

    A: Left-hand matrix of the problem.
    B: Right-hand matrix of the problem; it is the one that is
        SVD-decomposed and checked for rank deficit.
    range: Optional 1-based inclusive pair (lo, hi) selecting which
        eigenvalues/eigenvectors to return.

    Returns a tuple (e, E) of eigenvalues and a matrix whose columns are
    the corresponding eigenvectors, in the reversed SVD order.

    Raises ValueError if type != 1 and SymeigException if no singular
    value of B reaches rank_threshold.

    Note:
        The parameters eigenvectors, turbo, overwrite are not used and
        type must be 1. They only exist to provide a symeig compatible
        signature (which is also why 'range' and 'type' shadow builtins).
    """
    if type != 1:
        raise ValueError('Only type=1 is supported.')
    mult = mdp.utils.mult

    # SVD-based method appears to be particularly unstable if blank lines
    # and columns exist in B. So we circumvent this case:
    nonzero_idx = _find_blank_data_idx(B, rank_threshold)
    if nonzero_idx is not None:
        orig_shape = B.shape
        B = B[nonzero_idx, :][:, nonzero_idx]
        A = A[nonzero_idx, :][:, nonzero_idx]

    dcov_mtx = A
    cov_mtx = B
    U, s, _ = mdp.utils.svd(cov_mtx)

    # Count trailing singular values below the threshold. The scan is
    # bounded so a fully degenerate B gives a clear error, not IndexError.
    off = 0
    while off < len(s) and s[-1-off] < rank_threshold:
        off += 1
    if off >= len(s):
        raise SymeigException(
            'No singular value of B reaches rank_threshold=%e.'
            % rank_threshold)

    # Tracked locally and written to dfc_out once at the end, so the
    # attribute is always assigned (even when it is 0) and never relies
    # on a pre-existing value.
    rank_deficit = off
    if off > 0:
        s = s[:-off]
        U = U[:, :-off]

    # scale the retained singular vectors by 1/sqrt(s)
    X1 = mult(U, numx.diag(1.0 / s ** 0.5))
    X2, _, _ = mdp.utils.svd(mult(X1.T, mult(dcov_mtx, X1)))
    E = mult(X1, X2)
    e = mult(E.T, mult(dcov_mtx, E)).diagonal()

    # SVD delivers the eigenvalues sorted in reverse (compared to symeig).
    # Thus we manually reverse the array/matrix storing the
    # eigenvalues/vectors.
    e = e[::-1]
    E = E[:, ::-1]

    if range is not None:
        e = e[range[0] - 1:range[1]]
        E = E[:, range[0] - 1:range[1]]

    if nonzero_idx is not None:
        # restore ev to original size
        rank_deficit += orig_shape[0]-len(nonzero_idx)
        E_tmp = E
        E = numx.zeros((orig_shape[0], E.shape[1]))
        E[nonzero_idx, :] = E_tmp

    if dfc_out is not None:
        dfc_out.rank_deficit = rank_deficit
    return e, E
def symeig_semidefinite_ldl(
        A, B = None, eigenvectors=True, turbo="on", rng=None,
        type=1, overwrite=False, rank_threshold=1e-12, dfc_out=None):
    """
    LDL-based routine to solve generalized symmetric positive semidefinite
    eigenvalue problems.
    This can be used in case the normal symeig() call in _stop_training()
    throws SymeigException ('Covariance matrices may be singular').

    This solver uses SciPy's raw LAPACK interface to access LDL decomposition.
    www.netlib.org/lapack/lug/node54.html describes how to solve a
    generalized eigenvalue problem with positive definite B using Cholesky/LL
    decomposition. We extend this method to solve for positive semidefinite B
    using LDL decomposition, which is a variant of Cholesky/LL decomposition
    for indefinite Matrices.
    Accessing raw LAPACK's LDL decomposition (sytrf) is challenging. This code
    is partly based on code for SciPy 1.1:
    github.com/scipy/scipy/pull/7941/files#diff-9bf9b4b2f0f40415bc0e72143584c889
    We optimized and shortened that code for the real-valued positive
    semidefinite case.

    This procedure is almost as efficient as the ordinary eigh implementation.
    This is because implementations for symmetric generalized eigenvalue
    problems usually perform the Cholesky approach mentioned above. The more
    general LDL decomposition is only slightly more expensive than Cholesky,
    due to pivotization.

    The signature of this function equals that of mdp.utils.symeig, but
    has two additional parameters:

    rank_threshold: A threshold to determine if an eigenvalue counts as
        zero.

    dfc_out: If dfc_out is not None dfc_out.rank_deficit will be set to an
        integer indicating how many zero-eigenvalues were detected.

    Other parameters as used here:

    A: Left-hand matrix of the problem. It is permuted in place when
        overwrite is true, otherwise a copy is used.
    B: Right-hand matrix of the problem; it is the one that is
        LDL-decomposed.
    eigenvectors: Not read by this function.
    turbo, rng: Forwarded to mdp.utils.symeig.
    type: Must be 1, otherwise ValueError is raised.
    overwrite: Passed to the LAPACK solver as overwrite_a and decides
        whether A is copied before being modified.

    Returns the tuple (eg, ev) obtained from mdp.utils.symeig for the
    reduced problem, with ev mapped back to the original size.

    Raises SymeigException if the required LAPACK/BLAS routines cannot be
    obtained from SciPy.

    Note:
        This method requires SciPy >= 1.0.
    """
    if type != 1:
        raise ValueError('Only type=1 is supported.')

    # LDL-based method appears to be particularly unstable if blank lines
    # and columns exist in B. So we circumvent this case:
    nonzero_idx = _find_blank_data_idx(B, rank_threshold)
    if not nonzero_idx is None:
        orig_shape = B.shape
        B = B[nonzero_idx, :][:, nonzero_idx]
        A = A[nonzero_idx, :][:, nonzero_idx]

    # This method has special requirements, which is why we import here
    # rather than module wide.
    from scipy.linalg.lapack import get_lapack_funcs, _compute_lwork
    from scipy.linalg.blas import get_blas_funcs
    try:
        # trtri: triangular inverse, sytrf: LDL factorization,
        # trmm: triangular matrix product
        inv_tri, solver, solver_lwork = get_lapack_funcs(
            ('trtri', 'sytrf', 'sytrf_lwork'), (B,))
        mult_tri, = get_blas_funcs(('trmm',), (B,))
    except ValueError:
        err_msg = ("ldl method for solving symeig with rank deficit B "
                   "requires at least SciPy 1.0.")
        raise SymeigException(err_msg)

    n = B.shape[0]
    arng = numx.arange(n)
    lwork = _compute_lwork(solver_lwork, n, lower=1)
    lu, piv, _ = solver(B, lwork=lwork, lower=1, overwrite_a=overwrite)

    # using piv properly requires some postprocessing:
    # swap_ collects the row swaps, pivs the block size (1 or 2) per index
    swap_ = numx.arange(n)
    pivs = numx.zeros(swap_.shape, dtype=int)
    skip_2x2 = False
    for ind in range(n):
        # If previous spin belonged already to a 2x2 block
        if skip_2x2:
            skip_2x2 = False
            continue

        cur_val = piv[ind]
        # do we have a 1x1 block or not?
        if cur_val > 0:
            if cur_val != ind+1:
                # Index value != array value --> permutation required
                swap_[ind] = swap_[cur_val-1]
            pivs[ind] = 1
        # Not.
        # NOTE(review): piv[ind+1] assumes negative entries come in
        # pairs; an unpaired negative last entry would index past the end.
        elif cur_val < 0 and cur_val == piv[ind+1]:
            # first neg entry of 2x2 block identifier
            if -cur_val != ind+2:
                # Index value != array value --> permutation required
                swap_[ind+1] = swap_[-cur_val-1]
            pivs[ind] = 2
            skip_2x2 = True

    # replay the swaps from the last index down to the first, on lu and on
    # the index vector full_perm
    full_perm = numx.arange(n)
    for ind in range(n-1, -1, -1):
        s_ind = swap_[ind]
        if s_ind != ind:
            col_s = ind if pivs[ind] else ind-1  # 2x2 block
            lu[[s_ind, ind], col_s:] = lu[[ind, s_ind], col_s:]
            full_perm[[s_ind, ind]] = full_perm[[ind, s_ind]]
    # usually only a few indices actually permute, so we reduce perm:
    perm = (full_perm-arng).nonzero()[0]
    perm_idx = full_perm[perm]
    # end of ldl postprocessing
    # perm_idx and perm now describe a permutation as dest and source indexes

    lu[perm_idx, :] = lu[perm, :]

    # diagonal magnitudes; entries above the threshold are kept (dnz)
    dgd = abs(numx.diag(lu))
    dnz = (dgd > rank_threshold).nonzero()[0]
    dgd_sqrt_I = numx.sqrt(1.0/dgd[dnz])
    rank_deficit = len(dgd) - len(dnz)  # later used

    # c, lower, unitdiag, overwrite_c
    LI, _ = inv_tri(lu, 1, 1, 1)  # invert triangular
    # we mainly apply tril here, because we need to make a
    # copy of LI anyway, because original result from
    # dtrtri seems to be read-only regarding some operations
    LI = numx.tril(LI, -1)
    LI[arng, arng] = 1
    # scale the kept rows by 1/sqrt of their diagonal entry
    LI[dnz, :] *= dgd_sqrt_I.reshape((dgd_sqrt_I.shape[0], 1))

    # apply the same permutation to rows and columns of A
    A2 = A if overwrite else A.copy()
    A2[perm_idx, :] = A2[perm, :]
    A2[:, perm_idx] = A2[:, perm]
    # alpha, a, b, side 0=left 1=right, lower, trans_a, diag 1=unitdiag,
    # overwrite_b
    A2 = mult_tri(1.0, LI, A2, 1, 1, 1, 0, 1)  # A2 = mult(A2, LI.T)
    A2 = mult_tri(1.0, LI, A2, 0, 1, 0, 0, 1)  # A2 = mult(LI, A2)
    # drop the rows/columns that belong to (near-)zero diagonal entries
    A2 = A2[dnz, :]
    A2 = A2[:, dnz]

    # overwrite=True is okay here, because at this point A2 is a copy anyway
    eg, ev = mdp.utils.symeig(A2, None, True, turbo, rng, overwrite=True)
    # map the eigenvectors of the reduced problem back
    ev = mdp.utils.mult(LI[dnz].T, ev) if rank_deficit \
        else mult_tri(1.0, LI, ev, 0, 1, 1, 0, 1)
    # undo the permutation on the eigenvector rows
    ev[perm] = ev[perm_idx]

    if not nonzero_idx is None:
        # restore ev to original size
        rank_deficit += orig_shape[0]-len(nonzero_idx)
        ev_tmp = ev
        ev = numx.zeros((orig_shape[0], ev.shape[1]))
        ev[nonzero_idx, :] = ev_tmp

    if not dfc_out is None:
        dfc_out.rank_deficit = rank_deficit
    return eg, ev
def _stop_training(self, debug=False):
    """Stop the training phase.

    Fixes the accumulated covariance matrix, computes its eigenvalues and
    eigenvectors, sorts them in descending order, optionally truncates
    them (by ``desired_variance`` and/or the ``reduce`` thresholds) and
    stores the results in ``self.d``, ``self.v``, ``self.avg``,
    ``self.explained_variance`` and ``self.total_variance``.

    :param debug: If True, the covariance matrix is not overwritten by the
        eigensolver and is kept in ``self.cov_mtx`` so it can be examined
        when stop_training fails because of a singular covariance matrix.
        A warning is also emitted when there are fewer observations than
        input variables. Default is False.
    :type debug: bool

    :raises mdp.NodeException: If negative eigenvalues occur, the
        covariance matrix may be singular or no component amounts to
        variation exceeding var_abs.
    """
    # request the covariance matrix and clean up
    self.cov_mtx, avg, self.tlen = self._cov_mtx.fix()
    del self._cov_mtx

    # this is a bit counterintuitive, as it reshapes the average vector to
    # be a matrix. in this way, however, we spare the reshape
    # operation every time that 'execute' is called.
    self.avg = avg.reshape(1, avg.shape[0])

    # range for the eigenvalues
    rng = self._adjust_output_dim()

    # if we have more variables than observations we are bound to fail here
    # suggest to use the NIPALSNode instead.
    if debug and self.tlen < self.input_dim:
        wrn = ('The number of observations (%d) '
               'is smaller than the number of input variables '
               '(%d). You may want to use '
               'the NIPALSNode instead.' % (self.tlen, self.input_dim))
        _warnings.warn(wrn, mdp.MDPWarning)

    # total variance can be computed at this point:
    # note that vartot == d.sum()
    vartot = numx.diag(self.cov_mtx).sum()

    ## compute and sort the eigenvalues
    # compute the eigenvectors of the covariance matrix (inplace)
    # (eigenvalues sorted in ascending order)
    try:
        d, v = self._symeig(self.cov_mtx, range=rng, overwrite=(not debug))
        # if reduce=False and svd=False. we should check for
        # negative eigenvalues and fail
        if not (self.reduce or self.svd
                or (self.desired_variance is not None)):
            if d.min() < 0:
                raise mdp.NodeException(
                    "Got negative eigenvalues: %s.\n"
                    "You may either set output_dim to be smaller, "
                    "or set reduce=True and/or svd=True" % str(d))
    except SymeigException as exception:
        err = str(exception)+("\nCovariance matrix may be singular. "
                              "Try setting svd=True.")
        raise mdp.NodeException(err)

    # delete covariance matrix if no exception occurred
    if not debug:
        del self.cov_mtx

    # sort by descending order
    d = numx.take(d, list(range(d.shape[0]-1, -1, -1)))
    v = v[:, ::-1]

    if self.desired_variance is not None:
        # throw away immediately negative eigenvalues
        d = d[d > 0]
        # the number of principal components to keep has
        # been specified by the fraction of variance to be explained
        varcum = (old_div(d, vartot)).cumsum(axis=0)
        # select only the relevant eigenvalues
        # number of relevant eigenvalues
        neigval = int(varcum.searchsorted(self.desired_variance) + 1.)
        # cut
        d = d[0:neigval]
        v = v[:, 0:neigval]
        # define the new output dimension
        self.output_dim = int(neigval)

    # automatic dimensionality reduction
    if self.reduce:
        # remove entries that are smaller than var_abs and
        # smaller than var_rel relative to the maximum
        d = d[d > self.var_abs]
        # check that we did not throw away everything
        if len(d) == 0:
            raise mdp.NodeException('No eigenvalues larger than'
                                    ' var_abs=%e!'%self.var_abs)
        d = d[old_div(d, d.max()) > self.var_rel]

        # filter for variance relative to total variance
        if self.var_part:
            d = d[old_div(d, vartot) > self.var_part]

        # d is sorted descending, so the kept values are the leading ones
        v = v[:, 0:d.shape[0]]
        self._output_dim = d.shape[0]

    # set explained variance
    self.explained_variance = old_div(d.sum(), vartot)

    # store the eigenvalues
    self.d = d

    # store the eigenvectors
    self.v = v

    # store the total variance
    self.total_variance = vartot