class PCANode(mdp.Node):
    """Filter the input data through its most significant principal
    components.

    :ivar avg: Mean of the input data (available after training).

    :ivar v: Transpose of the projection matrix (available after training).

    :ivar d: Variance corresponding to the PCA components (eigenvalues of the
        covariance matrix).

    :ivar explained_variance: When output_dim has been specified as a fraction
        of the total variance, this is the fraction of the total variance that
        is actually explained.

    |

    .. admonition:: Reference

        More information about Principal Component Analysis, a.k.a. discrete
        Karhunen-Loeve transform, can be found among others in
        I.T. Jolliffe, Principal Component Analysis, Springer-Verlag (1986).
    """

    def __init__(self, input_dim=None, output_dim=None, dtype=None,
                 svd=False, reduce=False, var_rel=1E-12, var_abs=1E-15,
                 var_part=None):
        """Initializes an object of type 'PCANode'.

        The number of principal components to be kept can be specified as
        'output_dim' directly (e.g. 'output_dim=10' means 10 components are
        kept) or by the fraction of variance to be explained (e.g.
        'output_dim=0.95' means that as many components as necessary will be
        kept in order to explain 95% of the input variance).

        :param input_dim: Dimensionality of the input. Default is None.
        :type input_dim: int

        :param output_dim: Dimensionality of the output. Default is None.
        :type output_dim: int

        :param dtype: Datatype of the input. Default is None.
        :type dtype: numpy.dtype, str

        :param svd: If True, use Singular Value Decomposition instead of the
            standard eigenvalue problem solver. Use it when PCANode complains
            about singular covariance matrices. Default is False.
        :type svd: bool

        :param reduce: Keep only those principal components which have a
            variance larger than 'var_abs', a variance relative to the first
            principal component larger than 'var_rel', and a variance relative
            to the total variance larger than 'var_part' (set var_part to None
            or 0 for no filtering). Default is False.
        :type reduce: bool

        .. note:: When the *reduce* switch is enabled, the actual number of
            principal components (self.output_dim) may be different from that
            set when creating the instance.

        :param var_rel: Variance relative to first principal component
            threshold. Default is 1E-12.
        :type var_rel: float

        :param var_abs: Absolute variance threshold. Default is 1E-15.
        :type var_abs: float

        :param var_part: Variance relative to total variance threshold.
            Default is None.
        :type var_part: float
        """
        # this must occur *before* calling super!
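        # (Added explanatory note, not in the original source: presumably the
        # base ``Node`` constructor forwards ``output_dim`` to
        # ``_set_output_dim``, which stores a fractional ``output_dim`` in
        # ``self.desired_variance``; initializing the attribute only after the
        # ``super`` call would overwrite that value with ``None``.)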
self.desired_variance = None super(PCANode, self).__init__(input_dim, output_dim, dtype) self.svd = svd # set routine for eigenproblem if svd: self._symeig = nongeneral_svd else: self._symeig = symeig self.var_abs = var_abs self.var_rel = var_rel self.var_part = var_part self.reduce = reduce # empirical covariance matrix, updated during the training phase self._cov_mtx = CovarianceMatrix(dtype) # attributes that defined in stop_training self.d = None # eigenvalues self.v = None # eigenvectors, first index for coordinates self.total_variance = None self.tlen = None self.avg = None self.explained_variance = None def _set_output_dim(self, n): if n <= 1 and isinstance(n, float): # set the output dim after training, when the variances are known self.desired_variance = n else: self._output_dim = n def _check_output(self, y): # check output rank if not y.ndim == 2: error_str = "y has rank %d, should be 2" % (y.ndim) raise mdp.NodeException(error_str) if y.shape[1] == 0 or y.shape[1] > self.output_dim: error_str = ("y has dimension %d" ", should be 0<y<=%d" % (y.shape[1], self.output_dim)) raise mdp.NodeException(error_str) def get_explained_variance(self): """The explained variance is the fraction of the original variance that can be explained by self._output_dim PCA components. If for example output_dim has been set to 0.95, the explained variance could be something like 0.958... .. note:: If output_dim was explicitly set to be a fixed number of components, there is no way to calculate the explained variance. :return: The explained variance. :rtype: float """ return self.explained_variance def _train(self, x): """Update the covariance matrix. :param x: The training data. :type x: numpy.ndarray """ self._cov_mtx.update(x) def _adjust_output_dim(self): """This function is used if the output dimensions is smaller than the input dimension (so only the larger eigenvectors have to be kept). If required it sets the output dim. :return: The eigenvector range. :rtype: tuple """ # if the number of principal components to keep is not specified, # keep all components if self.desired_variance is None and self.output_dim is None: self.output_dim = self.input_dim return None ## define the range of eigenvalues to compute # if the number of principal components to keep has been # specified directly if self.output_dim is not None and self.output_dim >= 1: # (eigenvalues sorted in ascending order) return (self.input_dim - self.output_dim + 1, self.input_dim) # otherwise, the number of principal components to keep has been # specified by the fraction of variance to be explained else: return None def _stop_training(self, debug=False): """Stop the training phase. :param debug: Determines if singular matrices itself are stored in self.cov_mtx and self.dcov_mtx to be examined, given that stop_training fails because of singular covmatrices. Default is False. :type debug: bool :raises mdp.NodeException: If negative eigenvalues occur, the covariance matrix may be singular or no component amounts to variation exceeding var_abs. """ # request the covariance matrix and clean up self.cov_mtx, avg, self.tlen = self._cov_mtx.fix() del self._cov_mtx # this is a bit counterintuitive, as it reshapes the average vector to # be a matrix. in this way, however, we spare the reshape # operation every time that 'execute' is called. 
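        # (Added note, not in the original source: an ``avg`` of shape
        # ``(n,)`` becomes ``(1, n)``, so that expressions such as
        # ``x - self.avg`` broadcast row-wise over an ``(m, n)`` data array
        # without an extra reshape on every call to ``execute``.)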
self.avg = avg.reshape(1, avg.shape[0]) # range for the eigenvalues rng = self._adjust_output_dim() # if we have more variables then observations we are bound to fail here # suggest to use the NIPALSNode instead. if debug and self.tlen < self.input_dim: wrn = ('The number of observations (%d) ' 'is larger than the number of input variables ' '(%d). You may want to use ' 'the NIPALSNode instead.' % (self.tlen, self.input_dim)) _warnings.warn(wrn, mdp.MDPWarning) # total variance can be computed at this point: # note that vartot == d.sum() vartot = numx.diag(self.cov_mtx).sum() ## compute and sort the eigenvalues # compute the eigenvectors of the covariance matrix (inplace) # (eigenvalues sorted in ascending order) try: d, v = self._symeig(self.cov_mtx, range=rng, overwrite=(not debug)) # if reduce=False and svd=False. we should check for # negative eigenvalues and fail if not (self.reduce or self.svd or (self.desired_variance is not None)): if d.min() < 0: raise mdp.NodeException( "Got negative eigenvalues: %s.\n" "You may either set output_dim to be smaller, " "or set reduce=True and/or svd=True" % str(d)) except SymeigException as exception: err = str(exception) + ("\nCovariance matrix may be singular." "Try setting svd=True.") raise mdp.NodeException(err) # delete covariance matrix if no exception occurred if not debug: del self.cov_mtx # sort by descending order d = numx.take(d, list(range(d.shape[0] - 1, -1, -1))) v = v[:, ::-1] if self.desired_variance is not None: # throw away immediately negative eigenvalues d = d[d > 0] # the number of principal components to keep has # been specified by the fraction of variance to be explained varcum = (old_div(d, vartot)).cumsum(axis=0) # select only the relevant eigenvalues # number of relevant eigenvalues neigval = int(varcum.searchsorted(self.desired_variance) + 1.) #self.explained_variance = varcum[neigval-1] # cut d = d[0:neigval] v = v[:, 0:neigval] # define the new output dimension self.output_dim = int(neigval) # automatic dimensionality reduction if self.reduce: # remove entries that are smaller then var_abs and # smaller then var_rel relative to the maximum d = d[d > self.var_abs] # check that we did not throw away everything if len(d) == 0: raise mdp.NodeException('No eigenvalues larger than' ' var_abs=%e!' % self.var_abs) d = d[old_div(d, d.max()) > self.var_rel] # filter for variance relative to total variance if self.var_part: d = d[old_div(d, vartot) > self.var_part] v = v[:, 0:d.shape[0]] self._output_dim = d.shape[0] # set explained variance self.explained_variance = old_div(d.sum(), vartot) # store the eigenvalues self.d = d # store the eigenvectors self.v = v # store the total variance self.total_variance = vartot def get_projmatrix(self, transposed=1): """Returns the projection matrix. :param transposed: Determines whether the transposed projection matrix is returned. Default is True. :type transposed: bool :return: The projection matrix. :rtype: numpy.ndarray """ self._if_training_stop_training() if transposed: return self.v return self.v.T def get_recmatrix(self, transposed=1): """Returns the the back-projection matrix (i.e. the reconstruction matrix). :param transposed: Determines whether the transposed back-projection matrix (i.e. the reconstruction matrix) is returned. Default is True. :type transposed: bool :return: The back-projection matrix (i.e. the reconstruction matrix). 
:rtype: numpy.ndarray """ self._if_training_stop_training() if transposed: return self.v.T return self.v def _execute(self, x, n=None): """Project the input on the first 'n' principal components. If 'n' is not set, use all available components. :param x: Input with at least 'n' principle components. :type x: numpy.ndarray :param n: Number of first principle components. :type n: int :return: The projected input. :rtype: numpy.ndarray """ if n is not None: return mult(x - self.avg, self.v[:, :n]) return mult(x - self.avg, self.v) def _inverse(self, y, n=None): """Project data from the output to the input space using the first 'n' components. If 'n' is not set, use all available components. :param y: Data to be projected to the input space. :type y: numpy.ndarray :param n: Number of first principle components. :type n: int :return: The projected data :rtype: numpy.ndarray """ if n is None: n = y.shape[1] if n > self.output_dim: error_str = ("y has dimension %d," " should be at most %d" % (n, self.output_dim)) raise mdp.NodeException(error_str) v = self.get_recmatrix() if n is not None: return mult(y, v[:n, :]) + self.avg return mult(y, v) + self.avg
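
# ---------------------------------------------------------------------------
# Usage sketch (added for illustration, not part of the original MDP source).
# It shows the two ways of choosing ``output_dim`` documented above: as a
# fixed number of components and as a fraction of the variance to explain.
# The toy data, the helper name ``_example_pca_usage`` and the chosen
# thresholds are hypothetical; only ``PCANode`` and the public ``Node`` API
# (``train``, ``stop_training``, ``execute``, ``inverse``) come from the code
# above.
# ---------------------------------------------------------------------------
def _example_pca_usage():
    import numpy as np

    rng = np.random.RandomState(0)
    # 500 samples of 10 variables with strongly unequal variances
    x = rng.randn(500, 10) * np.array([5., 3., 1., .5, .5, .1, .1, .1, .1, .1])

    # keep as many components as needed to explain 90% of the variance
    pca = PCANode(output_dim=0.9)
    pca.train(x)
    pca.stop_training()
    y = pca.execute(x)                  # project onto the retained components
    x_rec = pca.inverse(y)              # back-project into the input space
    kept = (pca.output_dim, pca.get_explained_variance())

    # alternatively, keep exactly three components; svd=True is the switch
    # suggested above for (near-)singular covariance matrices
    pca3 = PCANode(output_dim=3, svd=True)
    pca3.train(x)
    pca3.stop_training()
    return kept, pca3.get_projmatrix().shape, x_rec.shape
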
class SFANode(Node): """Extract the slowly varying components from the input data. :ivar avg: Mean of the input data (available after training) :vartype avg: numpy.ndarray :ivar sf: Matrix of the SFA filters (available after training) :vartype sf: numpy.ndarray :ivar d: Delta values corresponding to the SFA components (generalized eigenvalues). [See the docs of the ``get_eta_values`` method for more information] :vartype d: numpy.ndarray .. admonition:: Reference More information about Slow Feature Analysis can be found in Wiskott, L. and Sejnowski, T.J., Slow Feature Analysis: Unsupervised Learning of Invariances, Neural Computation, 14(4):715-770 (2002). """ def __init__(self, input_dim=None, output_dim=None, dtype=None, include_last_sample=True, rank_deficit_method='none'): """Initialize an object of type 'SFANode'. :param input_dim: The input dimensionality. :type input_dim: int :param output_dim: The output dimensionality. :type output_dim: int :param dtype: The datatype. :type dtype: numpy.dtype or str :param include_last_sample: If ``False`` the `train` method discards the last sample in every chunk during training when calculating the covariance matrix. The last sample is in this case only used for calculating the covariance matrix of the derivatives. The switch should be set to ``False`` if you plan to train with several small chunks. For example we can split a sequence (index is time):: x_1 x_2 x_3 x_4 in smaller parts like this:: x_1 x_2 x_2 x_3 x_3 x_4 The SFANode will see 3 derivatives for the temporal covariance matrix, and the first 3 points for the spatial covariance matrix. Of course you will need to use a generator that *connects* the small chunks (the last sample needs to be sent again in the next chunk). If ``include_last_sample`` was True, depending on the generator you use, you would either get:: x_1 x_2 x_2 x_3 x_3 x_4 in which case the last sample of every chunk would be used twice when calculating the covariance matrix, or:: x_1 x_2 x_3 x_4 in which case you loose the derivative between ``x_3`` and ``x_2``. If you plan to train with a single big chunk leave ``include_last_sample`` to the default value, i.e. ``True``. You can even change this behaviour during training. Just set the corresponding switch in the `train` method. :type include_last_sample: bool :param rank_deficit_method: Possible values: 'none' (default), 'reg', 'pca', 'svd', 'auto' If not 'none', the ``stop_train`` method solves the SFA eigenvalue problem in a way that is robust against linear redundancies in the input data. This would otherwise lead to rank deficit in the covariance matrix, which usually yields a SymeigException ('Covariance matrices may be singular'). There are several solving methods implemented: reg - works by regularization pca - works by PCA svd - works by SVD ldl - works by LDL decomposition (requires SciPy >= 1.0) auto - (Will be: selects the best-benchmarked method of the above) Currently it simply selects pca. Note: If you already received an exception SymeigException ('Covariance matrices may be singular') you can manually set the solving method for an existing node:: sfa.set_rank_deficit_method('pca') That means,:: sfa = SFANode(rank_deficit='pca') is equivalent to:: sfa = SFANode() sfa.set_rank_deficit_method('pca') After such an adjustment you can run ``stop_training()`` again, which would save a potentially time-consuming rerun of all ``train()`` calls. 
:type rank_deficit_method: str """ super(SFANode, self).__init__(input_dim, output_dim, dtype) self._include_last_sample = include_last_sample # init two covariance matrices self._init_cov() # set routine for eigenproblem self.set_rank_deficit_method(rank_deficit_method) self.rank_threshold = 1e-12 self.rank_deficit = 0 # SFA eigenvalues and eigenvectors, will be set after training self.d = None self.sf = None # second index for outputs self.avg = None self._bias = None # avg multiplied with sf self.tlen = None def _init_cov(self): # init two covariance matrices # one for the input data self._cov_mtx = CovarianceMatrix(self.dtype) # one for the input data self._dcov_mtx = CovarianceMatrix(self.dtype) def set_rank_deficit_method(self, rank_deficit_method): if rank_deficit_method == 'pca': self._symeig = symeig_semidefinite_pca elif rank_deficit_method == 'reg': self._symeig = symeig_semidefinite_reg elif rank_deficit_method == 'svd': self._symeig = symeig_semidefinite_svd elif rank_deficit_method == 'ldl': try: from scipy.linalg.lapack import dsytrf except ImportError: err_msg = ( "ldl method for solving SFA with rank deficit covariance " "requires at least SciPy 1.0.") raise NodeException(err_msg) self._symeig = symeig_semidefinite_ldl elif rank_deficit_method == 'auto': self._symeig = symeig_semidefinite_pca elif rank_deficit_method == 'none': self._symeig = symeig else: raise ValueError("Invalid value for rank_deficit_method: %s" % str(rank_deficit_method)) def time_derivative(self, x): """Compute the linear approximation of the time derivative :param x: The time series data. :type x: numpy.ndarray :returns: Piecewise linear approximation of the time derivative. :rtype: numpy.ndarray """ # this is faster than a linear_filter or a weave-inline solution return x[1:, :] - x[:-1, :] def _set_range(self): if self.output_dim is not None and self.output_dim <= self.input_dim: # (eigenvalues sorted in ascending order) rng = (1, self.output_dim) else: # otherwise, keep all output components rng = None self.output_dim = self.input_dim return rng def _check_train_args(self, x, *args, **kwargs): """Raises exception if time dimension does not have enough elements. :param x: The time series data. :type x: numpy.ndarray :param *args: :param **kwargs: """ # check that we have at least 2 time samples to # compute the update for the derivative covariance matrix s = x.shape[0] if s < 2: raise TrainingException('Need at least 2 time samples to ' 'compute time derivative (%d given)' % s) def _train(self, x, include_last_sample=None): """Training method. :param x: The time series data. :type x: numpy.ndarray :param include_last_sample: For the ``include_last_sample`` switch have a look at the SFANode.__init__ docstring. :type include_last_sample: bool """ if include_last_sample is None: include_last_sample = self._include_last_sample # works because x[:None] == x[:] last_sample_index = None if include_last_sample else -1 # update the covariance matrices self._cov_mtx.update(x[:last_sample_index, :]) self._dcov_mtx.update(self.time_derivative(x)) def _stop_training(self, debug=False): # request the covariance matrices and clean up if hasattr(self, '_dcov_mtx'): self.cov_mtx, self.avg, self.tlen = self._cov_mtx.fix() del self._cov_mtx # do not center around the mean: # we want the second moment matrix (centered about 0) and # not the second central moment matrix (centered about the mean), i.e. 
# the covariance matrix if hasattr(self, '_dcov_mtx'): self.dcov_mtx, self.davg, self.dtlen = self._dcov_mtx.fix( center=False) del self._dcov_mtx rng = self._set_range() # solve the generalized eigenvalue problem # the eigenvalues are already ordered in ascending order try: try: # We first try to fulfill the extended signature described # in mdp.utils.symeig_semidefinite self.d, self.sf = self._symeig( self.dcov_mtx, self.cov_mtx, True, "on", rng, overwrite=(not debug), rank_threshold=self.rank_threshold, dfc_out=self) except TypeError: self.d, self.sf = self._symeig(self.dcov_mtx, self.cov_mtx, True, "on", rng, overwrite=(not debug)) d = self.d # check that we get only *positive* eigenvalues if d.min() < 0: err_msg = ( "Got negative eigenvalues: %s.\n" "You may either set output_dim to be smaller,\n" "or prepend the SFANode with a PCANode(reduce=True)\n" "or PCANode(svd=True)\n" "or set a rank deficit method, e.g.\n" "create the SFA node with rank_deficit_method='auto'\n" "and try higher values for rank_threshold, e.g. try\n" "your_node.rank_threshold = 1e-10, 1e-8, 1e-6, ..." % str(d)) raise NodeException(err_msg) except SymeigException as exception: errstr = (str(exception) + "\n Covariance matrices may be singular.\n" + SINGULAR_VALUE_MSG) raise NodeException(errstr) if not debug: # delete covariance matrix if no exception occurred del self.cov_mtx del self.dcov_mtx # store bias self._bias = mult(self.avg, self.sf) def _execute(self, x, n=None): """Compute the output of the slowest functions. :param x: The time series data. :type x: numpy.ndarray :param n: The number of slowest components. If 'n' is an integer, then use the first 'n' slowest components. :type n: int :returns: The output of the slowest functions. :rtype: numpy.ndarray """ if n: sf = self.sf[:, :n] bias = self._bias[:n] else: sf = self.sf bias = self._bias return mult(x, sf) - bias def _inverse(self, y): return mult(y, pinv(self.sf)) + self.avg def get_eta_values(self, t=1): """Return the eta values of the slow components learned during the training phase. If the training phase has not been completed yet, call `stop_training`. The delta value of a signal is a measure of its temporal variation, and is defined as the mean of the derivative squared, i.e. delta(x) = mean(dx/dt(t)^2). delta(x) is zero if x is a constant signal, and increases if the temporal variation of the signal is bigger. The eta value is a more intuitive measure of temporal variation, defined as eta(x) = t/(2*pi) * sqrt(delta(x)) If x is a signal of length 't' which consists of a sine function that accomplishes exactly N oscillations, then eta(x)=N. :param t: Sampling frequency in Hz. The original definition in (Wiskott and Sejnowski, 2002) is obtained for t = number of training data points, while for t=1 (default), this corresponds to the beta-value defined in (Berkes and Wiskott, 2005). :returns: The eta values of the slow components learned during the training phase. """ if self.is_training(): self.stop_training() return self._refcast(t / (2 * numx.pi) * numx.sqrt(self.d))
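
# ---------------------------------------------------------------------------
# Usage sketch (added for illustration, not part of the original MDP source).
# It is a rough sketch of the chunked-training pattern described in the
# ``include_last_sample`` docstring above: consecutive chunks share one
# sample, and ``include_last_sample=False`` keeps that shared sample from
# being counted twice in the spatial covariance matrix.  The toy signal, the
# chunk length and the helper names (``_example_sfa_chunked_training``,
# ``overlapping_chunks``) are hypothetical; ``SFANode`` and its public API
# come from the code above.
# ---------------------------------------------------------------------------
def _example_sfa_chunked_training():
    import numpy as np

    rng = np.random.RandomState(0)
    time = np.linspace(0, 4 * np.pi, 1001)
    # one slow and two faster sine components, linearly mixed
    sources = np.c_[np.sin(time), np.sin(5 * time), np.sin(11 * time)]
    x = sources.dot(rng.randn(3, 3))

    def overlapping_chunks(data, chunk_len):
        # repeat the last sample of each chunk at the start of the next one,
        # so no time derivative is lost at the chunk boundaries
        start = 0
        while start < len(data) - 1:
            yield data[start:start + chunk_len]
            start += chunk_len - 1

    sfa = SFANode(output_dim=2, include_last_sample=False)
    for chunk in overlapping_chunks(x, 200):
        sfa.train(chunk)
    sfa.stop_training()

    slow = sfa.execute(x)          # slowest extracted signals come first
    # the delta values in sfa.d should roughly match the empirical mean
    # squared derivative of the (unit-variance) extracted signals
    empirical_delta = np.mean(np.diff(slow, axis=0) ** 2, axis=0)
    return sfa.d, empirical_delta, sfa.get_eta_values(t=len(time))
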
class FANode(mdp.Node): """Perform Factor Analysis. The current implementation should be most efficient for long data sets: the sufficient statistics are collected in the training phase, and all EM-cycles are performed at its end. The ``execute`` method returns the Maximum A Posteriori estimate of the latent variables. The ``generate_input`` method generates observations from the prior distribution. .. attribute:: mu Mean of the input data (available after training) .. attribute:: A Generating weights (available after training) .. attribute:: E_y_mtx Weights for Maximum A Posteriori inference .. attribute:: sigma Vector of estimated variance of the noise for all input components | .. admonition:: Reference More information about Factor Analysis can be found in Max Welling's classnotes: http://www.ics.uci.edu/~welling/classnotes/classnotes.html , in the chapter 'Linear Models'. """ def __init__(self, tol=1e-4, max_cycles=100, verbose=False, input_dim=None, output_dim=None, dtype=None): """Initializes an object of type 'FANode'. :param tol: Tolerance (minimum change in log-likelihood before exiting the EM algorithm). :type tol: float :param max_cycles: Maximum number of EM cycles/ :type max_cycles: int :param verbose: If true, print log-likelihood during the EM-cycles. :type verbose: bool :param input_dim: The input dimensionality. :type input_dim: int :param output_dim: The output dimensionality. :type output_dim: int :param dtype: The datatype. :type dtype: numpy.dtype or str """ # Notation as in Max Welling's notes super(FANode, self).__init__(input_dim, output_dim, dtype) self.tol = tol self.max_cycles = max_cycles self.verbose = verbose self._cov_mtx = CovarianceMatrix(dtype, bias=True) def _train(self, x): # update the covariance matrix self._cov_mtx.update(x) def _stop_training(self): #### some definitions verbose = self.verbose typ = self.dtype tol = self.tol d = self.input_dim # if the number of latent variables is not specified, # set it equal to the number of input components if not self.output_dim: self.output_dim = d k = self.output_dim # indices of the diagonal elements of a dxd or kxk matrix idx_diag_d = [i*(d+1) for i in range(d)] idx_diag_k = [i*(k+1) for i in range(k)] # constant term in front of the log-likelihood const = -d/2. * numx.log(2.*numx.pi) ##### request the covariance matrix and clean up cov_mtx, mu, tlen = self._cov_mtx.fix() del self._cov_mtx cov_diag = cov_mtx.diagonal() ##### initialize the parameters # noise variances sigma = cov_diag # loading factors # Zoubin uses the determinant of cov_mtx^1/d as scale but it's # too slow for large matrices. Is the product of the diagonal a good # approximation? if d<=300: scale = det(cov_mtx)**(old_div(1.,d)) else: scale = numx.product(sigma)**(old_div(1.,d)) if scale <= 0.: err = ("The covariance matrix of the data is singular. 
" "Redundant dimensions need to be removed.") raise NodeException(err) A = normal(0., sqrt(old_div(scale,k)), size=(d, k)).astype(typ) ##### EM-cycle lhood_curve = [] base_lhood = None old_lhood = -numx.inf for t in range(self.max_cycles): ## compute B = (A A^T + Sigma)^-1 B = mult(A, A.T) # B += diag(sigma), avoid computing diag(sigma) which is dxd B.ravel().put(idx_diag_d, B.ravel().take(idx_diag_d)+sigma) # this quantity is used later for the log-likelihood # abs is there to avoid numerical errors when det < 0 log_det_B = numx.log(abs(det(B))) # end the computation of B B = inv(B) ## other useful quantities trA_B = mult(A.T, B) trA_B_cov_mtx = mult(trA_B, cov_mtx) ##### E-step ## E_yyT = E(y_n y_n^T | x_n) E_yyT = - mult(trA_B, A) + mult(trA_B_cov_mtx, trA_B.T) # E_yyT += numx.eye(k) E_yyT.ravel().put(idx_diag_k, E_yyT.ravel().take(idx_diag_k)+1.) ##### M-step A = mult(trA_B_cov_mtx.T, inv(E_yyT)) sigma = cov_diag - (mult(A, trA_B_cov_mtx)).diagonal() ##### log-likelihood trace_B_cov = (B*cov_mtx.T).sum() # this is actually likelihood/tlen. lhood = const - 0.5*log_det_B - 0.5*trace_B_cov if verbose: print('cycle', t, 'log-lhood:', lhood) ##### convergence criterion if base_lhood is None: base_lhood = lhood else: # convergence criterion if (lhood-base_lhood)<(1.+tol)*(old_lhood-base_lhood): break if lhood < old_lhood: # this should never happen # it sometimes does, e.g. if the noise is extremely low, # because of numerical rounding effects warnings.warn(_LHOOD_WARNING, mdp.MDPWarning) old_lhood = lhood lhood_curve.append(lhood) self.tlen = tlen self.A = A self.mu = mu.reshape(1, d) self.sigma = sigma ## MAP matrix # compute B = (A A^T + Sigma)^-1 B = mult(A, A.T).copy() B.ravel().put(idx_diag_d, B.ravel().take(idx_diag_d)+sigma) B = inv(B) self.E_y_mtx = mult(B.T, A) self.lhood = lhood_curve def _execute(self, x): return mult(x-self.mu, self.E_y_mtx) @staticmethod def is_invertible(): return False def generate_input(self, len_or_y=1, noise=False): """Generate data from the prior distribution. If the training phase has not been completed yet, call stop_training. :param len_or_y: If integer, it specified the number of observation to generate. If array, it is used as a set of samples of the latent variables :param noise: If true, generation includes the estimated noise :type noise: bool :return: The generated data. :rtype: numpy.ndarray """ self._if_training_stop_training() # set the output dimension if necessary if self.output_dim is None: # if the input_dim is not defined, raise an exception if self.input_dim is None: errstr = ("Number of input dimensions undefined. Inversion " "not possible.") raise NodeException(errstr) self.output_dim = self.input_dim if isinstance(len_or_y, int): size = (len_or_y, self.output_dim) y = self._refcast(mdp.numx_rand.normal(size=size)) else: y = self._refcast(len_or_y) self._check_output(y) res = mult(y, self.A.T)+self.mu if noise: ns = mdp.numx_rand.normal(size=(y.shape[0], self.input_dim)) ns *= numx.sqrt(self.sigma) res += self._refcast(ns) return res
        self.avg = avg.reshape(1, avg.shape[0])

        # range for the eigenvalues
        rng = self._adjust_output_dim()

        # if we have more variables than observations we are bound to fail
        # here; suggest to use the NIPALSNode instead.
        if debug and self.tlen < self.input_dim:
            wrn = ('The number of observations (%d) '
                   'is smaller than the number of input variables '
                   '(%d). You may want to use '
                   'the NIPALSNode instead.' % (self.tlen, self.input_dim))
            _warnings.warn(wrn, mdp.MDPWarning)

        # total variance can be computed at this point:
        # note that vartot == d.sum()
        vartot = numx.diag(self.cov_mtx).sum()

        ## compute and sort the eigenvalues
        # compute the eigenvectors of the covariance matrix (inplace)
        # (eigenvalues sorted in ascending order)
        try:
            d, v = self._symeig(self.cov_mtx, range=rng,
                                overwrite=(not debug))
            # if reduce=False and svd=False, we should check for
            # negative eigenvalues and fail
            if not (self.reduce or self.svd or
                    (self.desired_variance is not None)):
                if d.min() < 0:
                    raise mdp.NodeException(
                        "Got negative eigenvalues: %s.\n"
                        "You may either set output_dim to be smaller, "
                        "or set reduce=True and/or svd=True" % str(d))
        except SymeigException as exception:
            err = str(exception) + ("\nCovariance matrix may be singular. "
                                    "Try setting svd=True.")
            raise mdp.NodeException(err)

        # delete covariance matrix if no exception occurred
        if not debug:
            del self.cov_mtx

        # sort by descending order
        d = numx.take(d, list(range(d.shape[0] - 1, -1, -1)))
        v = v[:, ::-1]

        if self.desired_variance is not None:
            # throw away negative eigenvalues immediately
            d = d[d > 0]
            # the number of principal components to keep has
            # been specified by the fraction of variance to be explained
            varcum = (d / vartot).cumsum(axis=0)
            # select only the relevant eigenvalues
            # number of relevant eigenvalues
            neigval = int(varcum.searchsorted(self.desired_variance) + 1)
            #self.explained_variance = varcum[neigval-1]
            # cut
            d = d[0:neigval]
            v = v[:, 0:neigval]
            # define the new output dimension
            self.output_dim = int(neigval)

        # automatic dimensionality reduction
        if self.reduce:
            # remove entries that are smaller than var_abs and
            # smaller than var_rel relative to the maximum
            d = d[d > self.var_abs]
            # check that we did not throw away everything
            if len(d) == 0:
                raise mdp.NodeException('No eigenvalues larger than'
                                        ' var_abs=%e!' % self.var_abs)
            d = d[d / d.max() > self.var_rel]
            # filter for variance relative to total variance
            if self.var_part:
                d = d[d / vartot > self.var_part]
            v = v[:, 0:d.shape[0]]
            self._output_dim = d.shape[0]

        # set explained variance
        self.explained_variance = d.sum() / vartot

        # store the eigenvalues
        self.d = d
        # store the eigenvectors
        self.v = v
        # store the total variance
        self.total_variance = vartot

    def get_projmatrix(self, transposed=1):
        """Returns the projection matrix.

        :param transposed: Determines whether the transposed projection
            matrix is returned. Default is True.
        :type transposed: bool

        :return: The projection matrix.
        :rtype: numpy.ndarray
        """
        self._if_training_stop_training()
        if transposed:
            return self.v
        return self.v.T

    def get_recmatrix(self, transposed=1):
        """Returns the back-projection matrix (i.e. the reconstruction
        matrix).

        :param transposed: Determines whether the transposed back-projection
            matrix (i.e. the reconstruction matrix) is returned.
            Default is True.
        :type transposed: bool

        :return: The back-projection matrix (i.e. the reconstruction matrix).
        :rtype: numpy.ndarray
        """
        self._if_training_stop_training()
        if transposed:
            return self.v.T
        return self.v
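    # Note: for centered data xc = x - self.avg, _execute below computes
    # the projection y = mult(xc, self.v), and _inverse reconstructs
    # x ~ mult(y, self.v.T) + self.avg.  Since the columns of self.v are
    # orthonormal eigenvectors of the covariance matrix, the transposed
    # projection matrix acts as the back-projection (reconstruction)
    # matrix returned by get_recmatrix above.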
    def _execute(self, x, n=None):
        """Project the input on the first 'n' principal components.

        If 'n' is not set, use all available components.

        :param x: The input data.
        :type x: numpy.ndarray

        :param n: The number of principal components to project on.
        :type n: int

        :return: The projected input.
        :rtype: numpy.ndarray
        """
        if n is not None:
            return mult(x - self.avg, self.v[:, :n])
        return mult(x - self.avg, self.v)

    def _inverse(self, y, n=None):
        """Project data from the output to the input space using the
        first 'n' components.

        If 'n' is not set, use all available components.

        :param y: Data to be projected to the input space.
        :type y: numpy.ndarray

        :param n: The number of principal components used for the projection.
        :type n: int

        :return: The data projected back to the input space.
        :rtype: numpy.ndarray
        """
        if n is None:
            n = y.shape[1]
        if n > self.output_dim:
            error_str = ("y has dimension %d,"
                         " should be at most %d" % (n, self.output_dim))
            raise mdp.NodeException(error_str)

        v = self.get_recmatrix()
        # 'n' is always defined at this point, so project back using the
        # first 'n' rows of the reconstruction matrix
        return mult(y, v[:n, :]) + self.avg
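
# ----------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the node
# implementation).  It assumes MDP and numpy are installed and that the
# class is exposed as mdp.nodes.PCANode; the variable names ('x', 'node',
# 'y') are chosen for this example only.
if __name__ == '__main__':
    import numpy as np
    import mdp

    # 500 observations of 10 variables, with some correlation introduced
    rnd = np.random.RandomState(0)
    x = rnd.randn(500, 10)
    x[:, 1] += 0.5 * x[:, 0]

    # keep as many components as needed to explain 95% of the variance
    node = mdp.nodes.PCANode(output_dim=0.95)
    node.train(x)
    node.stop_training()

    y = node.execute(x)        # project onto the principal components
    x_rec = node.inverse(y)    # map back to the input space

    print('output_dim:', node.output_dim)
    print('explained variance:', node.get_explained_variance())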