Example 1
    def __init__(self,
                 input_dim=None,
                 output_dim=None,
                 dtype=None,
                 include_last_sample=True):
        """
        For the ``include_last_sample`` switch have a look at the
        SFANode class docstring.
         """
        super(SFANode, self).__init__(input_dim, output_dim, dtype)
        self._include_last_sample = include_last_sample

        # init two covariance matrices
        # one for the input data
        self._cov_mtx = CovarianceMatrix(dtype)
        # one for the derivatives
        self._dcov_mtx = CovarianceMatrix(dtype)

        # set routine for eigenproblem
        self._symeig = symeig

        # SFA eigenvalues and eigenvectors, will be set after training
        self.d = None
        self.sf = None  # second index for outputs
        self.avg = None
        self._bias = None  # avg multiplied with sf
        self.tlen = None
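
The constructor above only initializes state; the node is trained and applied through the usual train/stop_training/execute calls. A minimal usage sketch, assuming the mdp package and its nodes.SFANode:

    import numpy as np
    import mdp

    # toy input: a slow sine plus a fast noise channel
    t = np.linspace(0, 4 * np.pi, 2000)
    x = np.column_stack([np.sin(t) + 0.05 * np.random.randn(t.size),
                         np.random.randn(t.size)])

    sfa = mdp.nodes.SFANode(output_dim=1, include_last_sample=True)
    sfa.train(x)
    sfa.stop_training()
    slow = sfa.execute(x)  # slowest varying component, shape (2000, 1)
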
Example 2
    def __init__(self,
                 tol=1e-4,
                 max_cycles=100,
                 verbose=False,
                 input_dim=None,
                 output_dim=None,
                 dtype=None):
        """Initializes an object of type 'FANode'.
        
        :param tol: Tolerance (minimum change in log-likelihood before exiting
            the EM algorithm).
        :type tol: float
        
        :param max_cycles: Maximum number of EM cycles.
        :type max_cycles: int
        
        :param verbose: If true, print log-likelihood during the EM-cycles.
        :type verbose: bool
        
        :param input_dim: The input dimensionality.
        :type input_dim: int
        
        :param output_dim: The output dimensionality.
        :type output_dim: int
        
        :param dtype: The datatype.
        :type dtype: numpy.dtype or str
        """

        # Notation as in Max Welling's notes
        super(FANode, self).__init__(input_dim, output_dim, dtype)
        self.tol = tol
        self.max_cycles = max_cycles
        self.verbose = verbose
        self._cov_mtx = CovarianceMatrix(dtype, bias=True)
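
For reference, a hedged sketch of how this factor analysis node is typically used once constructed, assuming the mdp package (the EM loop itself runs inside stop_training):

    import numpy as np
    import mdp

    # 5 observed variables generated from 2 latent factors plus noise
    latent = np.random.randn(1000, 2)
    x = latent @ np.random.randn(2, 5) + 0.1 * np.random.randn(1000, 5)

    fa = mdp.nodes.FANode(tol=1e-4, max_cycles=100, output_dim=2)
    fa.train(x)
    fa.stop_training()       # runs EM until the log-likelihood change drops below tol
    factors = fa.execute(x)  # estimated latent factors, shape (1000, 2)
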
Example 3
    def __init__(self,
                 input_dim=None,
                 output_dim=None,
                 dtype=None,
                 svd=False,
                 reduce=False,
                 var_rel=1E-12,
                 var_abs=1E-15,
                 var_part=None):
        """The number of principal components to be kept can be specified as
        'output_dim' directly (e.g. 'output_dim=10' means 10 components
        are kept) or by the fraction of variance to be explained
        (e.g. 'output_dim=0.95' means that as many components as necessary
        will be kept in order to explain 95% of the input variance).

        Other Keyword Arguments:

        svd -- if True use Singular Value Decomposition instead of the
               standard eigenvalue problem solver. Use it when PCANode
               complains about singular covariance matrices

        reduce -- Keep only those principal components which have a variance
                  larger than 'var_abs' and a variance relative to the
                  first principal component larger than 'var_rel' and a
                  variance relative to total variance larger than 'var_part'
                  (set var_part to None or 0 for no filtering).
                  Note: when the 'reduce' switch is enabled, the actual number
                  of principal components (self.output_dim) may be different
                  from that set when creating the instance.
        """
        # this must occur *before* calling super!
        self.desired_variance = None
        super(PCANode, self).__init__(input_dim, output_dim, dtype)
        self.svd = svd
        # set routine for eigenproblem
        if svd:
            self._symeig = nongeneral_svd
        else:
            self._symeig = symeig
        self.var_abs = var_abs
        self.var_rel = var_rel
        self.var_part = var_part
        self.reduce = reduce
        # empirical covariance matrix, updated during the training phase
        self._cov_mtx = CovarianceMatrix(dtype)
        # attributes that are defined in stop_training
        self.d = None  # eigenvalues
        self.v = None  # eigenvectors, first index for coordinates
        self.total_variance = None
        self.tlen = None
        self.avg = None
        self.explained_variance = None
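
The docstring's two ways of choosing output_dim (a fixed number of components vs. a fraction of explained variance) look like this in use; a sketch assuming the mdp package:

    import numpy as np
    import mdp

    x = np.random.randn(500, 10) @ np.random.randn(10, 10)  # correlated toy data

    pca_fixed = mdp.nodes.PCANode(output_dim=3)    # keep exactly 3 components
    pca_fixed.train(x)
    y3 = pca_fixed.execute(x)                      # shape (500, 3)

    pca_frac = mdp.nodes.PCANode(output_dim=0.95)  # keep enough components for 95% of the variance
    pca_frac.train(x)
    y95 = pca_frac.execute(x)
    print(pca_frac.output_dim, pca_frac.explained_variance)
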
Example 4
    def __init__(self, tol=1e-4, max_cycles=100, verbose=False,
                 input_dim=None, output_dim=None, dtype=None):
        """Initializes an object of type 'FANode'.
        
        :param tol: Tolerance (minimum change in log-likelihood before exiting
            the EM algorithm).
        :type tol: float
        
        :param max_cycles: Maximum number of EM cycles.
        :type max_cycles: int
        
        :param verbose: If true, print log-likelihood during the EM-cycles.
        :type verbose: bool
        
        :param input_dim: The input dimensionality.
        :type input_dim: int
        
        :param output_dim: The output dimensionality.
        :type output_dim: int
        
        :param dtype: The datatype.
        :type dtype: numpy.dtype or str
        """

        # Notation as in Max Welling's notes
        super(FANode, self).__init__(input_dim, output_dim, dtype)
        self.tol = tol
        self.max_cycles = max_cycles
        self.verbose = verbose
        self._cov_mtx = CovarianceMatrix(dtype, bias=True)
Example 5
    def __init__(self, tol=1e-4, max_cycles=100, verbose=False,
                 input_dim=None, output_dim=None, dtype=None):

        """
        :Parameters:
          tol
            tolerance (minimum change in log-likelihood before exiting
            the EM algorithm)
          max_cycles
            maximum number of EM cycles
          verbose
            if true, print log-likelihood during the EM-cycles
        """
        # Notation as in Max Welling's notes
        super(FANode, self).__init__(input_dim, output_dim, dtype)
        self.tol = tol
        self.max_cycles = max_cycles
        self.verbose = verbose
        self._cov_mtx = CovarianceMatrix(dtype, bias=True)
Example 6
    def __init__(
        self,
        input_dim=None,
        output_dim=None,
        dtype=None,
        svd=False,
        reduce=False,
        var_rel=1e-12,
        var_abs=1e-15,
        var_part=None,
    ):
        """The number of principal components to be kept can be specified as
        'output_dim' directly (e.g. 'output_dim=10' means 10 components
        are kept) or by the fraction of variance to be explained
        (e.g. 'output_dim=0.95' means that as many components as necessary
        will be kept in order to explain 95% of the input variance).

        Other Keyword Arguments:

        svd -- if True use Singular Value Decomposition instead of the
               standard eigenvalue problem solver. Use it when PCANode
               complains about singular covariance matrices

        reduce -- Keep only those principal components which have a variance
                  larger than 'var_abs' and a variance relative to the
                  first principal component larger than 'var_rel' and a
                  variance relative to total variance larger than 'var_part'
                  (set var_part to None or 0 for no filtering).
                  Note: when the 'reduce' switch is enabled, the actual number
                  of principal components (self.output_dim) may be different
                  from that set when creating the instance.
        """
        # this must occur *before* calling super!
        self.desired_variance = None
        super(PCANode, self).__init__(input_dim, output_dim, dtype)
        self.svd = svd
        # set routine for eigenproblem
        if svd:
            self._symeig = nongeneral_svd
        else:
            self._symeig = symeig
        self.var_abs = var_abs
        self.var_rel = var_rel
        self.var_part = var_part
        self.reduce = reduce
        # empirical covariance matrix, updated during the training phase
        self._cov_mtx = CovarianceMatrix(dtype)
        # attributes that are defined in stop_training
        self.d = None  # eigenvalues
        self.v = None  # eigenvectors, first index for coordinates
        self.total_variance = None
        self.tlen = None
        self.avg = None
        self.explained_variance = None
Example 7
    def __init__(self, input_dim=None, output_dim=None, dtype=None,
                 include_last_sample=True):
        """
        For the ``include_last_sample`` switch have a look at the
        SFANode class docstring.
         """
        super(SFANode, self).__init__(input_dim, output_dim, dtype)
        self._include_last_sample = include_last_sample

        # init two covariance matrices
        # one for the input data
        self._cov_mtx = CovarianceMatrix(dtype)
        # one for the derivatives
        self._dcov_mtx = CovarianceMatrix(dtype)

        # set routine for eigenproblem
        self._symeig = symeig

        # SFA eigenvalues and eigenvectors, will be set after training
        self.d = None
        self.sf = None  # second index for outputs
        self.avg = None
        self._bias = None  # avg multiplied with sf
        self.tlen = None
Example 8
def _get_iterative_cov(layer, batch, conv_method: str = 'median'):

    #batch = batch[-1]

    if len(batch.shape) == 4:  # conv layer (B x C x H x W)
        if conv_method == 'median':
            batch = np.median(batch, axis=(2, 3))  # channel median
        elif conv_method == 'max':
            batch = np.max(batch, axis=(2, 3))  # channel max
        elif conv_method == 'mean':
            batch = np.mean(batch, axis=(2, 3))

    if layer not in COVARIANCE_MATRICES:
        COVARIANCE_MATRICES[layer] = CovarianceMatrix()
        COVARIANCE_MATRICES[layer]._init_internals(batch)
    else:
        COVARIANCE_MATRICES[layer].update(batch)
    return COVARIANCE_MATRICES[layer]._cov_mtx
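
The helper accumulates one running covariance estimate per layer across mini-batches, reducing conv activations to a per-channel value first. A rough usage sketch, assuming COVARIANCE_MATRICES is a module-level dict and CovarianceMatrix comes from mdp.utils:

    import numpy as np

    # two mini-batches of conv activations (B x C x H x W)
    batch1 = np.random.randn(32, 16, 8, 8)
    batch2 = np.random.randn(32, 16, 8, 8)

    cov = _get_iterative_cov('conv1', batch1, conv_method='mean')  # first call initializes the estimate
    cov = _get_iterative_cov('conv1', batch2, conv_method='mean')  # later calls update it incrementally
    print(cov.shape)  # (16, 16): raw channel-covariance accumulator
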
Example 9
    def __init__(self, tol=1e-4, max_cycles=100, verbose=False, input_dim=None, output_dim=None, dtype=None):

        """
        :Parameters:
          tol
            tolerance (minimum change in log-likelihood before exiting
            the EM algorithm)
          max_cycles
            maximum number of EM cycles
          verbose
            if true, print log-likelihood during the EM-cycles
        """
        # Notation as in Max Welling's notes
        super(FANode, self).__init__(input_dim, output_dim, dtype)
        self.tol = tol
        self.max_cycles = max_cycles
        self.verbose = verbose
        self._cov_mtx = CovarianceMatrix(dtype, bias=True)
Example 10
        def record_layer_saturation(layer: torch.nn.Module, input, output):
            """Hook to register in `layer` module."""

            # Increment step counter
            layer.forward_iter += 1
            if layer.forward_iter % layer.interval == 0:
                activations_batch = output.data.cpu().numpy()
                training_state = 'train' if layer.training else 'eval'
                setattr(layer, f'{training_state}_layer_history', activations_batch)
                eig_vals = None
                if 'lsat' in stats:
                    training_state = 'train' if layer.training else 'eval'

                    if len(activations_batch.shape) == 4:  # conv layer (B x C x H x W)
                        if self.conv_method == 'median':
                            activations_batch = np.median(
                                activations_batch,
                                axis=(2, 3))  # channel median
                        elif self.conv_method == 'max':
                            activations_batch = np.max(
                                activations_batch,
                                axis=(2, 3))  # channel max
                        elif self.conv_method == 'mean':
                            activations_batch = np.mean(activations_batch,
                                                        axis=(2, 3))

                    if layer.name in self.logs[f'{training_state}-saturation']:
                        self.logs[f'{training_state}-saturation'][
                            layer.name].update(activations_batch)
                    else:
                        self.logs[f'{training_state}-saturation'][
                            layer.name] = CovarianceMatrix()
                        self.logs[f'{training_state}-saturation'][
                            layer.name]._init_internals(activations_batch)
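
This closure is meant to be registered as a PyTorch forward hook on each monitored layer; a hedged sketch of the registration step (forward_iter, interval and name are assumptions mirroring the attributes the hook reads):

    import torch

    model = torch.nn.Sequential(torch.nn.Linear(10, 20), torch.nn.ReLU(), torch.nn.Linear(20, 5))
    for i, layer in enumerate(model.modules()):
        if isinstance(layer, torch.nn.Linear):
            layer.forward_iter = 0      # step counter incremented by the hook
            layer.interval = 1          # record activations on every forward pass
            layer.name = f'linear{i}'   # key used in the saturation logs
            layer.register_forward_hook(record_layer_saturation)
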
Example 11
    def __init__(self, input_dim=None, output_dim=None, dtype=None,
                 include_last_sample=True, rank_deficit_method='none'):
        """
        Initialize an object of type 'SFANode'.

        :param input_dim: The input dimensionality.
        :type input_dim: int
        
        :param output_dim: The output dimensionality.
        :type output_dim: int
        
        :param dtype: The datatype.
        :type dtype: numpy.dtype or str
        
        :param include_last_sample: If ``False`` the `train` method discards the 
            last sample in every chunk during training when calculating 
            the covariance matrix.
            The last sample is in this case only used for calculating the
            covariance matrix of the derivatives. The switch should be set
            to ``False`` if you plan to train with several small chunks. For
            example we can split a sequence (index is time)::

                x_1 x_2 x_3 x_4
    
            in smaller parts like this::

                x_1 x_2
                x_2 x_3
                x_3 x_4

            The SFANode will see 3 derivatives for the temporal covariance
            matrix, and the first 3 points for the spatial covariance matrix.
            Of course you will need to use a generator that *connects* the
            small chunks (the last sample needs to be sent again in the next
            chunk). If ``include_last_sample`` was True, depending on the
            generator you use, you would either get::

                x_1 x_2
                x_2 x_3
                x_3 x_4

            in which case the last sample of every chunk would be used twice
            when calculating the covariance matrix, or::

                x_1 x_2
                x_3 x_4

            in which case you lose the derivative between ``x_3`` and ``x_2``.

            If you plan to train with a single big chunk leave
            ``include_last_sample`` to the default value, i.e. ``True``.

            You can even change this behaviour during training. Just set the
            corresponding switch in the `train` method.
        :type include_last_sample: bool
        
        :param rank_deficit_method: Possible values: 'none' (default), 'reg', 'pca', 'svd', 'ldl', 'auto'
            If not 'none', the ``stop_training`` method solves the SFA eigenvalue
            problem in a way that is robust against linear redundancies in
            the input data. This would otherwise lead to rank deficit in the
            covariance matrix, which usually yields a
            SymeigException ('Covariance matrices may be singular').
            There are several solving methods implemented:

            reg  - works by regularization
            pca  - works by PCA
            svd  - works by SVD
            ldl  - works by LDL decomposition (requires SciPy >= 1.0)

            auto - (Will be: selects the best-benchmarked method of the above)
                   Currently it simply selects pca.

            Note: If you already received an exception
            SymeigException ('Covariance matrices may be singular')
            you can manually set the solving method for an existing node::

               sfa.set_rank_deficit_method('pca')

            That means::

               sfa = SFANode(rank_deficit_method='pca')

            is equivalent to::

               sfa = SFANode()
               sfa.set_rank_deficit_method('pca')

            After such an adjustment you can run ``stop_training()`` again,
            which would save a potentially time-consuming rerun of all
            ``train()`` calls.
        :type rank_deficit_method: str
        """
        super(SFANode, self).__init__(input_dim, output_dim, dtype)
        self._include_last_sample = include_last_sample

        # init two covariance matrices
        # one for the input data
        self._cov_mtx = CovarianceMatrix(dtype)
        # one for the derivatives
        self._dcov_mtx = CovarianceMatrix(dtype)

        # set routine for eigenproblem
        self.set_rank_deficit_method(rank_deficit_method)
        self.rank_threshold = 1e-12
        self.rank_deficit = 0

        # SFA eigenvalues and eigenvectors, will be set after training
        self.d = None
        self.sf = None  # second index for outputs
        self.avg = None
        self._bias = None  # avg multiplied with sf
        self.tlen = None
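
The include_last_sample docstring above relies on a generator that *connects* small chunks by resending the last sample of each chunk at the start of the next one. A minimal sketch of such a generator, assuming sfa is an SFANode and x is the training array with time along the first axis:

    def overlapping_chunks(x, chunk_size):
        """Yield chunks that share their boundary sample with the next chunk,
        so no derivative across chunk borders is lost."""
        start = 0
        while start < len(x) - 1:
            yield x[start:start + chunk_size]
            start += chunk_size - 1  # resend the last sample in the next chunk

    for chunk in overlapping_chunks(x, chunk_size=100):
        # the duplicated boundary sample is excluded from the spatial covariance
        sfa.train(chunk, include_last_sample=False)
    sfa.stop_training()
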
Example 12
class SFANode(Node):
    """Extract the slowly varying components from the input data.

    :ivar avg: Mean of the input data (available after training)
    :vartype avg: numpy.ndarray

    :ivar sf: Matrix of the SFA filters (available after training)
    :vartype sf: numpy.ndarray

    :ivar d: Delta values corresponding to the SFA components (generalized
        eigenvalues). [See the docs of the ``get_eta_values`` method for
        more information]
    :vartype d: numpy.ndarray

    .. admonition:: Reference

        More information about Slow Feature Analysis can be found in
        Wiskott, L. and Sejnowski, T.J., Slow Feature Analysis: Unsupervised
        Learning of Invariances, Neural Computation, 14(4):715-770 (2002).
    """
    def __init__(self,
                 input_dim=None,
                 output_dim=None,
                 dtype=None,
                 include_last_sample=True,
                 rank_deficit_method='none'):
        """Initialize an object of type 'SFANode'.

        :param input_dim: The input dimensionality.
        :type input_dim: int

        :param output_dim: The output dimensionality.
        :type output_dim: int

        :param dtype: The datatype.
        :type dtype: numpy.dtype or str

        :param include_last_sample: If ``False`` the `train` method discards
            the last sample in every chunk during training when calculating
            the covariance matrix.
            The last sample is in this case only used for calculating the
            covariance matrix of the derivatives. The switch should be set
            to ``False`` if you plan to train with several small chunks. For
            example we can split a sequence (index is time)::

                x_1 x_2 x_3 x_4

            in smaller parts like this::

                x_1 x_2
                x_2 x_3
                x_3 x_4

            The SFANode will see 3 derivatives for the temporal covariance
            matrix, and the first 3 points for the spatial covariance matrix.
            Of course you will need to use a generator that *connects* the
            small chunks (the last sample needs to be sent again in the next
            chunk). If ``include_last_sample`` was True, depending on the
            generator you use, you would either get::

                x_1 x_2
                x_2 x_3
                x_3 x_4

            in which case the last sample of every chunk would be used twice
            when calculating the covariance matrix, or::

                x_1 x_2
                x_3 x_4

            in which case you lose the derivative between ``x_3`` and ``x_2``.

            If you plan to train with a single big chunk leave
            ``include_last_sample`` to the default value, i.e. ``True``.

            You can even change this behaviour during training. Just set the
            corresponding switch in the `train` method.
        :type include_last_sample: bool

        :param rank_deficit_method: Possible values: 'none' (default), 'reg', 'pca', 'svd', 'ldl', 'auto'
            If not 'none', the ``stop_training`` method solves the SFA eigenvalue
            problem in a way that is robust against linear redundancies in
            the input data. This would otherwise lead to rank deficit in the
            covariance matrix, which usually yields a
            SymeigException ('Covariance matrices may be singular').
            There are several solving methods implemented:

            reg  - works by regularization
            pca  - works by PCA
            svd  - works by SVD
            ldl  - works by LDL decomposition (requires SciPy >= 1.0)

            auto - (Will be: selects the best-benchmarked method of the above)
                   Currently it simply selects pca.

            Note: If you already received an exception
            SymeigException ('Covariance matrices may be singular')
            you can manually set the solving method for an existing node::

               sfa.set_rank_deficit_method('pca')

            That means::

               sfa = SFANode(rank_deficit_method='pca')

            is equivalent to::

               sfa = SFANode()
               sfa.set_rank_deficit_method('pca')

            After such an adjustment you can run ``stop_training()`` again,
            which would save a potentially time-consuming rerun of all
            ``train()`` calls.
        :type rank_deficit_method: str
        """
        super(SFANode, self).__init__(input_dim, output_dim, dtype)
        self._include_last_sample = include_last_sample

        # init two covariance matrices
        self._init_cov()

        # set routine for eigenproblem
        self.set_rank_deficit_method(rank_deficit_method)
        self.rank_threshold = 1e-12
        self.rank_deficit = 0

        # SFA eigenvalues and eigenvectors, will be set after training
        self.d = None
        self.sf = None  # second index for outputs
        self.avg = None
        self._bias = None  # avg multiplied with sf
        self.tlen = None

    def _init_cov(self):
        # init two covariance matrices
        # one for the input data
        self._cov_mtx = CovarianceMatrix(self.dtype)
        # one for the derivatives
        self._dcov_mtx = CovarianceMatrix(self.dtype)

    def set_rank_deficit_method(self, rank_deficit_method):
        if rank_deficit_method == 'pca':
            self._symeig = symeig_semidefinite_pca
        elif rank_deficit_method == 'reg':
            self._symeig = symeig_semidefinite_reg
        elif rank_deficit_method == 'svd':
            self._symeig = symeig_semidefinite_svd
        elif rank_deficit_method == 'ldl':
            try:
                from scipy.linalg.lapack import dsytrf
            except ImportError:
                err_msg = (
                    "ldl method for solving SFA with rank deficit covariance "
                    "requires at least SciPy 1.0.")
                raise NodeException(err_msg)
            self._symeig = symeig_semidefinite_ldl
        elif rank_deficit_method == 'auto':
            self._symeig = symeig_semidefinite_pca
        elif rank_deficit_method == 'none':
            self._symeig = symeig
        else:
            raise ValueError("Invalid value for rank_deficit_method: %s" %
                             str(rank_deficit_method))

    def time_derivative(self, x):
        """Compute the linear approximation of the time derivative

        :param x: The time series data.
        :type x: numpy.ndarray

        :returns: Piecewise linear approximation of the time derivative.
        :rtype: numpy.ndarray
        """
        # this is faster than a linear_filter or a weave-inline solution
        return x[1:, :] - x[:-1, :]

    def _set_range(self):
        if self.output_dim is not None and self.output_dim <= self.input_dim:
            # (eigenvalues sorted in ascending order)
            rng = (1, self.output_dim)
        else:
            # otherwise, keep all output components
            rng = None
            self.output_dim = self.input_dim
        return rng

    def _check_train_args(self, x, *args, **kwargs):
        """Raises exception if time dimension does not have enough elements.

        :param x: The time series data.
        :type x: numpy.ndarray

        :param *args:
        :param **kwargs:
        """
        # check that we have at least 2 time samples to
        # compute the update for the derivative covariance matrix
        s = x.shape[0]
        if s < 2:
            raise TrainingException('Need at least 2 time samples to '
                                    'compute time derivative (%d given)' % s)

    def _train(self, x, include_last_sample=None):
        """Training method.

        :param x: The time series data.
        :type x: numpy.ndarray

        :param include_last_sample: For the ``include_last_sample`` switch have a
            look at the SFANode.__init__ docstring.
        :type include_last_sample: bool
        """
        if include_last_sample is None:
            include_last_sample = self._include_last_sample
        # works because x[:None] == x[:]
        last_sample_index = None if include_last_sample else -1

        # update the covariance matrices
        self._cov_mtx.update(x[:last_sample_index, :])
        self._dcov_mtx.update(self.time_derivative(x))

    def _stop_training(self, debug=False):
        # request the covariance matrices and clean up
        if hasattr(self, '_cov_mtx'):
            self.cov_mtx, self.avg, self.tlen = self._cov_mtx.fix()
            del self._cov_mtx
        # do not center around the mean:
        # we want the second moment matrix (centered about 0) and
        # not the second central moment matrix (centered about the mean), i.e.
        # the covariance matrix
        if hasattr(self, '_dcov_mtx'):
            self.dcov_mtx, self.davg, self.dtlen = self._dcov_mtx.fix(
                center=False)
            del self._dcov_mtx

        rng = self._set_range()

        # solve the generalized eigenvalue problem
        # the eigenvalues are already ordered in ascending order
        try:
            try:
                # We first try to fulfill the extended signature described
                # in mdp.utils.symeig_semidefinite
                self.d, self.sf = self._symeig(
                    self.dcov_mtx,
                    self.cov_mtx,
                    True,
                    "on",
                    rng,
                    overwrite=(not debug),
                    rank_threshold=self.rank_threshold,
                    dfc_out=self)
            except TypeError:
                self.d, self.sf = self._symeig(self.dcov_mtx,
                                               self.cov_mtx,
                                               True,
                                               "on",
                                               rng,
                                               overwrite=(not debug))
            d = self.d
            # check that we get only *positive* eigenvalues
            if d.min() < 0:
                err_msg = (
                    "Got negative eigenvalues: %s.\n"
                    "You may either set output_dim to be smaller,\n"
                    "or prepend the SFANode with a PCANode(reduce=True)\n"
                    "or PCANode(svd=True)\n"
                    "or set a rank deficit method, e.g.\n"
                    "create the SFA node with rank_deficit_method='auto'\n"
                    "and try higher values for rank_threshold, e.g. try\n"
                    "your_node.rank_threshold = 1e-10, 1e-8, 1e-6, ..." %
                    str(d))
                raise NodeException(err_msg)
        except SymeigException as exception:
            errstr = (str(exception) +
                      "\n Covariance matrices may be singular.\n" +
                      SINGULAR_VALUE_MSG)
            raise NodeException(errstr)

        if not debug:
            # delete covariance matrix if no exception occurred
            del self.cov_mtx
            del self.dcov_mtx

        # store bias
        self._bias = mult(self.avg, self.sf)

    def _execute(self, x, n=None):
        """Compute the output of the slowest functions.

        :param x: The time series data.
        :type x: numpy.ndarray

        :param n: The number of slowest components. If 'n' is an integer,
            then use the first 'n' slowest components.
        :type n: int

        :returns: The output of the slowest functions.
        :rtype: numpy.ndarray
        """
        if n:
            sf = self.sf[:, :n]
            bias = self._bias[:n]
        else:
            sf = self.sf
            bias = self._bias
        return mult(x, sf) - bias

    def _inverse(self, y):
        return mult(y, pinv(self.sf)) + self.avg

    def get_eta_values(self, t=1):
        """Return the eta values of the slow components learned during
        the training phase. If the training phase has not been completed
        yet, call `stop_training`.

        The delta value of a signal is a measure of its temporal
        variation, and is defined as the mean of the derivative squared,
        i.e. delta(x) = mean(dx/dt(t)^2).  delta(x) is zero if
        x is a constant signal, and increases if the temporal variation
        of the signal is bigger.

        The eta value is a more intuitive measure of temporal variation,
        defined as
        eta(x) = t/(2*pi) * sqrt(delta(x))
        If x is a signal of length 't' which consists of a sine function
        that accomplishes exactly N oscillations, then eta(x)=N.

        :param t: Sampling frequency in Hz.

            The original definition in (Wiskott and Sejnowski, 2002)
            is obtained for t = number of training data points, while
            for t=1 (default), this corresponds to the beta-value defined
            in (Berkes and Wiskott, 2005).

        :returns: The eta values of the slow components learned during
            the training phase.
        """
        if self.is_training():
            self.stop_training()
        return self._refcast(t / (2 * numx.pi) * numx.sqrt(self.d))
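
The eta formula in get_eta_values can be checked directly with NumPy: for a unit-variance sine of length T containing N full oscillations, eta evaluated with t=T comes out close to N. A small numeric sketch of that claim:

    import numpy as np

    T, N = 1000, 5
    i = np.arange(T)
    x = np.sqrt(2) * np.sin(2 * np.pi * N * i / T)  # unit-variance sine, N oscillations
    delta = np.mean(np.diff(x) ** 2)                # mean squared time derivative
    eta = T / (2 * np.pi) * np.sqrt(delta)
    print(eta)  # approximately 5
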
Example 13
 def _init_cov(self):
     # init two covariance matrices
     # one for the input data
     self._cov_mtx = CovarianceMatrix(self.dtype)
     # one for the derivatives
     self._dcov_mtx = CovarianceMatrix(self.dtype)
Example 14
    def __init__(self,
                 input_dim=None,
                 output_dim=None,
                 dtype=None,
                 svd=False,
                 reduce=False,
                 var_rel=1E-12,
                 var_abs=1E-15,
                 var_part=None):
        """Initializes an object of type 'PCANode'.

        The number of principal components to be kept can be specified as
        'output_dim' directly (e.g. 'output_dim=10' means 10 components
        are kept) or by the fraction of variance to be explained
        (e.g. 'output_dim=0.95' means that as many components as necessary
        will be kept in order to explain 95% of the input variance).
        
        :param input_dim: Dimensionality of the input.
            Default is None.
        :type input_dim: int
        
        :param output_dim: Dimensionality of the output.
            Default is None.
        :type output_dim: int
        
        :param dtype: Datatype of the input.
            Default is None.
        :type dtype: numpy.dtype, str
        
        :param svd: If True use Singular Value Decomposition instead of the
            standard eigenvalue problem solver. Use it when PCANode
            complains about singular covariance matrices.
            Default is False.
        :type svd: bool
        
        :param reduce: Keep only those principal components which have a variance
            larger than 'var_abs' and a variance relative to the
            first principal component larger than 'var_rel' and a
            variance relative to total variance larger than 'var_part'
            (set var_part to None or 0 for no filtering).
            Default is False.
        :type reduce: bool
            
        .. note:: 
            When the *reduce* switch is enabled, the actual number
            of principal components (self.output_dim) may be different
            from that set when creating the instance.
            
        :param var_rel: Variance relative to first principal component threshold.
            Default is 1E-12.
        :type var_rel: float
        
        :param var_abs: Absolute variance threshold.
            Default is 1E-15.
        :type var_abs: float
        
        :param var_part: Variance relative to total variance threshold.
            Default is None.
        :type var_part: float
        """

        # this must occur *before* calling super!
        self.desired_variance = None
        super(PCANode, self).__init__(input_dim, output_dim, dtype)
        self.svd = svd
        # set routine for eigenproblem
        if svd:
            self._symeig = nongeneral_svd
        else:
            self._symeig = symeig
        self.var_abs = var_abs
        self.var_rel = var_rel
        self.var_part = var_part
        self.reduce = reduce
        # empirical covariance matrix, updated during the training phase
        self._cov_mtx = CovarianceMatrix(dtype)
        # attributes that are defined in stop_training
        self.d = None  # eigenvalues
        self.v = None  # eigenvectors, first index for coordinates
        self.total_variance = None
        self.tlen = None
        self.avg = None
        self.explained_variance = None
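
A hedged sketch of the reduce switch described above: when the data effectively lives in a lower-dimensional subspace, the negligible-variance directions are filtered out and self.output_dim shrinks accordingly (assuming the mdp package):

    import numpy as np
    import mdp

    # data confined (up to tiny noise) to a 3-dimensional subspace of R^6
    x = np.random.randn(1000, 3) @ np.random.randn(3, 6)
    x += 1e-9 * np.random.randn(1000, 6)

    pca = mdp.nodes.PCANode(reduce=True, var_rel=1e-6)
    pca.train(x)
    pca.stop_training()
    print(pca.output_dim)  # typically 3: the near-zero variance components were dropped
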
Example 15
class PCANode(mdp.Node):
    """Filter the input data through the most significatives of its
    principal components.
    
    :ivar avg: Mean of the input data (available after training).
        
    :ivar v: Transpose of the projection matrix (available after training).

    :ivar d: Variance corresponding to the PCA components (eigenvalues of the
        covariance matrix).

    :ivar explained_variance: When output_dim has been specified as a fraction
        of the total variance, this is the fraction of the total variance that is
        actually explained.
    
    |
    
    .. admonition:: Reference
    
        More information about Principal Component Analysis, a.k.a. discrete
        Karhunen-Loeve transform can be found among others in
        I.T. Jolliffe, Principal Component Analysis, Springer-Verlag (1986).
    """
    def __init__(self,
                 input_dim=None,
                 output_dim=None,
                 dtype=None,
                 svd=False,
                 reduce=False,
                 var_rel=1E-12,
                 var_abs=1E-15,
                 var_part=None):
        """Initializes an object of type 'PCANode'.

        The number of principal components to be kept can be specified as
        'output_dim' directly (e.g. 'output_dim=10' means 10 components
        are kept) or by the fraction of variance to be explained
        (e.g. 'output_dim=0.95' means that as many components as necessary
        will be kept in order to explain 95% of the input variance).
        
        :param input_dim: Dimensionality of the input.
            Default is None.
        :type input_dim: int
        
        :param output_dim: Dimensionality of the output.
            Default is None.
        :type output_dim: int
        
        :param dtype: Datatype of the input.
            Default is None.
        :type dtype: numpy.dtype, str
        
        :param svd: If True use Singular Value Decomposition instead of the
            standard eigenvalue problem solver. Use it when PCANode
            complains about singular covariance matrices.
            Default is False.
        :type svd: bool
        
        :param reduce: Keep only those principal components which have a variance
            larger than 'var_abs' and a variance relative to the
            first principal component larger than 'var_rel' and a
            variance relative to total variance larger than 'var_part'
            (set var_part to None or 0 for no filtering).
            Default is False.
        :type reduce: bool
            
        .. note:: 
            When the *reduce* switch is enabled, the actual number
            of principal components (self.output_dim) may be different
            from that set when creating the instance.
            
        :param var_rel: Variance relative to first principal component threshold.
            Default is 1E-12.
        :type var_rel: float
        
        :param var_abs: Absolute variance threshold.
            Default is 1E-15.
        :type var_abs: float
        
        :param var_part: Variance relative to total variance threshold.
            Default is None.
        :type var_part: float
        """

        # this must occur *before* calling super!
        self.desired_variance = None
        super(PCANode, self).__init__(input_dim, output_dim, dtype)
        self.svd = svd
        # set routine for eigenproblem
        if svd:
            self._symeig = nongeneral_svd
        else:
            self._symeig = symeig
        self.var_abs = var_abs
        self.var_rel = var_rel
        self.var_part = var_part
        self.reduce = reduce
        # empirical covariance matrix, updated during the training phase
        self._cov_mtx = CovarianceMatrix(dtype)
        # attributes that are defined in stop_training
        self.d = None  # eigenvalues
        self.v = None  # eigenvectors, first index for coordinates
        self.total_variance = None
        self.tlen = None
        self.avg = None
        self.explained_variance = None

    def _set_output_dim(self, n):
        if n <= 1 and isinstance(n, float):
            # set the output dim after training, when the variances are known
            self.desired_variance = n
        else:
            self._output_dim = n

    def _check_output(self, y):
        # check output rank
        if not y.ndim == 2:
            error_str = "y has rank %d, should be 2" % (y.ndim)
            raise mdp.NodeException(error_str)

        if y.shape[1] == 0 or y.shape[1] > self.output_dim:
            error_str = ("y has dimension %d"
                         ", should be 0<y<=%d" % (y.shape[1], self.output_dim))
            raise mdp.NodeException(error_str)

    def get_explained_variance(self):
        """The explained variance is the  fraction of the original variance
        that can be explained by self._output_dim PCA components. If for
        example output_dim has been set to 0.95, the explained variance could
        be something like 0.958...
        
        .. note::
            If output_dim was explicitly set to be a fixed number
            of components, there is no way to calculate the explained variance.
        
        :return: The explained variance.
        :rtype: float
        """

        return self.explained_variance

    def _train(self, x):
        """Update the covariance matrix.
        
        :param x: The training data.
        :type x: numpy.ndarray
        """
        self._cov_mtx.update(x)

    def _adjust_output_dim(self):
        """This function is used if the output dimensions is smaller than the input
        dimension (so only the larger eigenvectors have to be kept). If required it
        sets the output dim.
        
        :return: The eigenvector range.
        :rtype: tuple
        """
        # if the number of principal components to keep is not specified,
        # keep all components
        if self.desired_variance is None and self.output_dim is None:
            self.output_dim = self.input_dim
            return None

        ## define the range of eigenvalues to compute
        # if the number of principal components to keep has been
        # specified directly
        if self.output_dim is not None and self.output_dim >= 1:
            # (eigenvalues sorted in ascending order)
            return (self.input_dim - self.output_dim + 1, self.input_dim)
        # otherwise, the number of principal components to keep has been
        # specified by the fraction of variance to be explained
        else:
            return None

    def _stop_training(self, debug=False):
        """Stop the training phase.
        
        :param debug: Determines if singular matrices itself are stored in
            self.cov_mtx and self.dcov_mtx to be examined, given that
            stop_training fails because of singular covmatrices.
            Default is False.
        :type debug: bool
        
        :raises mdp.NodeException: If negative eigenvalues occur, 
            the covariance matrix may be singular or no component
            amounts to variation exceeding var_abs. 
        """

        # request the covariance matrix and clean up
        self.cov_mtx, avg, self.tlen = self._cov_mtx.fix()
        del self._cov_mtx

        # this is a bit counterintuitive, as it reshapes the average vector to
        # be a matrix. in this way, however, we spare the reshape
        # operation every time that 'execute' is called.
        self.avg = avg.reshape(1, avg.shape[0])

        # range for the eigenvalues
        rng = self._adjust_output_dim()

        # if we have more variables than observations we are bound to fail here
        # suggest to use the NIPALSNode instead.
        if debug and self.tlen < self.input_dim:
            wrn = ('The number of observations (%d) '
                   'is smaller than the number of input variables '
                   '(%d). You may want to use '
                   'the NIPALSNode instead.' % (self.tlen, self.input_dim))
            _warnings.warn(wrn, mdp.MDPWarning)

        # total variance can be computed at this point:
        # note that vartot == d.sum()
        vartot = numx.diag(self.cov_mtx).sum()

        ## compute and sort the eigenvalues
        # compute the eigenvectors of the covariance matrix (inplace)
        # (eigenvalues sorted in ascending order)
        try:
            d, v = self._symeig(self.cov_mtx, range=rng, overwrite=(not debug))
            # if reduce=False and svd=False, we should check for
            # negative eigenvalues and fail
            if not (self.reduce or self.svd or
                    (self.desired_variance is not None)):
                if d.min() < 0:
                    raise mdp.NodeException(
                        "Got negative eigenvalues: %s.\n"
                        "You may either set output_dim to be smaller, "
                        "or set reduce=True and/or svd=True" % str(d))
        except SymeigException as exception:
            err = str(exception) + ("\nCovariance matrix may be singular."
                                    "Try setting svd=True.")
            raise mdp.NodeException(err)

        # delete covariance matrix if no exception occurred
        if not debug:
            del self.cov_mtx

        # sort by descending order
        d = numx.take(d, list(range(d.shape[0] - 1, -1, -1)))
        v = v[:, ::-1]

        if self.desired_variance is not None:
            # throw away immediately negative eigenvalues
            d = d[d > 0]
            # the number of principal components to keep has
            # been specified by the fraction of variance to be explained
            varcum = (old_div(d, vartot)).cumsum(axis=0)
            # select only the relevant eigenvalues
            # number of relevant eigenvalues
            neigval = int(varcum.searchsorted(self.desired_variance) + 1.)
            #self.explained_variance = varcum[neigval-1]
            # cut
            d = d[0:neigval]
            v = v[:, 0:neigval]
            # define the new output dimension
            self.output_dim = int(neigval)

        # automatic dimensionality reduction
        if self.reduce:
            # remove entries that are smaller than var_abs and
            # smaller than var_rel relative to the maximum
            d = d[d > self.var_abs]
            # check that we did not throw away everything
            if len(d) == 0:
                raise mdp.NodeException('No eigenvalues larger than'
                                        ' var_abs=%e!' % self.var_abs)
            d = d[old_div(d, d.max()) > self.var_rel]

            # filter for variance relative to total variance
            if self.var_part:
                d = d[old_div(d, vartot) > self.var_part]

            v = v[:, 0:d.shape[0]]
            self._output_dim = d.shape[0]

        # set explained variance
        self.explained_variance = old_div(d.sum(), vartot)

        # store the eigenvalues
        self.d = d
        # store the eigenvectors
        self.v = v
        # store the total variance
        self.total_variance = vartot

    def get_projmatrix(self, transposed=1):
        """Returns the projection matrix.
        
        :param transposed: Determines whether the transposed projection
            matrix is returned.
            Default is True.
        :type transposed: bool
        
        :return: The projection matrix.
        :rtype: numpy.ndarray
        """
        self._if_training_stop_training()
        if transposed:
            return self.v
        return self.v.T

    def get_recmatrix(self, transposed=1):
        """Returns the the back-projection matrix
        (i.e. the reconstruction matrix).
        
        :param transposed: Determines whether the transposed back-projection matrix
            (i.e. the reconstruction matrix) is returned.
            Default is True.
        :type transposed: bool
        
        :return: The back-projection matrix (i.e. the reconstruction matrix).
        :rtype: numpy.ndarray
        """
        self._if_training_stop_training()
        if transposed:
            return self.v.T
        return self.v

    def _execute(self, x, n=None):
        """Project the input on the first 'n' principal components.
        
        If 'n' is not set, use all available components.
        
        :param x: Input with at least 'n' principal components.
        :type x: numpy.ndarray
        
        :param n: Number of first principal components.
        :type n: int
        
        :return: The projected input.
        :rtype: numpy.ndarray
        """

        if n is not None:
            return mult(x - self.avg, self.v[:, :n])
        return mult(x - self.avg, self.v)

    def _inverse(self, y, n=None):
        """Project data from the output to the input space using the
        first 'n' components.
        
        If 'n' is not set, use all available components.
        
        :param y: Data to be projected to the input space.
        :type y: numpy.ndarray
        
        :param n: Number of first principal components.
        :type n: int
        
        :return: The projected data
        :rtype: numpy.ndarray
        """

        if n is None:
            n = y.shape[1]
        if n > self.output_dim:
            error_str = ("y has dimension %d,"
                         " should be at most %d" % (n, self.output_dim))
            raise mdp.NodeException(error_str)

        v = self.get_recmatrix()
        if n is not None:
            return mult(y, v[:n, :]) + self.avg
        return mult(y, v) + self.avg
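
A short round-trip sketch for the _execute/_inverse pair above: projecting onto all components and reconstructing recovers the input up to numerical error (assuming the mdp package):

    import numpy as np
    import mdp

    x = np.random.randn(300, 8) @ np.random.randn(8, 8)

    pca = mdp.nodes.PCANode()   # keep all components
    pca.train(x)
    y = pca.execute(x)          # project onto the principal components
    x_rec = pca.inverse(y)      # back-project to the input space
    print(np.allclose(x, x_rec, atol=1e-6))
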
Example 16
class PCANode(mdp.Node):
    """Filter the input data through the most significatives of its
    principal components.
    
    .. attribute:: avg
    
        Mean of the input data (available after training).
        
    .. attribute:: v
    
        Transpose of the projection matrix (available after training).

    .. attribute:: d
    
        Variance corresponding to the PCA components (eigenvalues of the
        covariance matrix).

    .. attribute:: explained_variance
    
        When output_dim has been specified as a fraction of the total
        variance, this is the fraction of the total variance that is
        actually explained.
    
    |
    
    .. admonition:: Reference
    
        More information about Principal Component Analysis, a.k.a. discrete
        Karhunen-Loeve transform can be found among others in
        I.T. Jolliffe, Principal Component Analysis, Springer-Verlag (1986).
    """


    def __init__(self, input_dim=None, output_dim=None, dtype=None,
                 svd=False, reduce=False, var_rel=1E-12, var_abs=1E-15,
                 var_part=None):
        """Initializes an object of type 'PCANode'.

        The number of principal components to be kept can be specified as
        'output_dim' directly (e.g. 'output_dim=10' means 10 components
        are kept) or by the fraction of variance to be explained
        (e.g. 'output_dim=0.95' means that as many components as necessary
        will be kept in order to explain 95% of the input variance).
        
        :param input_dim: Dimensionality of the input.
            Default is None.
        :type input_dim: int
        
        :param output_dim: Dimensionality of the output.
            Default is None.
        :type output_dim: int
        
        :param dtype: Datatype of the input.
            Default is None.
        :type dtype: numpy.dtype, str
        
        :param svd: If True use Singular Value Decomposition instead of the
            standard eigenvalue problem solver. Use it when PCANode
            complains about singular covariance matrices.
            Default is False.
        :type svd: bool
        
        :param reduce: Keep only those principal components which have a variance
            larger than 'var_abs' and a variance relative to the
            first principal component larger than 'var_rel' and a
            variance relative to total variance larger than 'var_part'
            (set var_part to None or 0 for no filtering).
            Default is False.
        :type reduce: bool
            
        .. note:: 
            When the *reduce* switch is enabled, the actual number
            of principal components (self.output_dim) may be different
            from that set when creating the instance.
            
        :param var_rel: Variance relative to first principal component threshold.
            Default is 1E-12.
        :type var_rel: float
        
        :param var_abs: Absolute variance threshold.
            Default is 1E-15.
        :type var_abs: float
        
        :param var_part: Variance relative to total variance threshold.
            Default is None.
        :type var_part: float
        """

        # this must occur *before* calling super!
        self.desired_variance = None
        super(PCANode, self).__init__(input_dim, output_dim, dtype)
        self.svd = svd
        # set routine for eigenproblem
        if svd:
            self._symeig = nongeneral_svd
        else:
            self._symeig = symeig
        self.var_abs = var_abs
        self.var_rel = var_rel
        self.var_part = var_part
        self.reduce = reduce
        # empirical covariance matrix, updated during the training phase
        self._cov_mtx = CovarianceMatrix(dtype)
        # attributes that are defined in stop_training
        self.d = None  # eigenvalues
        self.v = None  # eigenvectors, first index for coordinates
        self.total_variance = None
        self.tlen = None
        self.avg = None
        self.explained_variance = None

    def _set_output_dim(self, n):
        if n <= 1 and isinstance(n, float):
            # set the output dim after training, when the variances are known
            self.desired_variance = n
        else:
            self._output_dim = n

    def _check_output(self, y):
        # check output rank
        if not y.ndim == 2:
            error_str = "y has rank %d, should be 2" % (y.ndim)
            raise mdp.NodeException(error_str)

        if y.shape[1] == 0 or y.shape[1] > self.output_dim:
            error_str = ("y has dimension %d"
                         ", should be 0<y<=%d" % (y.shape[1], self.output_dim))
            raise mdp.NodeException(error_str)

    def get_explained_variance(self):
        """The explained variance is the  fraction of the original variance
        that can be explained by self._output_dim PCA components. If for
        example output_dim has been set to 0.95, the explained variance could
        be something like 0.958...
        
        .. note::
            If output_dim was explicitly set to be a fixed number
            of components, there is no way to calculate the explained variance.
        
        :return: The explained variance.
        :rtype: float
        """

        return self.explained_variance

    def _train(self, x):
        """Update the covariance matrix.
        
        :param x: The training data.
        :type x: numpy.ndarray
        """
        self._cov_mtx.update(x)

    def _adjust_output_dim(self):
        """This function is used if the output dimensions is smaller than the input
        dimension (so only the larger eigenvectors have to be kept). If required it
        sets the output dim.
        
        :return: The eigenvector range.
        :rtype: tuple
        """
        # if the number of principal components to keep is not specified,
        # keep all components
        if self.desired_variance is None and self.output_dim is None:
            self.output_dim = self.input_dim
            return None

        ## define the range of eigenvalues to compute
        # if the number of principal components to keep has been
        # specified directly
        if self.output_dim is not None and self.output_dim >= 1:
            # (eigenvalues sorted in ascending order)
            return (self.input_dim - self.output_dim + 1,
                   self.input_dim)
        # otherwise, the number of principal components to keep has been
        # specified by the fraction of variance to be explained
        else:
            return None

    def _stop_training(self, debug=False):
        """Stop the training phase.
        
        :param debug: Determines if singular matrices itself are stored in
            self.cov_mtx and self.dcov_mtx to be examined, given that
            stop_training fails because of singular covmatrices.
            Default is False.
        :type debug: bool
        
        :raises mdp.NodeException: If negative eigenvalues occur, 
            the covariance matrix may be singular or no component
            amounts to variation exceeding var_abs. 
        """

        # request the covariance matrix and clean up
        self.cov_mtx, avg, self.tlen = self._cov_mtx.fix()
        del self._cov_mtx

        # this is a bit counterintuitive, as it reshapes the average vector to
        # be a matrix. in this way, however, we spare the reshape
        # operation every time that 'execute' is called.
        self.avg = avg.reshape(1, avg.shape[0])

        # range for the eigenvalues
        rng = self._adjust_output_dim()

        # if we have more variables than observations we are bound to fail here
        # suggest to use the NIPALSNode instead.
        if debug and self.tlen < self.input_dim:
            wrn = ('The number of observations (%d) '
                   'is smaller than the number of input variables '
                   '(%d). You may want to use '
                   'the NIPALSNode instead.' % (self.tlen, self.input_dim))
            _warnings.warn(wrn, mdp.MDPWarning)

        # total variance can be computed at this point:
        # note that vartot == d.sum()
        vartot = numx.diag(self.cov_mtx).sum()

        ## compute and sort the eigenvalues
        # compute the eigenvectors of the covariance matrix (inplace)
        # (eigenvalues sorted in ascending order)
        try:
            d, v = self._symeig(self.cov_mtx, range=rng, overwrite=(not debug))
            # if reduce=False and svd=False, we should check for
            # negative eigenvalues and fail
            if not (self.reduce or self.svd or (self.desired_variance is
                                                not None)):
                if d.min() < 0:
                    raise mdp.NodeException(
                        "Got negative eigenvalues: %s.\n"
                        "You may either set output_dim to be smaller, "
                        "or set reduce=True and/or svd=True" % str(d))
        except SymeigException as exception:
            err = str(exception)+("\nCovariance matrix may be singular."
                                  "Try setting svd=True.")
            raise mdp.NodeException(err)

        # delete covariance matrix if no exception occurred
        if not debug:
            del self.cov_mtx

        # sort by descending order
        d = numx.take(d, list(range(d.shape[0]-1, -1, -1)))
        v = v[:, ::-1]

        if self.desired_variance is not None:
            # throw away immediately negative eigenvalues
            d = d[ d > 0 ]
            # the number of principal components to keep has
            # been specified by the fraction of variance to be explained
            varcum = (old_div(d, vartot)).cumsum(axis=0)
            # select only the relevant eigenvalues
            # number of relevant eigenvalues
            neigval = int(varcum.searchsorted(self.desired_variance) + 1.)
            #self.explained_variance = varcum[neigval-1]
            # cut
            d = d[0:neigval]
            v = v[:, 0:neigval]
            # define the new output dimension
            self.output_dim = int(neigval)

        # automatic dimensionality reduction
        if self.reduce:
            # remove entries that are smaller than var_abs and
            # smaller than var_rel relative to the maximum
            d = d[ d > self.var_abs ]
            # check that we did not throw away everything
            if len(d) == 0:
                raise mdp.NodeException('No eigenvalues larger than'
                                        ' var_abs=%e!'%self.var_abs)
            d = d[ old_div(d, d.max()) > self.var_rel ]

            # filter for variance relative to total variance
            if self.var_part:
                d = d[ old_div(d, vartot) > self.var_part ]

            v = v[:, 0:d.shape[0]]
            self._output_dim = d.shape[0]

        # set explained variance
        self.explained_variance = old_div(d.sum(), vartot)

        # store the eigenvalues
        self.d = d
        # store the eigenvectors
        self.v = v
        # store the total variance
        self.total_variance = vartot

    def get_projmatrix(self, transposed=1):
        """Returns the projection matrix.
        
        :param transposed: Determines whether the transposed projection
            matrix is returned.
            Default is True.
        :type transposed: bool
        
        :return: The projection matrix.
        :rtype: numpy.ndarray
        """
        self._if_training_stop_training()
        if transposed:
            return self.v
        return self.v.T

    def get_recmatrix(self, transposed=1):
        """Returns the the back-projection matrix
        (i.e. the reconstruction matrix).
        
        :param transposed: Determines whether the transposed back-projection matrix
            (i.e. the reconstruction matrix) is returned.
            Default is True.
        :type transposed: bool
        
        :return: The back-projection matrix (i.e. the reconstruction matrix).
        :rtype: numpy.ndarray
        """
        self._if_training_stop_training()
        if transposed:
            return self.v.T
        return self.v

    def _execute(self, x, n=None):
        """Project the input on the first 'n' principal components.
        
        If 'n' is not set, use all available components.
        
        :param x: Input with at least 'n' principal components.
        :type x: numpy.ndarray
        
        :param n: Number of first principal components.
        :type n: int
        
        :return: The projected input.
        :rtype: numpy.ndarray
        """

        if n is not None:
            return mult(x-self.avg, self.v[:, :n])
        return mult(x-self.avg, self.v)

    def _inverse(self, y, n=None):
        """Project data from the output to the input space using the
        first 'n' components.
        
        If 'n' is not set, use all available components.
        
        :param y: Data to be projected to the input space.
        :type y: numpy.ndarray
        
        :param n: Number of first principal components.
        :type n: int
        
        :return: The projected data
        :rtype: numpy.ndarray
        """

        if n is None:
            n = y.shape[1]
        if n > self.output_dim:
            error_str = ("y has dimension %d,"
                         " should be at most %d" % (n, self.output_dim))
            raise mdp.NodeException(error_str)

        v = self.get_recmatrix()
        if n is not None:
            return mult(y, v[:n, :]) + self.avg
        return mult(y, v) + self.avg
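
A minimal usage sketch for the projection and reconstruction methods above (hypothetical data; it assumes the class is available as mdp.nodes.PCANode and that numpy is importable): executing the node is equivalent to multiplying the centered data with the projection matrix, and inverting it to multiplying with the reconstruction matrix.

import numpy as np
import mdp

x = np.random.randn(300, 5)

pca = mdp.nodes.PCANode(output_dim=2)
pca.train(x)
pca.stop_training()

proj = pca.get_projmatrix()   # projection matrix, shape (5, 2)
rec = pca.get_recmatrix()     # reconstruction matrix, shape (2, 5)

y = np.dot(x - pca.avg, proj)             # same as pca.execute(x)
assert np.allclose(y, pca.execute(x))

x_rec = np.dot(y, rec) + pca.avg          # same as pca.inverse(y)
assert np.allclose(x_rec, pca.inverse(y))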
Example No. 17
0
class SFANode(Node):
    """Extract the slowly varying components from the input data.
    More information about Slow Feature Analysis can be found in
    Wiskott, L. and Sejnowski, T.J., Slow Feature Analysis: Unsupervised
    Learning of Invariances, Neural Computation, 14(4):715-770 (2002).

    **Instance variables of interest**

      ``self.avg``
          Mean of the input data (available after training)

      ``self.sf``
          Matrix of the SFA filters (available after training)

      ``self.d``
          Delta values corresponding to the SFA components (generalized
          eigenvalues). [See the docs of the ``get_eta_values`` method for
          more information]

    **Special arguments for constructor**

      ``include_last_sample``
          If ``False`` the `train` method discards the last sample in every
          chunk during training when calculating the covariance matrix.
          The last sample is in this case only used for calculating the
          covariance matrix of the derivatives. The switch should be set
          to ``False`` if you plan to train with several small chunks. For
          example we can split a sequence (index is time)::

            x_1 x_2 x_3 x_4

          in smaller parts like this::

            x_1 x_2
            x_2 x_3
            x_3 x_4

          The SFANode will see 3 derivatives for the temporal covariance
          matrix, and the first 3 points for the spatial covariance matrix.
          Of course you will need to use a generator that *connects* the
          small chunks (the last sample needs to be sent again in the next
          chunk). If ``include_last_sample`` was True, depending on the
          generator you use, you would either get::

             x_1 x_2
             x_2 x_3
             x_3 x_4

          in which case the last sample of every chunk would be used twice
          when calculating the covariance matrix, or::

             x_1 x_2
             x_3 x_4

          in which case you lose the derivative between ``x_2`` and ``x_3``.

          If you plan to train with a single big chunk leave
          ``include_last_sample`` to the default value, i.e. ``True``.

          You can even change this behaviour during training. Just set the
          corresponding switch in the `train` method.
    """
    def __init__(self,
                 input_dim=None,
                 output_dim=None,
                 dtype=None,
                 include_last_sample=True):
        """
        For the ``include_last_sample`` switch have a look at the
        SFANode class docstring.
         """
        super(SFANode, self).__init__(input_dim, output_dim, dtype)
        self._include_last_sample = include_last_sample

        # init two covariance matrices
        # one for the input data
        self._cov_mtx = CovarianceMatrix(dtype)
        # one for the derivatives
        self._dcov_mtx = CovarianceMatrix(dtype)

        # set routine for eigenproblem
        self._symeig = symeig

        # SFA eigenvalues and eigenvectors, will be set after training
        self.d = None
        self.sf = None  # second index for outputs
        self.avg = None
        self._bias = None  # avg multiplied with sf
        self.tlen = None

    def time_derivative(self, x):
        """Compute the linear approximation of the time derivative."""
        # this is faster than a linear_filter or a weave-inline solution
        return x[1:, :] - x[:-1, :]

    def _set_range(self):
        if self.output_dim is not None and self.output_dim <= self.input_dim:
            # (eigenvalues sorted in ascending order)
            rng = (1, self.output_dim)
        else:
            # otherwise, keep all output components
            rng = None
            self.output_dim = self.input_dim
        return rng

    def _check_train_args(self, x, *args, **kwargs):
        # check that we have at least 2 time samples to
        # compute the update for the derivative covariance matrix
        s = x.shape[0]
        if s < 2:
            raise TrainingException('Need at least 2 time samples to '
                                    'compute time derivative (%d given)' % s)

    def _train(self, x, include_last_sample=None):
        """
        For the ``include_last_sample`` switch have a look at the
        SFANode class docstring.
        """
        if include_last_sample is None:
            include_last_sample = self._include_last_sample
        # works because x[:None] == x[:]
        last_sample_index = None if include_last_sample else -1

        # update the covariance matrices
        self._cov_mtx.update(x[:last_sample_index, :])
        self._dcov_mtx.update(self.time_derivative(x))

    def _stop_training(self, debug=False):
        ##### request the covariance matrices and clean up
        self.cov_mtx, self.avg, self.tlen = self._cov_mtx.fix()
        del self._cov_mtx
        # do not center around the mean:
        # we want the second moment matrix (centered about 0) and
        # not the second central moment matrix (centered about the mean), i.e.
        # the covariance matrix
        self.dcov_mtx, self.davg, self.dtlen = self._dcov_mtx.fix(center=False)
        del self._dcov_mtx

        rng = self._set_range()

        #### solve the generalized eigenvalue problem
        # the eigenvalues are already ordered in ascending order
        try:
            self.d, self.sf = self._symeig(self.dcov_mtx,
                                           self.cov_mtx,
                                           range=rng,
                                           overwrite=(not debug))
            d = self.d
            # check that we get only *positive* eigenvalues
            if d.min() < 0:
                err_msg = (
                    "Got negative eigenvalues: %s."
                    " You may either set output_dim to be smaller,"
                    " or prepend the SFANode with a PCANode(reduce=True)"
                    " or PCANode(svd=True)" % str(d))
                raise NodeException(err_msg)
        except SymeigException as exception:
            errstr = str(exception) + "\n Covariance matrices may be singular."
            raise NodeException(errstr)

        if not debug:
            # delete covariance matrix if no exception occurred
            del self.cov_mtx
            del self.dcov_mtx

        # store bias
        self._bias = mult(self.avg, self.sf)

    def _execute(self, x, n=None):
        """Compute the output of the slowest functions.
        If 'n' is an integer, then use the first 'n' slowest components."""
        if n:
            sf = self.sf[:, :n]
            bias = self._bias[:n]
        else:
            sf = self.sf
            bias = self._bias
        return mult(x, sf) - bias

    def _inverse(self, y):
        return mult(y, pinv(self.sf)) + self.avg

    def get_eta_values(self, t=1):
        """Return the eta values of the slow components learned during
        the training phase. If the training phase has not been completed
        yet, call `stop_training`.

        The delta value of a signal is a measure of its temporal
        variation, and is defined as the mean of the derivative squared,
        i.e. delta(x) = mean(dx/dt(t)^2).  delta(x) is zero if
        x is a constant signal, and increases if the temporal variation
        of the signal is bigger.

        The eta value is a more intuitive measure of temporal variation,
        defined as
        eta(x) = t/(2*pi) * sqrt(delta(x))
        If x is a signal of length 't' which consists of a sine function
        that accomplishes exactly N oscillations, then eta(x)=N.

        :Parameters:
           t
             Sampling frequency in Hz.

             The original definition in (Wiskott and Sejnowski, 2002)
             is obtained for t = number of training data points, while
             for t=1 (default), this corresponds to the beta-value defined in
             (Berkes and Wiskott, 2005).
        """
        if self.is_training():
            self.stop_training()
        return self._refcast(t / (2 * numx.pi) * numx.sqrt(self.d))
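
A small sketch of the chunked training scheme described in the class docstring (hypothetical data; assumes mdp and numpy): the chunks overlap by one sample and include_last_sample=False keeps the overlapping sample from being counted twice in the spatial covariance. Since the slow source completes two oscillations over the whole signal, the first eta value should come out close to 2.

import numpy as np
import mdp

# two sources: a slow sine (2 oscillations) and a fast one (20 oscillations)
t = np.linspace(0, 4 * np.pi, 2000)
sources = np.column_stack([np.sin(t), np.sin(10 * t)])
x = np.dot(sources, np.array([[1.0, 0.3], [0.5, 1.0]]))  # linear mixture

sfa = mdp.nodes.SFANode(include_last_sample=False)
chunk = 500
for start in range(0, len(x) - 1, chunk):
    # chunks overlap by one sample, so no derivative is lost at the seams
    sfa.train(x[start:start + chunk + 1])
sfa.stop_training()

slow = sfa.execute(x)                 # slowest components come first
print(sfa.get_eta_values(t=len(x)))   # roughly [2, 20]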
Example No. 18
0
class PCANode(mdp.Node):
    """Filter the input data through the most significatives of its
    principal components.

    **Internal variables of interest**
    
      ``self.avg``
          Mean of the input data (available after training).

      ``self.v``
          Transposed of the projection matrix (available after training).

      ``self.d``
          Variance corresponding to the PCA components (eigenvalues of the
          covariance matrix).

      ``self.explained_variance``
          When output_dim has been specified as a fraction of the total
          variance, this is the fraction of the total variance that is
          actually explained.

    More information about Principal Component Analysis, a.k.a. discrete
    Karhunen-Loeve transform can be found among others in
    I.T. Jolliffe, Principal Component Analysis, Springer-Verlag (1986).
    """

    def __init__(self, input_dim=None, output_dim=None, dtype=None,
                 svd=False, reduce=False, var_rel=1E-12, var_abs=1E-15,
                 var_part=None):
        """The number of principal components to be kept can be specified as
        'output_dim' directly (e.g. 'output_dim=10' means 10 components
        are kept) or by the fraction of variance to be explained
        (e.g. 'output_dim=0.95' means that as many components as necessary
        will be kept in order to explain 95% of the input variance).

        Other Keyword Arguments:

        svd -- if True use Singular Value Decomposition instead of the
               standard eigenvalue problem solver. Use it when PCANode
               complains about singular covariance matrices

        reduce -- Keep only those principal components which have a variance
                  larger than 'var_abs' and a variance relative to the
                  first principal component larger than 'var_rel' and a
                  variance relative to total variance larger than 'var_part'
                  (set var_part to None or 0 for no filtering).
                  Note: when the 'reduce' switch is enabled, the actual number
                  of principal components (self.output_dim) may be different
                  from that set when creating the instance.
        """
        # this must occur *before* calling super!
        self.desired_variance = None
        super(PCANode, self).__init__(input_dim, output_dim, dtype)
        self.svd = svd
        # set routine for eigenproblem
        if svd:
            self._symeig = nongeneral_svd
        else:
            self._symeig = symeig
        self.var_abs = var_abs
        self.var_rel = var_rel
        self.var_part = var_part
        self.reduce = reduce
        # empirical covariance matrix, updated during the training phase
        self._cov_mtx = CovarianceMatrix(dtype)
        # attributes that are defined in stop_training
        self.d = None  # eigenvalues
        self.v = None  # eigenvectors, first index for coordinates
        self.total_variance = None
        self.tlen = None
        self.avg = None
        self.explained_variance = None

    def _set_output_dim(self, n):
        if n <= 1 and isinstance(n, float):
            # set the output dim after training, when the variances are known
            self.desired_variance = n
        else:
            self._output_dim = n

    def _check_output(self, y):
        # check output rank
        if not y.ndim == 2:
            error_str = "y has rank %d, should be 2" % (y.ndim)
            raise mdp.NodeException(error_str)

        if y.shape[1] == 0 or y.shape[1] > self.output_dim:
            error_str = ("y has dimension %d"
                         ", should be 0<y<=%d" % (y.shape[1], self.output_dim))
            raise mdp.NodeException(error_str)

    def get_explained_variance(self):
        """Return the fraction of the original variance that can be
        explained by self._output_dim PCA components.
        If for example output_dim has been set to 0.95, the explained
        variance could be something like 0.958...
        Note that if output_dim was explicitly set to be a fixed number
        of components, there is no way to calculate the explained variance.
        """
        return self.explained_variance

    def _train(self, x):
        # update the covariance matrix
        self._cov_mtx.update(x)

    def _adjust_output_dim(self):
        """Return the eigenvector range and set the output dim if required.

        This is used if the output dimension is smaller than the input
        dimension (so only the larger eigenvectors have to be kept).
        """
        # if the number of principal components to keep is not specified,
        # keep all components
        if self.desired_variance is None and self.output_dim is None:
            self.output_dim = self.input_dim
            return None

        ## define the range of eigenvalues to compute
        # if the number of principal components to keep has been
        # specified directly
        if self.output_dim is not None and self.output_dim >= 1:
            # (eigenvalues sorted in ascending order)
            return (self.input_dim - self.output_dim + 1,
                   self.input_dim)
        # otherwise, the number of principal components to keep has been
        # specified by the fraction of variance to be explained
        else:
            return None

    def _stop_training(self, debug=False):
        """Stop the training phase.

        Keyword arguments:

        debug=True     if stop_training fails because of singular cov
                       matrices, the singular matrices themselves are stored in
                       self.cov_mtx and self.dcov_mtx to be examined.
        """
        # request the covariance matrix and clean up
        self.cov_mtx, avg, self.tlen = self._cov_mtx.fix()
        del self._cov_mtx

        # this is a bit counterintuitive, as it reshapes the average vector to
        # be a matrix. in this way, however, we spare the reshape
        # operation every time that 'execute' is called.
        self.avg = avg.reshape(1, avg.shape[0])

        # range for the eigenvalues
        rng = self._adjust_output_dim()

        # if we have more variables than observations we are bound to fail here
        # suggest to use the NIPALSNode instead.
        if debug and self.tlen < self.input_dim:
            wrn = ('The number of observations (%d) '
                   'is smaller than the number of input variables '
                   '(%d). You may want to use '
                   'the NIPALSNode instead.' % (self.tlen, self.input_dim))
            _warnings.warn(wrn, mdp.MDPWarning)

        # total variance can be computed at this point:
        # note that vartot == d.sum()
        vartot = numx.diag(self.cov_mtx).sum()

        ## compute and sort the eigenvalues
        # compute the eigenvectors of the covariance matrix (inplace)
        # (eigenvalues sorted in ascending order)
        try:
            d, v = self._symeig(self.cov_mtx, range=rng, overwrite=(not debug))
            # if reduce=False and svd=False, we should check for
            # negative eigenvalues and fail
            if not (self.reduce or self.svd or (self.desired_variance is
                                                not None)):
                if d.min() < 0:
                    raise mdp.NodeException(
                        "Got negative eigenvalues: %s.\n"
                        "You may either set output_dim to be smaller, "
                        "or set reduce=True and/or svd=True" % str(d))
        except SymeigException as exception:
            err = str(exception)+("\nCovariance matrix may be singular. "
                                  "Try setting svd=True.")
            raise mdp.NodeException(err)

        # delete covariance matrix if no exception occurred
        if not debug:
            del self.cov_mtx

        # sort by descending order
        d = numx.take(d, range(d.shape[0]-1, -1, -1))
        v = v[:, ::-1]

        if self.desired_variance is not None:
            # throw away immediately negative eigenvalues
            d = d[ d > 0 ]
            # the number of principal components to keep has
            # been specified by the fraction of variance to be explained
            varcum = (d / vartot).cumsum(axis=0)
            # select only the relevant eigenvalues
            # number of relevant eigenvalues
            neigval = int(varcum.searchsorted(self.desired_variance) + 1)
            #self.explained_variance = varcum[neigval-1]
            # cut
            d = d[0:neigval]
            v = v[:, 0:neigval]
            # define the new output dimension
            self.output_dim = int(neigval)

        # automatic dimensionality reduction
        if self.reduce:
            # remove entries that are smaller than var_abs and
            # smaller than var_rel relative to the maximum
            d = d[ d > self.var_abs ]
            d = d[ d / d.max() > self.var_rel ]

            # filter for variance relative to total variance
            if self.var_part:
                d = d[ d / vartot > self.var_part ]

            v = v[:, 0:d.shape[0]]
            self._output_dim = d.shape[0]

        # set explained variance
        self.explained_variance = d.sum() / vartot

        # store the eigenvalues
        self.d = d
        # store the eigenvectors
        self.v = v
        # store the total variance
        self.total_variance = vartot
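
A quick sketch of the fraction-of-variance branch of _stop_training above (hypothetical data; assumes mdp and numpy): passing a float as output_dim keeps as many leading components as needed to explain that fraction of the total variance.

import numpy as np
import mdp

# anisotropic data: most of the variance lives in the first few directions
x = np.random.randn(1000, 20) * np.linspace(10.0, 0.1, 20)

pca = mdp.nodes.PCANode(output_dim=0.95)   # keep 95% of the variance
pca.train(x)
pca.stop_training()

print(pca.output_dim)           # number of components actually kept
print(pca.explained_variance)   # fraction actually explained, >= 0.95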
Example No. 19
0
    def __init__(self, input_dim=None, output_dim=None, dtype=None,
                 svd=False, reduce=False, var_rel=1E-12, var_abs=1E-15,
                 var_part=None):
        """Initializes an object of type 'PCANode'.

        The number of principal components to be kept can be specified as
        'output_dim' directly (e.g. 'output_dim=10' means 10 components
        are kept) or by the fraction of variance to be explained
        (e.g. 'output_dim=0.95' means that as many components as necessary
        will be kept in order to explain 95% of the input variance).
        
        :param input_dim: Dimensionality of the input.
            Default is None.
        :type input_dim: int
        
        :param output_dim: Dimensionality of the output.
            Default is None.
        :type output_dim: int
        
        :param dtype: Datatype of the input.
            Default is None.
        :type dtype: numpy.dtype, str
        
        :param svd: If True use Singular Value Decomposition instead of the
            standard eigenvalue problem solver. Use it when PCANode
            complains about singular covariance matrices.
            Default is False.
        :type svd: bool
        
        :param reduce: Keep only those principal components which have a variance
            larger than 'var_abs' and a variance relative to the
            first principal component larger than 'var_rel' and a
            variance relative to total variance larger than 'var_part'
            (set var_part to None or 0 for no filtering).
            Default is False.
        :type reduce: bool
            
        .. note:: 
            When the *reduce* switch is enabled, the actual number
            of principal components (self.output_dim) may be different
            from that set when creating the instance.
            
        :param var_rel: Variance relative to first principal component threshold.
            Default is 1E-12.
        :type var_rel: float
        
        :param var_abs: Absolute variance threshold.
            Default is 1E-15.
        :type var_abs: float
        
        :param var_part: Variance relative to total variance threshold.
            Default is None.
        :type var_part: float
        """

        # this must occur *before* calling super!
        self.desired_variance = None
        super(PCANode, self).__init__(input_dim, output_dim, dtype)
        self.svd = svd
        # set routine for eigenproblem
        if svd:
            self._symeig = nongeneral_svd
        else:
            self._symeig = symeig
        self.var_abs = var_abs
        self.var_rel = var_rel
        self.var_part = var_part
        self.reduce = reduce
        # empirical covariance matrix, updated during the training phase
        self._cov_mtx = CovarianceMatrix(dtype)
        # attributes that are defined in stop_training
        self.d = None  # eigenvalues
        self.v = None  # eigenvectors, first index for coordinates
        self.total_variance = None
        self.tlen = None
        self.avg = None
        self.explained_variance = None
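
A sketch of the reduce switch documented above (hypothetical data; assumes mdp and numpy): directions whose variance falls below the thresholds are dropped, and self.output_dim is adjusted after training.

import numpy as np
import mdp

# the last two columns carry essentially no variance
x = np.random.randn(1000, 8)
x[:, 6:] *= 1e-9

pca = mdp.nodes.PCANode(reduce=True, var_rel=1E-12, var_abs=1E-15)
pca.train(x)
pca.stop_training()

print(pca.output_dim)   # smaller than 8: near-constant directions were dropped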
Example No. 20
0
class FANode(mdp.Node):
    """Perform Factor Analysis.

    The current implementation should be most efficient for long
    data sets: the sufficient statistics are collected in the
    training phase, and all EM-cycles are performed at
    its end.

    The ``execute`` method returns the Maximum A Posteriori estimate
    of the latent variables. The ``generate_input`` method generates
    observations from the prior distribution.

    .. attribute:: mu
    
          Mean of the input data (available after training)

    .. attribute:: A
    
          Generating weights (available after training)

    .. attribute:: E_y_mtx
    
          Weights for Maximum A Posteriori inference

    .. attribute:: sigma
    
          Vector of estimated variance of the noise
          for all input components
    
    |
    
    .. admonition:: Reference
    
        More information about Factor Analysis can be found in
        Max Welling's classnotes:
        http://www.ics.uci.edu/~welling/classnotes/classnotes.html ,
        in the chapter 'Linear Models'.
    """
    def __init__(self, tol=1e-4, max_cycles=100, verbose=False,
                 input_dim=None, output_dim=None, dtype=None):
        """Initializes an object of type 'FANode'.
        
        :param tol: Tolerance (minimum change in log-likelihood before exiting
            the EM algorithm).
        :type tol: float
        
        :param max_cycles: Maximum number of EM cycles.
        :type max_cycles: int
        
        :param verbose: If true, print log-likelihood during the EM-cycles.
        :type verbose: bool
        
        :param input_dim: The input dimensionality.
        :type input_dim: int
        
        :param output_dim: The output dimensionality.
        :type output_dim: int
        
        :param dtype: The datatype.
        :type dtype: numpy.dtype or str
        """

        # Notation as in Max Welling's notes
        super(FANode, self).__init__(input_dim, output_dim, dtype)
        self.tol = tol
        self.max_cycles = max_cycles
        self.verbose = verbose
        self._cov_mtx = CovarianceMatrix(dtype, bias=True)

    def _train(self, x):
        # update the covariance matrix
        self._cov_mtx.update(x)

    def _stop_training(self):
        #### some definitions
        verbose = self.verbose
        typ = self.dtype
        tol = self.tol
        d = self.input_dim
        # if the number of latent variables is not specified,
        # set it equal to the number of input components
        if not self.output_dim:
            self.output_dim = d
        k = self.output_dim
        # indices of the diagonal elements of a dxd or kxk matrix
        idx_diag_d = [i*(d+1) for i in range(d)]
        idx_diag_k = [i*(k+1) for i in range(k)]
        # constant term in front of the log-likelihood
        const = -d/2. * numx.log(2.*numx.pi)

        ##### request the covariance matrix and clean up
        cov_mtx, mu, tlen = self._cov_mtx.fix()
        del self._cov_mtx
        cov_diag = cov_mtx.diagonal()

        ##### initialize the parameters
        # noise variances
        sigma = cov_diag
        # loading factors
        # Zoubin uses the determinant of cov_mtx^1/d as scale but it's
        # too slow for large matrices. Is the product of the diagonal a good
        # approximation?
        if d<=300:
            scale = det(cov_mtx)**(old_div(1.,d))
        else:
            scale = numx.product(sigma)**(old_div(1.,d))
        if scale <= 0.:
            err = ("The covariance matrix of the data is singular. "
                   "Redundant dimensions need to be removed.")
            raise NodeException(err)

        A = normal(0., sqrt(old_div(scale,k)), size=(d, k)).astype(typ)

        ##### EM-cycle
        lhood_curve = []
        base_lhood = None
        old_lhood = -numx.inf
        for t in range(self.max_cycles):
            ## compute B = (A A^T + Sigma)^-1
            B = mult(A, A.T)
            # B += diag(sigma), avoid computing diag(sigma) which is dxd
            B.ravel().put(idx_diag_d, B.ravel().take(idx_diag_d)+sigma)
            # this quantity is used later for the log-likelihood
            # abs is there to avoid numerical errors when det < 0
            log_det_B = numx.log(abs(det(B)))
            # end the computation of B
            B = inv(B)

            ## other useful quantities
            trA_B = mult(A.T, B)
            trA_B_cov_mtx = mult(trA_B, cov_mtx)

            ##### E-step
            ## E_yyT = E(y_n y_n^T | x_n)
            E_yyT = - mult(trA_B, A) + mult(trA_B_cov_mtx, trA_B.T)
            # E_yyT += numx.eye(k)
            E_yyT.ravel().put(idx_diag_k, E_yyT.ravel().take(idx_diag_k)+1.)

            ##### M-step
            A = mult(trA_B_cov_mtx.T, inv(E_yyT))
            sigma = cov_diag - (mult(A, trA_B_cov_mtx)).diagonal()

            ##### log-likelihood
            trace_B_cov = (B*cov_mtx.T).sum()
            # this is actually likelihood/tlen.
            lhood = const - 0.5*log_det_B - 0.5*trace_B_cov
            if verbose:
                print('cycle', t, 'log-lhood:', lhood)

            ##### convergence criterion
            if base_lhood is None:
                base_lhood = lhood
            else:
                # convergence criterion
                if (lhood-base_lhood)<(1.+tol)*(old_lhood-base_lhood):
                    break
                if lhood < old_lhood:
                    # this should never happen
                    # it sometimes does, e.g. if the noise is extremely low,
                    # because of numerical rounding effects
                    warnings.warn(_LHOOD_WARNING, mdp.MDPWarning)
            old_lhood = lhood
            lhood_curve.append(lhood)

        self.tlen = tlen
        self.A = A
        self.mu = mu.reshape(1, d)
        self.sigma = sigma

        ## MAP matrix
        # compute B = (A A^T + Sigma)^-1
        B = mult(A, A.T).copy()
        B.ravel().put(idx_diag_d, B.ravel().take(idx_diag_d)+sigma)
        B = inv(B)
        self.E_y_mtx = mult(B.T, A)

        self.lhood = lhood_curve

    def _execute(self, x):
        return mult(x-self.mu, self.E_y_mtx)

    @staticmethod
    def is_invertible():
        return False

    def generate_input(self, len_or_y=1, noise=False):
        """Generate data from the prior distribution.

        If the training phase has not been completed yet, call stop_training.
        
        :param len_or_y: If an integer, it specifies the number of observations
            to generate. If an array, it is used as a set of samples
            of the latent variables.

        :param noise: If true, generation includes the estimated noise
        :type noise: bool
        
        :return: The generated data.
        :rtype: numpy.ndarray
        """

        self._if_training_stop_training()

        # set the output dimension if necessary
        if self.output_dim is None:
            # if the input_dim is not defined, raise an exception
            if self.input_dim is None:
                errstr = ("Number of input dimensions undefined. Inversion "
                          "not possible.")
                raise NodeException(errstr)
            self.output_dim = self.input_dim

        if isinstance(len_or_y, int):
            size = (len_or_y, self.output_dim)
            y = self._refcast(mdp.numx_rand.normal(size=size))
        else:
            y = self._refcast(len_or_y)
            self._check_output(y)

        res = mult(y, self.A.T)+self.mu
        if noise:
            ns = mdp.numx_rand.normal(size=(y.shape[0], self.input_dim))
            ns *= numx.sqrt(self.sigma)
            res += self._refcast(ns)
        return res
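
A short sketch of the FANode workflow shown above (hypothetical data; assumes mdp and numpy): data are generated from a two-factor linear model, execute returns the MAP estimates of the latent variables, and generate_input draws new observations from the fitted prior.

import numpy as np
import mdp

# synthetic factor-analysis data: x = y A^T + noise, with 2 latent factors
rng = np.random.RandomState(0)
A_true = rng.randn(10, 2)
y_true = rng.randn(2000, 2)
x = np.dot(y_true, A_true.T) + 0.1 * rng.randn(2000, 10)

fa = mdp.nodes.FANode(output_dim=2, max_cycles=200)
fa.train(x)
fa.stop_training()

y_map = fa.execute(x)            # MAP estimates of the latent variables
x_new = fa.generate_input(5)     # 5 observations drawn from the fitted prior
print(y_map.shape, x_new.shape)  # (2000, 2) (5, 10)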
Example No. 21
0
class FANode(mdp.Node):
    """Perform Factor Analysis.

    The current implementation should be most efficient for long
    data sets: the sufficient statistics are collected in the
    training phase, and all EM-cycles are performed at
    its end.

    The ``execute`` method returns the Maximum A Posteriori estimate
    of the latent variables. The ``generate_input`` method generates
    observations from the prior distribution.

    **Internal variables of interest**

      ``self.mu``
          Mean of the input data (available after training)

      ``self.A``
          Generating weights (available after training)

      ``self.E_y_mtx``
          Weights for Maximum A Posteriori inference

      ``self.sigma``
          Vector of estimated variance of the noise
          for all input components

    More information about Factor Analysis can be found in
    Max Welling's classnotes:
    http://www.ics.uci.edu/~welling/classnotes/classnotes.html ,
    in the chapter 'Linear Models'.
    """
    def __init__(self,
                 tol=1e-4,
                 max_cycles=100,
                 verbose=False,
                 input_dim=None,
                 output_dim=None,
                 dtype=None):
        """
        :Parameters:
          tol
            tolerance (minimum change in log-likelihood before exiting
            the EM algorithm)
          max_cycles
            maximum number of EM cycles
          verbose
            if true, print log-likelihood during the EM-cycles
        """
        # Notation as in Max Welling's notes
        super(FANode, self).__init__(input_dim, output_dim, dtype)
        self.tol = tol
        self.max_cycles = max_cycles
        self.verbose = verbose
        self._cov_mtx = CovarianceMatrix(dtype, bias=True)

    def _train(self, x):
        # update the covariance matrix
        self._cov_mtx.update(x)

    def _stop_training(self):
        #### some definitions
        verbose = self.verbose
        typ = self.dtype
        tol = self.tol
        d = self.input_dim
        # if the number of latent variables is not specified,
        # set it equal to the number of input components
        if not self.output_dim:
            self.output_dim = d
        k = self.output_dim
        # indices of the diagonal elements of a dxd or kxk matrix
        idx_diag_d = [i * (d + 1) for i in range(d)]
        idx_diag_k = [i * (k + 1) for i in range(k)]
        # constant term in front of the log-likelihood
        const = -d / 2. * numx.log(2. * numx.pi)

        ##### request the covariance matrix and clean up
        cov_mtx, mu, tlen = self._cov_mtx.fix()
        del self._cov_mtx
        cov_diag = cov_mtx.diagonal()

        ##### initialize the parameters
        # noise variances
        sigma = cov_diag
        # loading factors
        # Zoubin uses the determinant of cov_mtx^1/d as scale but it's
        # too slow for large matrices. Is the product of the diagonal a good
        # approximation?
        if d <= 300:
            scale = det(cov_mtx)**(1. / d)
        else:
            scale = numx.product(sigma)**(1. / d)
        if scale <= 0.:
            err = ("The covariance matrix of the data is singular. "
                   "Redundant dimensions need to be removed.")
            raise NodeException(err)

        A = normal(0., sqrt(scale / k), size=(d, k)).astype(typ)

        ##### EM-cycle
        lhood_curve = []
        base_lhood = None
        old_lhood = -numx.inf
        for t in range(self.max_cycles):
            ## compute B = (A A^T + Sigma)^-1
            B = mult(A, A.T)
            # B += diag(sigma), avoid computing diag(sigma) which is dxd
            B.ravel().put(idx_diag_d, B.ravel().take(idx_diag_d) + sigma)
            # this quantity is used later for the log-likelihood
            # abs is there to avoid numerical errors when det < 0
            log_det_B = numx.log(abs(det(B)))
            # end the computation of B
            B = inv(B)

            ## other useful quantities
            trA_B = mult(A.T, B)
            trA_B_cov_mtx = mult(trA_B, cov_mtx)

            ##### E-step
            ## E_yyT = E(y_n y_n^T | x_n)
            E_yyT = -mult(trA_B, A) + mult(trA_B_cov_mtx, trA_B.T)
            # E_yyT += numx.eye(k)
            E_yyT.ravel().put(idx_diag_k, E_yyT.ravel().take(idx_diag_k) + 1.)

            ##### M-step
            A = mult(trA_B_cov_mtx.T, inv(E_yyT))
            sigma = cov_diag - (mult(A, trA_B_cov_mtx)).diagonal()

            ##### log-likelihood
            trace_B_cov = (B * cov_mtx.T).sum()
            # this is actually likelihood/tlen.
            lhood = const - 0.5 * log_det_B - 0.5 * trace_B_cov
            if verbose:
                print('cycle', t, 'log-lhood:', lhood)

            ##### convergence criterion
            if base_lhood is None:
                base_lhood = lhood
            else:
                # convergence criterion
                if (lhood -
                        base_lhood) < (1. + tol) * (old_lhood - base_lhood):
                    break
                if lhood < old_lhood:
                    # this should never happen
                    # it sometimes does, e.g. if the noise is extremely low,
                    # because of numerical rounding effects
                    warnings.warn(_LHOOD_WARNING, mdp.MDPWarning)
            old_lhood = lhood
            lhood_curve.append(lhood)

        self.tlen = tlen
        self.A = A
        self.mu = mu.reshape(1, d)
        self.sigma = sigma

        ## MAP matrix
        # compute B = (A A^T + Sigma)^-1
        B = mult(A, A.T).copy()
        B.ravel().put(idx_diag_d, B.ravel().take(idx_diag_d) + sigma)
        B = inv(B)
        self.E_y_mtx = mult(B.T, A)

        self.lhood = lhood_curve

    def _execute(self, x):
        return mult(x - self.mu, self.E_y_mtx)

    @staticmethod
    def is_invertible():
        return False

    def generate_input(self, len_or_y=1, noise=False):
        """
        Generate data from the prior distribution.

        If the training phase has not been completed yet, call stop_training.

        :Arguments:
          len_or_y
                    If an integer, it specifies the number of observations
                    to generate. If an array, it is used as a set of samples
                    of the latent variables.
          noise
                    if true, generation includes the estimated noise
        """

        self._if_training_stop_training()

        # set the output dimension if necessary
        if self.output_dim is None:
            # if the input_dim is not defined, raise an exception
            if self.input_dim is None:
                errstr = ("Number of input dimensions undefined. Inversion "
                          "not possible.")
                raise NodeException(errstr)
            self.output_dim = self.input_dim

        if isinstance(len_or_y, int):
            size = (len_or_y, self.output_dim)
            y = self._refcast(mdp.numx_rand.normal(size=size))
        else:
            y = self._refcast(len_or_y)
            self._check_output(y)

        res = mult(y, self.A.T) + self.mu
        if noise:
            ns = mdp.numx_rand.normal(size=(y.shape[0], self.input_dim))
            ns *= numx.sqrt(self.sigma)
            res += self._refcast(ns)
        return res
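
As a sanity check of the parameters estimated by _stop_training above (hypothetical data; assumes mdp and numpy), the factor-analysis model implies that the sample covariance is approximately reproduced by A A^T + diag(sigma):

import numpy as np
import mdp

rng = np.random.RandomState(1)
x = np.dot(rng.randn(5000, 3), rng.randn(3, 8)) + 0.2 * rng.randn(5000, 8)

fa = mdp.nodes.FANode(output_dim=3)
fa.train(x)
fa.stop_training()

# compare the model covariance with the (biased) sample covariance
model_cov = np.dot(fa.A, fa.A.T) + np.diag(fa.sigma)
data_cov = np.cov(x, rowvar=False, bias=True)
print(np.abs(model_cov - data_cov).max())   # small if the EM fit converged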
Example No. 22
0
class PCANode(mdp.Node):
    """Filter the input data through the most significatives of its
    principal components.

    **Internal variables of interest**
    
      ``self.avg``
          Mean of the input data (available after training).

      ``self.v``
          Transposed of the projection matrix (available after training).

      ``self.d``
          Variance corresponding to the PCA components (eigenvalues of the
          covariance matrix).

      ``self.explained_variance``
          When output_dim has been specified as a fraction of the total
          variance, this is the fraction of the total variance that is
          actually explained.

    More information about Principal Component Analysis, a.k.a. discrete
    Karhunen-Loeve transform can be found among others in
    I.T. Jolliffe, Principal Component Analysis, Springer-Verlag (1986).
    """
    def __init__(self,
                 input_dim=None,
                 output_dim=None,
                 dtype=None,
                 svd=False,
                 reduce=False,
                 var_rel=1E-12,
                 var_abs=1E-15,
                 var_part=None):
        """The number of principal components to be kept can be specified as
        'output_dim' directly (e.g. 'output_dim=10' means 10 components
        are kept) or by the fraction of variance to be explained
        (e.g. 'output_dim=0.95' means that as many components as necessary
        will be kept in order to explain 95% of the input variance).

        Other Keyword Arguments:

        svd -- if True use Singular Value Decomposition instead of the
               standard eigenvalue problem solver. Use it when PCANode
               complains about singular covariance matrices

        reduce -- Keep only those principal components which have a variance
                  larger than 'var_abs' and a variance relative to the
                  first principal component larger than 'var_rel' and a
                  variance relative to total variance larger than 'var_part'
                  (set var_part to None or 0 for no filtering).
                  Note: when the 'reduce' switch is enabled, the actual number
                  of principal components (self.output_dim) may be different
                  from that set when creating the instance.
        """
        # this must occur *before* calling super!
        self.desired_variance = None
        super(PCANode, self).__init__(input_dim, output_dim, dtype)
        self.svd = svd
        # set routine for eigenproblem
        if svd:
            self._symeig = nongeneral_svd
        else:
            self._symeig = symeig
        self.var_abs = var_abs
        self.var_rel = var_rel
        self.var_part = var_part
        self.reduce = reduce
        # empirical covariance matrix, updated during the training phase
        self._cov_mtx = CovarianceMatrix(dtype)
        # attributes that are defined in stop_training
        self.d = None  # eigenvalues
        self.v = None  # eigenvectors, first index for coordinates
        self.total_variance = None
        self.tlen = None
        self.avg = None
        self.explained_variance = None

    def _set_output_dim(self, n):
        if n <= 1 and isinstance(n, float):
            # set the output dim after training, when the variances are known
            self.desired_variance = n
        else:
            self._output_dim = n

    def _check_output(self, y):
        # check output rank
        if not y.ndim == 2:
            error_str = "y has rank %d, should be 2" % (y.ndim)
            raise mdp.NodeException(error_str)

        if y.shape[1] == 0 or y.shape[1] > self.output_dim:
            error_str = ("y has dimension %d"
                         ", should be 0<y<=%d" % (y.shape[1], self.output_dim))
            raise mdp.NodeException(error_str)

    def get_explained_variance(self):
        """Return the fraction of the original variance that can be
        explained by self._output_dim PCA components.
        If for example output_dim has been set to 0.95, the explained
        variance could be something like 0.958...
        Note that if output_dim was explicitly set to be a fixed number
        of components, there is no way to calculate the explained variance.
        """
        return self.explained_variance

    def _train(self, x):
        # update the covariance matrix
        self._cov_mtx.update(x)

    def _adjust_output_dim(self):
        """Return the eigenvector range and set the output dim if required.

        This is used if the output dimension is smaller than the input
        dimension (so only the larger eigenvectors have to be kept).
        """
        # if the number of principal components to keep is not specified,
        # keep all components
        if self.desired_variance is None and self.output_dim is None:
            self.output_dim = self.input_dim
            return None

        ## define the range of eigenvalues to compute
        # if the number of principal components to keep has been
        # specified directly
        if self.output_dim is not None and self.output_dim >= 1:
            # (eigenvalues sorted in ascending order)
            return (self.input_dim - self.output_dim + 1, self.input_dim)
        # otherwise, the number of principal components to keep has been
        # specified by the fraction of variance to be explained
        else:
            return None

    def _stop_training(self, debug=False):
        """Stop the training phase.

        Keyword arguments:

        debug=True     if stop_training fails because of singular cov
                       matrices, the singular matrices themselves are stored in
                       self.cov_mtx and self.dcov_mtx to be examined.
        """
        # request the covariance matrix and clean up
        self.cov_mtx, avg, self.tlen = self._cov_mtx.fix()
        del self._cov_mtx

        # this is a bit counterintuitive, as it reshapes the average vector to
        # be a matrix. in this way, however, we spare the reshape
        # operation every time that 'execute' is called.
        self.avg = avg.reshape(1, avg.shape[0])

        # range for the eigenvalues
        rng = self._adjust_output_dim()

        # if we have more variables than observations we are bound to fail here
        # suggest to use the NIPALSNode instead.
        if debug and self.tlen < self.input_dim:
            wrn = ('The number of observations (%d) '
                   'is smaller than the number of input variables '
                   '(%d). You may want to use '
                   'the NIPALSNode instead.' % (self.tlen, self.input_dim))
            _warnings.warn(wrn, mdp.MDPWarning)

        # total variance can be computed at this point:
        # note that vartot == d.sum()
        vartot = numx.diag(self.cov_mtx).sum()

        ## compute and sort the eigenvalues
        # compute the eigenvectors of the covariance matrix (inplace)
        # (eigenvalues sorted in ascending order)
        try:
            d, v = self._symeig(self.cov_mtx, range=rng, overwrite=(not debug))
            # if reduce=False and svd=False, we should check for
            # negative eigenvalues and fail
            if not (self.reduce or self.svd or
                    (self.desired_variance is not None)):
                if d.min() < 0:
                    raise mdp.NodeException(
                        "Got negative eigenvalues: %s.\n"
                        "You may either set output_dim to be smaller, "
                        "or set reduce=True and/or svd=True" % str(d))
        except SymeigException as exception:
            err = str(exception) + ("\nCovariance matrix may be singular. "
                                    "Try setting svd=True.")
            raise mdp.NodeException(err)

        # delete covariance matrix if no exception occurred
        if not debug:
            del self.cov_mtx

        # sort by descending order
        d = numx.take(d, range(d.shape[0] - 1, -1, -1))
        v = v[:, ::-1]

        if self.desired_variance is not None:
            # throw away immediately negative eigenvalues
            d = d[d > 0]
            # the number of principal components to keep has
            # been specified by the fraction of variance to be explained
            varcum = (d / vartot).cumsum(axis=0)
            # select only the relevant eigenvalues
            # number of relevant eigenvalues
            neigval = int(varcum.searchsorted(self.desired_variance) + 1)
            #self.explained_variance = varcum[neigval-1]
            # cut
            d = d[0:neigval]
            v = v[:, 0:neigval]
            # define the new output dimension
            self.output_dim = int(neigval)

        # automatic dimensionality reduction
        if self.reduce:
            # remove entries that are smaller than var_abs and
            # smaller than var_rel relative to the maximum
            d = d[d > self.var_abs]
            # check that we did not throw away everything
            if len(d) == 0:
                raise mdp.NodeException('No eigenvalues larger than'
                                        ' var_abs=%e!' % self.var_abs)
            d = d[d / d.max() > self.var_rel]

            # filter for variance relative to total variance
            if self.var_part:
                d = d[d / vartot > self.var_part]

            v = v[:, 0:d.shape[0]]
            self._output_dim = d.shape[0]

        # set explained variance
        self.explained_variance = d.sum() / vartot

        # store the eigenvalues
        self.d = d
        # store the eigenvectors
        self.v = v
        # store the total variance
        self.total_variance = vartot
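
A sketch of the svd switch handled in _stop_training above (hypothetical data; assumes mdp and numpy): with a duplicated input column the covariance matrix is singular, and svd=True selects the SVD-based solver instead of the standard symmetric eigensolver.

import numpy as np
import mdp

x = np.random.randn(400, 4)
x = np.column_stack([x, x[:, 0]])   # duplicated column -> singular covariance

pca = mdp.nodes.PCANode(svd=True)
pca.train(x)
pca.stop_training()

print(pca.d)   # eigenvalues; the smallest is (numerically) zero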
Example No. 23
0
class SFANode(Node):
    """Extract the slowly varying components from the input data.
    More information about Slow Feature Analysis can be found in
    Wiskott, L. and Sejnowski, T.J., Slow Feature Analysis: Unsupervised
    Learning of Invariances, Neural Computation, 14(4):715-770 (2002).

    **Instance variables of interest**

      ``self.avg``
          Mean of the input data (available after training)

      ``self.sf``
          Matrix of the SFA filters (available after training)

      ``self.d``
          Delta values corresponding to the SFA components (generalized
          eigenvalues). [See the docs of the ``get_eta_values`` method for
          more information]

    **Special arguments for constructor**

      ``include_last_sample``
          If ``False`` the `train` method discards the last sample in every
          chunk during training when calculating the covariance matrix.
          The last sample is in this case only used for calculating the
          covariance matrix of the derivatives. The switch should be set
          to ``False`` if you plan to train with several small chunks. For
          example we can split a sequence (index is time)::

            x_1 x_2 x_3 x_4

          in smaller parts like this::

            x_1 x_2
            x_2 x_3
            x_3 x_4

          The SFANode will see 3 derivatives for the temporal covariance
          matrix, and the first 3 points for the spatial covariance matrix.
          Of course you will need to use a generator that *connects* the
          small chunks (the last sample needs to be sent again in the next
          chunk). If ``include_last_sample`` was True, depending on the
          generator you use, you would either get::

             x_1 x_2
             x_2 x_3
             x_3 x_4

          in which case the last sample of every chunk would be used twice
          when calculating the covariance matrix, or::

             x_1 x_2
             x_3 x_4

          in which case you lose the derivative between ``x_2`` and ``x_3``.

          If you plan to train with a single big chunk leave
          ``include_last_sample`` to the default value, i.e. ``True``.

          You can even change this behaviour during training. Just set the
          corresponding switch in the `train` method.
    """

    def __init__(self, input_dim=None, output_dim=None, dtype=None,
                 include_last_sample=True):
        """
        For the ``include_last_sample`` switch have a look at the
        SFANode class docstring.
         """
        super(SFANode, self).__init__(input_dim, output_dim, dtype)
        self._include_last_sample = include_last_sample

        # init two covariance matrices
        # one for the input data
        self._cov_mtx = CovarianceMatrix(dtype)
        # one for the derivatives
        self._dcov_mtx = CovarianceMatrix(dtype)

        # set routine for eigenproblem
        self._symeig = symeig

        # SFA eigenvalues and eigenvectors, will be set after training
        self.d = None
        self.sf = None  # second index for outputs
        self.avg = None
        self._bias = None  # avg multiplied with sf
        self.tlen = None

    def time_derivative(self, x):
        """Compute the linear approximation of the time derivative."""
        # this is faster than a linear_filter or a weave-inline solution
        return x[1:, :]-x[:-1, :]

    def _set_range(self):
        if self.output_dim is not None and self.output_dim <= self.input_dim:
            # (eigenvalues sorted in ascending order)
            rng = (1, self.output_dim)
        else:
            # otherwise, keep all output components
            rng = None
            self.output_dim = self.input_dim
        return rng

    def _check_train_args(self, x, *args, **kwargs):
        # check that we have at least 2 time samples to
        # compute the update for the derivative covariance matrix
        s = x.shape[0]
        if s < 2:
            raise TrainingException('Need at least 2 time samples to '
                                    'compute time derivative (%d given)' % s)
        
    def _train(self, x, include_last_sample=None):
        """
        For the ``include_last_sample`` switch have a look at the
        SFANode class docstring.
        """
        if include_last_sample is None:
            include_last_sample = self._include_last_sample
        # works because x[:None] == x[:]
        last_sample_index = None if include_last_sample else -1

        # update the covariance matrices
        self._cov_mtx.update(x[:last_sample_index, :])
        self._dcov_mtx.update(self.time_derivative(x))

    def _stop_training(self, debug=False):
        ##### request the covariance matrices and clean up
        self.cov_mtx, self.avg, self.tlen = self._cov_mtx.fix()
        del self._cov_mtx
        # do not center around the mean:
        # we want the second moment matrix (centered about 0) and
        # not the second central moment matrix (centered about the mean), i.e.
        # the covariance matrix
        self.dcov_mtx, self.davg, self.dtlen = self._dcov_mtx.fix(center=False)
        del self._dcov_mtx

        rng = self._set_range()

        #### solve the generalized eigenvalue problem
        # the eigenvalues are already ordered in ascending order
        try:
            self.d, self.sf = self._symeig(self.dcov_mtx, self.cov_mtx,
                                           range=rng, overwrite=(not debug))
            d = self.d
            # check that we get only *positive* eigenvalues
            if d.min() < 0:
                err_msg = ("Got negative eigenvalues: %s."
                           " You may either set output_dim to be smaller,"
                           " or prepend the SFANode with a PCANode(reduce=True)"
                           " or PCANode(svd=True)"% str(d))
                raise NodeException(err_msg)
        except SymeigException as exception:
            errstr = str(exception)+"\n Covariance matrices may be singular."
            raise NodeException(errstr)

        if not debug:
            # delete covariance matrix if no exception occurred
            del self.cov_mtx
            del self.dcov_mtx

        # store bias
        self._bias = mult(self.avg, self.sf)