Example #1
    def increment(self, samples, n_samples=None, verbose=False):
        r"""
        Update the mean and precision matrix of the GMRF by updating the
        distributions of all the edges.

        Parameters
        ----------
        samples : `list` or `iterable` of :map:`Vectorizable`
            List or iterable of samples to build the model from.
        n_samples : `int`, optional
            If provided, then ``samples`` must be an iterator that yields
            ``n_samples`` samples. If not provided, then ``samples`` has to
            be a list (so we know how large the data matrix needs to be).
        verbose : `bool`, optional
            If ``True``, the progress of the model's incremental update is
            printed.
        """
        # Check if it can be incrementally updated
        if not self.is_incremental:
            raise ValueError('GMRF cannot be incrementally updated.')

        # Build a data matrix from the new samples
        data = as_matrix(samples, length=n_samples, verbose=verbose)

        # Increment the model
        self._increment(data=data, verbose=verbose)
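The snippet above only covers the update itself. A minimal end-to-end sketch, assuming menpo's `GMRFModel` and `UndirectedGraph` (the chain graph, the `init_from_edges` constructor and the random `PointCloud` samples are illustrative assumptions, not taken from the example):

import numpy as np
from menpo.shape import PointCloud, UndirectedGraph
from menpo.model import GMRFModel

# a chain graph over 3 vertices: 0 - 1 - 2
graph = UndirectedGraph.init_from_edges(np.array([[0, 1], [1, 2]]), n_vertices=3)

# each sample has 3 vertices with 2 features each
samples = [PointCloud(np.random.randn(3, 2)) for _ in range(50)]
model = GMRFModel(samples, graph, mode='concatenation', incremental=True)

# fold a new batch into the existing per-edge mean / precision estimates
new_samples = [PointCloud(np.random.randn(3, 2)) for _ in range(10)]
model.increment(new_samples)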
def _compute_sum_cost_block_sparse(samples, test_sample, graph,
                                   n_features_per_vertex, subtract_mean, mode):
    # create ndarray with data
    data = as_matrix(samples, length=None, return_template=False)
    # initialize cost
    cost = 0.
    # for loop over the graph's edges
    for e in graph.edges:
        v1 = e[0]
        v2 = e[1]
        v1_from = v1 * n_features_per_vertex
        v1_to = (v1 + 1) * n_features_per_vertex
        v2_from = v2 * n_features_per_vertex
        v2_to = (v2 + 1) * n_features_per_vertex
        # slice data and test vector
        y = test_sample.as_vector()
        if mode == 'concatenation':
            x = np.hstack((data[:, v1_from:v1_to], data[:, v2_from:v2_to]))
            y = np.hstack((y[v1_from:v1_to], y[v2_from:v2_to]))
        else:
            x = data[:, v1_from:v1_to] - data[:, v2_from:v2_to]
            y = y[v1_from:v1_to] - y[v2_from:v2_to]
        # compute the edge mean and precision (inverse covariance)
        cov = np.linalg.inv(np.cov(x.T))
        mean = np.mean(x, axis=0)
        # compute and sum cost
        if subtract_mean:
            v = y - mean
        else:
            v = y
        cost += v.dot(cov).T.dot(v)
    return cost
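Under the assumption (made in menpo's own GMRF tests) that the model's squared Mahalanobis distance decomposes into a sum of per-edge terms, the brute-force helper above can be used to cross-check a fitted `GMRFModel`. A hypothetical sketch, reusing the graph and sample setup from the previous snippet:

import numpy as np
from menpo.shape import PointCloud, UndirectedGraph
from menpo.model import GMRFModel

graph = UndirectedGraph.init_from_edges(np.array([[0, 1], [1, 2]]), n_vertices=3)
samples = [PointCloud(np.random.randn(3, 2)) for _ in range(200)]
test_sample = PointCloud(np.random.randn(3, 2))

model = GMRFModel(samples, graph, mode='concatenation', sparse=True)
brute_force = _compute_sum_cost_block_sparse(
    samples, test_sample, graph, n_features_per_vertex=2,
    subtract_mean=True, mode='concatenation')
# the two values should closely match, up to the bias convention of np.cov
print(model.mahalanobis_distance(test_sample), brute_force)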
    def __init__(self,
                 samples,
                 centre=True,
                 n_samples=None,
                 max_n_components=None,
                 inplace=True,
                 verbose=False):

        # build a data matrix from all the samples
        data, template = as_matrix(samples,
                                   length=n_samples,
                                   return_template=True,
                                   verbose=verbose)
        # robust PCA via principal component pursuit; rpca_pcp presumably
        # returns the low-rank data matrix and the sparse error term E
        data, E = rpca_pcp(data)

        print(E)

        n_samples = data.shape[0]

        PCAVectorModel.__init__(self,
                                data,
                                centre=centre,
                                max_n_components=max_n_components,
                                n_samples=n_samples,
                                inplace=inplace)
        VectorizableBackedModel.__init__(self, template)
Example #5
    def __init__(
        self,
        samples,
        graph,
        mode="concatenation",
        n_components=None,
        dtype=np.float64,
        sparse=True,
        n_samples=None,
        bias=0,
        incremental=False,
        verbose=False,
    ):
        # Build a data matrix from all the samples
        data, self.template_instance = as_matrix(samples,
                                                 length=n_samples,
                                                 return_template=True,
                                                 verbose=verbose)
        n_samples = data.shape[0]

        GMRFVectorModel.__init__(
            self,
            data,
            graph,
            mode=mode,
            n_components=n_components,
            dtype=dtype,
            sparse=sparse,
            n_samples=n_samples,
            bias=bias,
            incremental=incremental,
            verbose=verbose,
        )
Example #6
    def mahalanobis_distance(self,
                             samples,
                             subtract_mean=True,
                             square_root=False):
        r"""
        Compute the Mahalanobis distance given a sample :math:`\mathbf{x}` or an
        array of samples :math:`\mathbf{X}`, i.e.

        .. math::
           \sqrt{(\mathbf{x}-\boldsymbol{\mu})^T \mathbf{Q} (\mathbf{x}-\boldsymbol{\mu})}
           \text{ or }
           \sqrt{(\mathbf{X}-\boldsymbol{\mu})^T \mathbf{Q} (\mathbf{X}-\boldsymbol{\mu})}

        Parameters
        ----------
        samples : :map:`Vectorizable` or `list` of :map:`Vectorizable`
            The new data sample or a list of samples.
        subtract_mean : `bool`, optional
            When ``True``, the mean vector is subtracted from the data vector.
        square_root : `bool`, optional
            If ``False``, the squared Mahalanobis distance is returned
            (i.e. no square root is taken).
        """
        if isinstance(samples, list):
            samples = as_matrix(samples,
                                length=None,
                                return_template=False,
                                verbose=False)
        else:
            samples = samples.as_vector()[..., None].T
        return self._mahalanobis_distance(samples=samples,
                                          subtract_mean=subtract_mean,
                                          square_root=square_root)
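A short usage sketch for the method above (the model construction and the novel sample are illustrative assumptions):

import numpy as np
from menpo.shape import PointCloud, UndirectedGraph
from menpo.model import GMRFModel

graph = UndirectedGraph.init_from_edges(np.array([[0, 1], [1, 2]]), n_vertices=3)
model = GMRFModel([PointCloud(np.random.randn(3, 2)) for _ in range(100)], graph)

novel = PointCloud(np.random.randn(3, 2))
d_squared = model.mahalanobis_distance(novel)               # squared by default
d = model.mahalanobis_distance(novel, square_root=True)     # true distance
# a list of samples is first converted to a data matrix (see the branch above)
d_batch = model.mahalanobis_distance([novel, novel])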
def test_pca_init_from_covariance():
    n_samples = 30
    n_features = 10
    n_dims = 2
    centre_values = [True, False]
    for centre in centre_values:
        # generate samples list and convert it to nd.array
        samples = [
            PointCloud(np.random.randn(n_features, n_dims))
            for _ in range(n_samples)
        ]
        data, template = as_matrix(samples, return_template=True)
        # compute covariance matrix and mean
        if centre:
            mean_vector = np.mean(data, axis=0)
            mean = template.from_vector(mean_vector)
            X = data - mean_vector
            C = np.dot(X.T, X) / (n_samples - 1)
        else:
            mean = samples[0]
            C = np.dot(data.T, data) / (n_samples - 1)
        # create the 2 pca models
        pca1 = PCAModel.init_from_covariance_matrix(C,
                                                    mean,
                                                    centred=centre,
                                                    n_samples=n_samples)
        pca2 = PCAModel(samples, centre=centre)
        # compare them
        assert_array_almost_equal(pca1.component_vector(0, with_mean=False),
                                  pca2.component_vector(0, with_mean=False))
        assert_array_almost_equal(
            pca1.component(7).as_vector(),
            pca2.component(7).as_vector())
        assert_array_almost_equal(pca1.components, pca2.components)
        assert_array_almost_equal(pca1.eigenvalues, pca2.eigenvalues)
        assert_array_almost_equal(pca1.eigenvalues_cumulative_ratio(),
                                  pca2.eigenvalues_cumulative_ratio())
        assert_array_almost_equal(pca1.eigenvalues_ratio(),
                                  pca2.eigenvalues_ratio())
        weights = np.random.randn(pca1.n_active_components)
        assert_array_almost_equal(
            pca1.instance(weights).as_vector(),
            pca2.instance(weights).as_vector())
        weights2 = np.random.randn(pca1.n_active_components - 4)
        assert_array_almost_equal(pca1.instance_vector(weights2),
                                  pca2.instance_vector(weights2))
        assert_array_almost_equal(pca1.mean().as_vector(),
                                  pca2.mean().as_vector())
        assert_array_almost_equal(pca1.mean_vector, pca2.mean_vector)
        assert (pca1.n_active_components == pca2.n_active_components)
        assert (pca1.n_components == pca2.n_components)
        assert (pca1.n_features == pca2.n_features)
        assert (pca1.n_samples == pca2.n_samples)
        assert (pca1.noise_variance() == pca2.noise_variance())
        assert (pca1.noise_variance_ratio() == pca2.noise_variance_ratio())
        assert_almost_equal(pca1.variance(), pca2.variance())
        assert_almost_equal(pca1.variance_ratio(), pca2.variance_ratio())
        assert_array_almost_equal(pca1.whitened_components(),
                                  pca2.whitened_components())
Example #11
    def increment(self,
                  samples,
                  n_samples=None,
                  forgetting_factor=1.0,
                  verbose=False):
        r"""
        Update the eigenvectors, eigenvalues and mean vector of this model
        by performing incremental PCA on the given samples.

        Parameters
        ----------
        samples : `list` of :map:`Vectorizable`
            List of new samples to update the model from.
        n_samples : `int`, optional
            If provided, then ``samples`` must be an iterator that yields
            ``n_samples`` samples. If not provided, then ``samples`` has to
            be a list (so we know how large the data matrix needs to be).
        forgetting_factor : ``[0.0, 1.0]`` `float`, optional
            Forgetting factor that weights the relative contribution of new
            samples vs old samples. If 1.0, all samples are weighted equally
            and, hence, the result is exactly the same as performing batch
            PCA on the concatenated list of old and new samples. If <1.0,
            more emphasis is put on the new samples. See [1] for details.

        References
        ----------
        .. [1] David Ross, Jongwoo Lim, Ruei-Sung Lin, Ming-Hsuan Yang.
           "Incremental Learning for Robust Visual Tracking". IJCV, 2007.
        """
        # build a data matrix from the new samples
        data = as_matrix(samples, length=n_samples, verbose=verbose)
        # (n_samples, n_features)
        n_new_samples = data.shape[0]

        # compute incremental pca
        e_vectors, e_values, m_vector = ipca(data,
                                             self._components,
                                             self._eigenvalues,
                                             self.n_samples,
                                             m_a=self.mean_vector,
                                             f=forgetting_factor)

        # check whether the number of active components currently equals the
        # total number of components; if so, keep them equal after the update
        reset = (self.n_active_components == self.n_components)

        # update mean, components, eigenvalues and number of samples
        self.mean_vector = m_vector
        self._components = e_vectors
        self._eigenvalues = e_values
        self.n_samples += n_new_samples

        # reset the number of active components to the total number of
        # components
        if reset:
            self.n_active_components = self.n_components
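A minimal sketch of the incremental path, assuming menpo's `PCAModel` (the `PointCloud` samples and the forgetting factor value are illustrative):

import numpy as np
from menpo.shape import PointCloud
from menpo.model import PCAModel

samples = [PointCloud(np.random.randn(10, 2)) for _ in range(30)]
model = PCAModel(samples)

# update with a new batch, putting slightly more weight on the new samples
new_samples = [PointCloud(np.random.randn(10, 2)) for _ in range(10)]
model.increment(new_samples, forgetting_factor=0.9)
print(model.n_samples)  # 40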
Example #16
    def __init__(self, samples, centre=True, n_samples=None, verbose=False):
        # build a data matrix from all the samples
        data, template = as_matrix(samples,
                                   length=n_samples,
                                   return_template=True,
                                   verbose=verbose)
        # (n_samples, n_features)
        self.n_samples = data.shape[0]

        # compute pca
        e_vectors, e_values, mean = pca(data, centre=centre, inplace=True)

        super(PCAModel, self).__init__(e_vectors, mean, template)
        self.centred = centre
        self._eigenvalues = e_values
        # start the active components as all the components
        self._n_active_components = int(self.n_components)
        self._trimmed_eigenvalues = np.array([])
    def increment(self,
                  samples,
                  n_samples=None,
                  forgetting_factor=1.0,
                  verbose=False):
        r"""
        Update the eigenvectors, eigenvalues and mean vector of this model
        by performing incremental PCA on the given samples.

        Parameters
        ----------
        samples : `list` of :map:`Vectorizable`
            List of new samples to update the model from.
        n_samples : `int`, optional
            If provided, then ``samples`` must be an iterator that yields
            ``n_samples`` samples. If not provided, then ``samples`` has to
            be a list (so we know how large the data matrix needs to be).
        forgetting_factor : ``[0.0, 1.0]`` `float`, optional
            Forgetting factor that weights the relative contribution of new
            samples vs old samples. If 1.0, all samples are weighted equally
            and, hence, the result is exactly the same as performing batch
            PCA on the concatenated list of old and new samples. If <1.0,
            more emphasis is put on the new samples. See [1] for details.

        References
        ----------
        .. [1] David Ross, Jongwoo Lim, Ruei-Sung Lin, Ming-Hsuan Yang.
           "Incremental Learning for Robust Visual Tracking". IJCV, 2007.
        """
        # build a data matrix from the new samples
        data = as_matrix(samples, length=n_samples, verbose=verbose)
        n_new_samples = data.shape[0]
        PCAVectorModel.increment(self,
                                 data,
                                 n_samples=n_new_samples,
                                 forgetting_factor=forgetting_factor,
                                 verbose=verbose)
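To round off the constructor and update above, a small usage sketch (shapes, sample counts and weights are illustrative assumptions):

import numpy as np
from menpo.shape import PointCloud
from menpo.model import PCAModel

samples = [PointCloud(np.random.randn(10, 2)) for _ in range(30)]
model = PCAModel(samples, centre=True)
print(model.n_samples, model.n_features, model.n_components)

# synthesise a new instance from weights on the active components;
# zero weights reproduce the mean shape
weights = np.zeros(model.n_active_components)
instance = model.instance(weights)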
def _compute_sum_cost_block_diagonal(samples, test_sample, graph,
                                     n_features_per_vertex, subtract_mean):
    # create ndarray with data
    data = as_matrix(samples, length=None, return_template=False)
    # initialize cost
    cost = 0.
    # loop over the graph's vertices
    for v1 in graph.vertices:
        v1_from = v1 * n_features_per_vertex
        v1_to = (v1 + 1) * n_features_per_vertex
        # slice data and test vector
        y = test_sample.as_vector()
        x = data[:, v1_from:v1_to]
        y = y[v1_from:v1_to]
        # compute the vertex mean and precision (inverse covariance)
        cov = np.linalg.inv(np.cov(x.T))
        mean = np.mean(x, axis=0)
        # compute and sum cost
        if subtract_mean:
            v = y - mean
        else:
            v = y
        cost += v.dot(cov).T.dot(v)
    return cost
Example #23
def test_as_matrix_return_template():
    data, t = as_matrix((template.copy() for _ in range(n_images)),
                        length=1, return_template=True)
    # Two rows of the mask are True (10 * 2 = 20)
    assert_equal(data.shape, (1, 20))
    assert_equal(t.shape, image_shape)
Example #24
def test_as_matrix_short_length():
    data = as_matrix((template.copy() for _ in range(n_images)), length=1)
    # Two rows of the mask are True (10 * 2 = 20)
    assert_equal(data.shape, (1, 20))
Example #25
def test_as_matrix_list():
    data = as_matrix([template.copy() for _ in range(n_images)])
    # Two rows of the mask are True (10 * 2 = 20)
    assert_equal(data.shape, (n_images, 20))
Example #26
def test_as_matrix_long_length_raises_value_error():
    as_matrix((template.copy() for _ in range(4)), length=5)
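Finally, a hedged sketch of `as_matrix` itself outside the test fixtures (`template`, `n_images` and `image_shape` used above are fixtures that are not shown on this page; the `menpo.math` import path and the `PointCloud` samples are assumptions):

import numpy as np
from menpo.math import as_matrix
from menpo.shape import PointCloud

pointclouds = [PointCloud(np.random.randn(5, 2)) for _ in range(4)]

# list input: the length is known, so no `length` argument is needed
data = as_matrix(pointclouds)
print(data.shape)  # (4, 10): each 5-point 2D cloud flattens to 10 features

# generator input: `length` must be given so the matrix can be preallocated
data, template = as_matrix((p for p in pointclouds), length=4,
                           return_template=True)
new_cloud = template.from_vector(data[0])  # rebuild a PointCloud from a row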