def increment(self, samples, n_samples=None, verbose=False):
    r"""
    Incrementally update the GMRF (mean and precision matrix) with new
    samples, by updating the distributions of all the edges.

    Parameters
    ----------
    samples : `list` or `iterable` of :map:`Vectorizable`
        The new samples to fold into the model.
    n_samples : `int`, optional
        If provided then ``samples`` must be an iterator that yields
        ``n_samples``. If not provided then ``samples`` has to be a list
        (so we know how large the data matrix needs to be).
    verbose : `bool`, optional
        If ``True``, the progress of the model's incremental update is
        printed.

    Raises
    ------
    ValueError
        If the model reports that it is not incremental.
    """
    # Guard: refuse to update a model that was not built as incremental
    if not self.is_incremental:
        raise ValueError('GMRF cannot be incrementally updated.')

    # Stack the new samples into a single data matrix and delegate the
    # actual update to the private implementation
    new_data = as_matrix(samples, length=n_samples, verbose=verbose)
    self._increment(data=new_data, verbose=verbose)
def test_as_matrix_return_template():
    # Feed a generator of template copies, but request a single row only
    copies = (template.copy() for _ in range(n_images))
    matrix, returned_template = as_matrix(copies, length=1,
                                          return_template=True)
    # Two rows of the mask are True (10 * 2 = 20)
    assert_equal(matrix.shape, (1, 20))
    # The template handed back must have the shape of the input images
    assert_equal(returned_template.shape, image_shape)
def _compute_sum_cost_block_sparse(samples, test_sample, graph,
                                   n_features_per_vertex, subtract_mean,
                                   mode):
    r"""
    Reference computation of a summed quadratic cost over the edges of a
    graph.

    For every edge, the features of its two vertices are combined (either
    concatenated or subtracted), the inverse of the sample covariance of the
    combined training features is computed, and ``v^T Q v`` is accumulated,
    where ``v`` is the (optionally mean-subtracted) combined test vector.

    Parameters
    ----------
    samples : `list` or iterable
        Training samples; passed to ``as_matrix``.
    test_sample : object with ``as_vector()``
        The sample whose cost is evaluated.
    graph : object with an ``edges`` attribute
        Each edge is indexed as ``e[0]``, ``e[1]`` to get the two vertex
        indices.
    n_features_per_vertex : `int`
        Number of consecutive columns that belong to each vertex.
    subtract_mean : `bool`
        If ``True``, the per-edge training mean is subtracted from the test
        vector before evaluating the cost.
    mode : `str`
        ``'concatenation'`` stacks the features of the two vertices; any
        other value uses their difference.

    Returns
    -------
    cost : `float`
        The sum of the per-edge costs.
    """
    # create ndarray with data (columns are sliced per vertex below)
    data = as_matrix(samples, length=None, return_template=False)

    # the test vector does not depend on the edge, so vectorize it once
    # instead of once per edge
    test_vector = test_sample.as_vector()

    # initialize cost
    cost = 0.

    # for loop over the graph's edges
    for e in graph.edges:
        v1 = e[0]
        v2 = e[1]
        v1_from = v1 * n_features_per_vertex
        v1_to = (v1 + 1) * n_features_per_vertex
        v2_from = v2 * n_features_per_vertex
        v2_to = (v2 + 1) * n_features_per_vertex

        # slice data and test vector
        if mode == 'concatenation':
            x = np.hstack((data[:, v1_from:v1_to], data[:, v2_from:v2_to]))
            y = np.hstack((test_vector[v1_from:v1_to],
                           test_vector[v2_from:v2_to]))
        else:
            x = data[:, v1_from:v1_to] - data[:, v2_from:v2_to]
            y = test_vector[v1_from:v1_to] - test_vector[v2_from:v2_to]

        # compute mean and precision (inverse of the sample covariance)
        precision = np.linalg.inv(np.cov(x.T))
        mean = np.mean(x, axis=0)

        # compute and sum cost
        if subtract_mean:
            v = y - mean
        else:
            v = y
        cost += v.dot(precision).T.dot(v)
    return cost
def __init__(self, samples, centre=True, n_samples=None,
             max_n_components=None, inplace=True, verbose=False):
    r"""
    Build the model from a collection of samples, after passing the data
    matrix through ``rpca_pcp``.

    Parameters
    ----------
    samples : `list` or `iterable` of :map:`Vectorizable`
        Samples to build the model from; passed to ``as_matrix``.
    centre : `bool`, optional
        Forwarded to ``PCAVectorModel.__init__``.
    n_samples : `int`, optional
        Forwarded to ``as_matrix`` as ``length``. The value handed on to
        ``PCAVectorModel.__init__`` is always the number of rows of the
        processed data matrix.
    max_n_components : `int`, optional
        Forwarded to ``PCAVectorModel.__init__``.
    inplace : `bool`, optional
        Forwarded to ``PCAVectorModel.__init__``.
    verbose : `bool`, optional
        Forwarded to ``as_matrix``. If ``True``, the second output of
        ``rpca_pcp`` is also printed.
    """
    # build a data matrix from all the samples
    data, template = as_matrix(samples, length=n_samples,
                               return_template=True, verbose=verbose)

    # NOTE(review): rpca_pcp presumably performs robust PCA and E is its
    # error/sparse term -- confirm against its definition.
    data, E = rpca_pcp(data)
    # Previously an unconditional Python 2 ``print E`` statement (debug
    # leftover, SyntaxError on Python 3); now opt-in via ``verbose``.
    if verbose:
        print(E)

    n_samples = data.shape[0]
    PCAVectorModel.__init__(self, data, centre=centre,
                            max_n_components=max_n_components,
                            n_samples=n_samples, inplace=inplace)
    VectorizableBackedModel.__init__(self, template)
def __init__(
    self,
    samples,
    graph,
    mode="concatenation",
    n_components=None,
    dtype=np.float64,
    sparse=True,
    n_samples=None,
    bias=0,
    incremental=False,
    verbose=False,
):
    r"""
    Build the model from a collection of samples.

    The samples are stacked into a data matrix, the template returned by
    ``as_matrix`` is stored as ``self.template_instance``, and all remaining
    work is delegated to ``GMRFVectorModel.__init__``.
    """
    # Stack every sample into one matrix and keep hold of the template
    matrix, template = as_matrix(
        samples, length=n_samples, return_template=True, verbose=verbose
    )
    self.template_instance = template

    # The vector model always receives the actual number of rows, whatever
    # ``n_samples`` was passed in
    GMRFVectorModel.__init__(
        self,
        matrix,
        graph,
        mode=mode,
        n_components=n_components,
        dtype=dtype,
        sparse=sparse,
        n_samples=matrix.shape[0],
        bias=bias,
        incremental=incremental,
        verbose=verbose,
    )
def mahalanobis_distance(self, samples, subtract_mean=True,
                         square_root=False):
    r"""
    Mahalanobis distance of a single sample :math:`\mathbf{x}` or of a list
    of samples :math:`\mathbf{X}`, i.e.

    .. math::
       \sqrt{(\mathbf{x}-\boldsymbol{\mu})^T \mathbf{Q} (\mathbf{x}-\boldsymbol{\mu})}
       \text{ or }
       \sqrt{(\mathbf{X}-\boldsymbol{\mu})^T \mathbf{Q} (\mathbf{X}-\boldsymbol{\mu})}

    Parameters
    ----------
    samples : :map:`Vectorizable` or `list` of :map:`Vectorizable`
        The new data sample or a list of samples.
    subtract_mean : `bool`, optional
        When ``True``, the mean vector is subtracted from the data vector.
    square_root : `bool`, optional
        If ``False``, the mahalanobis distance gets squared.

    Returns
    -------
    The value returned by ``self._mahalanobis_distance`` for the assembled
    data.
    """
    if not isinstance(samples, list):
        # Single sample: append a trailing axis to its vector and transpose
        # (a row matrix, assuming as_vector() is 1D)
        data = samples.as_vector()[..., None].T
    else:
        # Several samples: stack them into one matrix
        data = as_matrix(samples, length=None, return_template=False,
                         verbose=False)
    return self._mahalanobis_distance(samples=data,
                                      subtract_mean=subtract_mean,
                                      square_root=square_root)
def increment(self, samples, n_samples=None, verbose=False):
    r"""
    Fold new samples into the GMRF, updating its mean and precision matrix
    by updating the distributions of all the edges.

    Parameters
    ----------
    samples : `list` or `iterable` of :map:`Vectorizable`
        List or iterable of the new samples.
    n_samples : `int`, optional
        If provided then ``samples`` must be an iterator that yields
        ``n_samples``. If not provided then ``samples`` has to be a list
        (so we know how large the data matrix needs to be).
    verbose : `bool`, optional
        If ``True``, the progress of the model's incremental update is
        printed.

    Raises
    ------
    ValueError
        If ``self.is_incremental`` is falsy.
    """
    # Only models flagged as incremental may be updated
    if not self.is_incremental:
        raise ValueError("GMRF cannot be incrementally updated.")

    # Vectorize the incoming samples, then hand over to the private update
    stacked = as_matrix(samples, length=n_samples, verbose=verbose)
    self._increment(data=stacked, verbose=verbose)
def test_pca_init_from_covariance():
    # A PCA model initialised from a covariance matrix must agree with one
    # trained directly on the samples, both with and without centring.
    n_samples = 30
    n_features = 10
    n_dims = 2
    for centre in [True, False]:
        # random point clouds, plus their stacked matrix form
        samples = [PointCloud(np.random.randn(n_features, n_dims))
                   for _ in range(n_samples)]
        data, template = as_matrix(samples, return_template=True)

        # covariance matrix and mean that match the centring mode
        if not centre:
            mean = samples[0]
            covariance = np.dot(data.T, data) / (n_samples - 1)
        else:
            mean_vector = np.mean(data, axis=0)
            mean = template.from_vector(mean_vector)
            centred_data = data - mean_vector
            covariance = np.dot(centred_data.T, centred_data) / (n_samples - 1)

        # the two models under comparison
        pca1 = PCAModel.init_from_covariance_matrix(
            covariance, mean, centred=centre, n_samples=n_samples)
        pca2 = PCAModel(samples, centre=centre)

        # components
        assert_array_almost_equal(
            pca1.component_vector(0, with_mean=False),
            pca2.component_vector(0, with_mean=False))
        assert_array_almost_equal(pca1.component(7).as_vector(),
                                  pca2.component(7).as_vector())
        assert_array_almost_equal(pca1.components, pca2.components)

        # eigenvalues
        assert_array_almost_equal(pca1.eigenvalues, pca2.eigenvalues)
        assert_array_almost_equal(pca1.eigenvalues_cumulative_ratio(),
                                  pca2.eigenvalues_cumulative_ratio())
        assert_array_almost_equal(pca1.eigenvalues_ratio(),
                                  pca2.eigenvalues_ratio())

        # instances generated from random weights
        weights = np.random.randn(pca1.n_active_components)
        assert_array_almost_equal(pca1.instance(weights).as_vector(),
                                  pca2.instance(weights).as_vector())
        weights2 = np.random.randn(pca1.n_active_components - 4)
        assert_array_almost_equal(pca1.instance_vector(weights2),
                                  pca2.instance_vector(weights2))

        # means
        assert_array_almost_equal(pca1.mean().as_vector(),
                                  pca2.mean().as_vector())
        assert_array_almost_equal(pca1.mean_vector, pca2.mean_vector)

        # bookkeeping values (compared exactly)
        assert pca1.n_active_components == pca2.n_active_components
        assert pca1.n_components == pca2.n_components
        assert pca1.n_features == pca2.n_features
        assert pca1.n_samples == pca2.n_samples
        assert pca1.noise_variance() == pca2.noise_variance()
        assert pca1.noise_variance_ratio() == pca2.noise_variance_ratio()

        # variances and whitened components
        assert_almost_equal(pca1.variance(), pca2.variance())
        assert_almost_equal(pca1.variance_ratio(), pca2.variance_ratio())
        assert_array_almost_equal(pca1.whitened_components(),
                                  pca2.whitened_components())
def increment(self, samples, n_samples=None, forgetting_factor=1.0,
              verbose=False):
    r"""
    Run incremental PCA on the given samples and update this model's
    eigenvectors, eigenvalues and mean vector accordingly.

    Parameters
    ----------
    samples : `list` of :map:`Vectorizable`
        List of new samples to update the model from.
    n_samples : `int`, optional
        If provided then ``samples`` must be an iterator that yields
        ``n_samples``. If not provided then ``samples`` has to be a list
        (so we know how large the data matrix needs to be).
    forgetting_factor : ``[0.0, 1.0]`` `float`, optional
        Forgetting factor that weights the relative contribution of new
        samples vs old samples. If 1.0, all samples are weighted equally
        and, hence, the result is the exact same as performing batch
        PCA on the concatenated list of old and new samples. If <1.0,
        more emphasis is put on the new samples. See [1] for details.
    verbose : `bool`, optional
        Forwarded to ``as_matrix``.

    References
    ----------
    .. [1] David Ross, Jongwoo Lim, Ruei-Sung Lin, Ming-Hsuan Yang.
        "Incremental Learning for Robust Visual Tracking". IJCV, 2007.
    """
    # stack the new samples into a matrix and count its rows
    new_data = as_matrix(samples, length=n_samples, verbose=verbose)
    n_added = new_data.shape[0]

    # incremental pca against the current state of the model
    new_components, new_eigenvalues, new_mean = ipca(
        new_data, self._components, self._eigenvalues, self.n_samples,
        m_a=self.mean_vector, f=forgetting_factor)

    # Remember, before anything is overwritten, whether every component
    # was active; if so the same should hold once the update is done
    all_were_active = self.n_active_components == self.n_components

    # overwrite mean, components, eigenvalues and the sample count
    self.mean_vector = new_mean
    self._components = new_components
    self._eigenvalues = new_eigenvalues
    self.n_samples += n_added

    # re-activate all components when they were all active beforehand
    if all_were_active:
        self.n_active_components = self.n_components
def test_pca_init_from_covariance():
    # Builds the same PCA model twice -- once from an explicit covariance
    # matrix, once from the raw samples -- and checks that they coincide.
    n_samples = 30
    n_features = 10
    n_dims = 2
    for centre in [True, False]:
        # generate the random samples and their data matrix
        samples = [
            PointCloud(np.random.randn(n_features, n_dims))
            for _ in range(n_samples)
        ]
        data, template = as_matrix(samples, return_template=True)

        # mean and covariance, depending on whether we centre
        if not centre:
            mean = samples[0]
            cov_matrix = np.dot(data.T, data) / (n_samples - 1)
        else:
            mean_vector = np.mean(data, axis=0)
            mean = template.from_vector(mean_vector)
            zero_mean = data - mean_vector
            cov_matrix = np.dot(zero_mean.T, zero_mean) / (n_samples - 1)

        # model from the covariance matrix vs model from the samples
        pca1 = PCAModel.init_from_covariance_matrix(
            cov_matrix, mean, centred=centre, n_samples=n_samples)
        pca2 = PCAModel(samples, centre=centre)

        # compare the components
        assert_array_almost_equal(
            pca1.component_vector(0, with_mean=False),
            pca2.component_vector(0, with_mean=False))
        assert_array_almost_equal(
            pca1.component(7).as_vector(), pca2.component(7).as_vector())
        assert_array_almost_equal(pca1.components, pca2.components)

        # compare the eigenvalues and their ratios
        assert_array_almost_equal(pca1.eigenvalues, pca2.eigenvalues)
        assert_array_almost_equal(
            pca1.eigenvalues_cumulative_ratio(),
            pca2.eigenvalues_cumulative_ratio())
        assert_array_almost_equal(
            pca1.eigenvalues_ratio(), pca2.eigenvalues_ratio())

        # compare instances built from the same random weights
        weights = np.random.randn(pca1.n_active_components)
        assert_array_almost_equal(
            pca1.instance(weights).as_vector(),
            pca2.instance(weights).as_vector())
        weights2 = np.random.randn(pca1.n_active_components - 4)
        assert_array_almost_equal(
            pca1.instance_vector(weights2), pca2.instance_vector(weights2))

        # compare the means
        assert_array_almost_equal(
            pca1.mean().as_vector(), pca2.mean().as_vector())
        assert_array_almost_equal(pca1.mean_vector, pca2.mean_vector)

        # compare the scalar properties exactly
        assert pca1.n_active_components == pca2.n_active_components
        assert pca1.n_components == pca2.n_components
        assert pca1.n_features == pca2.n_features
        assert pca1.n_samples == pca2.n_samples
        assert pca1.noise_variance() == pca2.noise_variance()
        assert pca1.noise_variance_ratio() == pca2.noise_variance_ratio()

        # compare the variances and whitened components
        assert_almost_equal(pca1.variance(), pca2.variance())
        assert_almost_equal(pca1.variance_ratio(), pca2.variance_ratio())
        assert_array_almost_equal(
            pca1.whitened_components(), pca2.whitened_components())
def __init__(self, samples, graph, mode='concatenation', n_components=None,
             dtype=np.float64, sparse=True, n_samples=None, bias=0,
             incremental=False, verbose=False):
    r"""
    Construct the model from a collection of samples.

    ``as_matrix`` turns the samples into a data matrix and a template; the
    template is kept as ``self.template_instance`` and the matrix is passed
    on, with every other option, to ``GMRFVectorModel.__init__``.
    """
    # Vectorize the samples and remember the template that came with them
    stacked, template = as_matrix(samples, length=n_samples,
                                  return_template=True, verbose=verbose)
    self.template_instance = template

    # Number of rows actually present in the data matrix
    n_rows = stacked.shape[0]
    GMRFVectorModel.__init__(self, stacked, graph, mode=mode,
                             n_components=n_components, dtype=dtype,
                             sparse=sparse, n_samples=n_rows, bias=bias,
                             incremental=incremental, verbose=verbose)
def increment(self, samples, n_samples=None, forgetting_factor=1.0,
              verbose=False):
    r"""
    Update the model (eigenvectors, eigenvalues and mean vector) by
    performing incremental PCA on a batch of new samples.

    Parameters
    ----------
    samples : `list` of :map:`Vectorizable`
        List of new samples to update the model from.
    n_samples : `int`, optional
        If provided then ``samples`` must be an iterator that yields
        ``n_samples``. If not provided then ``samples`` has to be a list
        (so we know how large the data matrix needs to be).
    forgetting_factor : ``[0.0, 1.0]`` `float`, optional
        Forgetting factor that weights the relative contribution of new
        samples vs old samples. If 1.0, all samples are weighted equally
        and, hence, the result is the exact same as performing batch
        PCA on the concatenated list of old and new samples. If <1.0,
        more emphasis is put on the new samples. See [1] for details.
    verbose : `bool`, optional
        Forwarded to ``as_matrix``.

    References
    ----------
    .. [1] David Ross, Jongwoo Lim, Ruei-Sung Lin, Ming-Hsuan Yang.
        "Incremental Learning for Robust Visual Tracking". IJCV, 2007.
    """
    # data matrix of the new samples and how many rows it contributes
    batch = as_matrix(samples, length=n_samples, verbose=verbose)
    batch_size = batch.shape[0]

    # run incremental pca using the model's current state
    updated_vectors, updated_values, updated_mean = ipca(
        batch, self._components, self._eigenvalues, self.n_samples,
        m_a=self.mean_vector, f=forgetting_factor)

    # This has to be evaluated before the state below is replaced: were
    # all of the components active going in?
    keep_all_active = (self.n_active_components == self.n_components)

    # replace mean, components, eigenvalues; grow the sample count
    self.mean_vector = updated_mean
    self._components = updated_vectors
    self._eigenvalues = updated_values
    self.n_samples += batch_size

    # if everything was active before, make everything active again
    if keep_all_active:
        self.n_active_components = self.n_components
def __init__(self, samples, centre=True, n_samples=None, verbose=False):
    r"""
    Train the PCA model on a collection of samples.

    Parameters
    ----------
    samples : `list` or `iterable` of :map:`Vectorizable`
        Samples to build the model from; passed to ``as_matrix``.
    centre : `bool`, optional
        Forwarded to ``pca`` and stored as ``self.centred``.
    n_samples : `int`, optional
        Forwarded to ``as_matrix`` as ``length``.
    verbose : `bool`, optional
        Forwarded to ``as_matrix``.
    """
    # stack all samples into a matrix; (n_samples, n_features)
    matrix, template = as_matrix(samples, length=n_samples,
                                 return_template=True, verbose=verbose)
    self.n_samples = matrix.shape[0]

    # run pca on the matrix (``inplace=True`` is passed to ``pca``)
    components, eigenvalues, mean_vector = pca(matrix, centre=centre,
                                               inplace=True)

    # let the parent class store components, mean and template
    super(PCAModel, self).__init__(components, mean_vector, template)
    self.centred = centre
    self._eigenvalues = eigenvalues

    # every component starts out active and no eigenvalue has been trimmed
    self._n_active_components = int(self.n_components)
    self._trimmed_eigenvalues = np.array([])
def increment(self, samples, n_samples=None, forgetting_factor=1.0,
              verbose=False):
    r"""
    Incrementally update the model from new samples. The samples are
    stacked into a data matrix and the update itself is delegated to
    ``PCAVectorModel.increment``.

    Parameters
    ----------
    samples : `list` of :map:`Vectorizable`
        List of new samples to update the model from.
    n_samples : `int`, optional
        If provided then ``samples`` must be an iterator that yields
        ``n_samples``. If not provided then ``samples`` has to be a list
        (so we know how large the data matrix needs to be).
    forgetting_factor : ``[0.0, 1.0]`` `float`, optional
        Forgetting factor that weights the relative contribution of new
        samples vs old samples. If 1.0, all samples are weighted equally
        and, hence, the result is the exact same as performing batch
        PCA on the concatenated list of old and new samples. If <1.0,
        more emphasis is put on the new samples. See [1] for details.
    verbose : `bool`, optional
        Forwarded to ``as_matrix`` and to ``PCAVectorModel.increment``.

    References
    ----------
    .. [1] David Ross, Jongwoo Lim, Ruei-Sung Lin, Ming-Hsuan Yang.
        "Incremental Learning for Robust Visual Tracking". IJCV, 2007.
    """
    # vectorize the new samples
    new_data = as_matrix(samples, length=n_samples, verbose=verbose)

    # the vector model is told the real number of rows in the matrix
    PCAVectorModel.increment(self, new_data,
                             n_samples=new_data.shape[0],
                             forgetting_factor=forgetting_factor,
                             verbose=verbose)
def _compute_sum_cost_block_diagonal(samples, test_sample, graph,
                                     n_features_per_vertex, subtract_mean):
    r"""
    Reference computation of a summed quadratic cost over the vertices of a
    graph.

    For every vertex, the inverse of the sample covariance of its training
    features is computed and ``v^T Q v`` is accumulated, where ``v`` is the
    (optionally mean-subtracted) slice of the test vector that belongs to
    the vertex.

    Parameters
    ----------
    samples : `list` or iterable
        Training samples; passed to ``as_matrix``.
    test_sample : object with ``as_vector()``
        The sample whose cost is evaluated.
    graph : object with a ``vertices`` attribute
        Iterable of vertex indices.
    n_features_per_vertex : `int`
        Number of consecutive columns that belong to each vertex.
    subtract_mean : `bool`
        If ``True``, the per-vertex training mean is subtracted from the
        test vector before evaluating the cost.

    Returns
    -------
    cost : `float`
        The sum of the per-vertex costs.
    """
    # create ndarray with data (columns are sliced per vertex below)
    data = as_matrix(samples, length=None, return_template=False)

    # the test vector does not depend on the vertex, so vectorize it once
    # instead of once per vertex
    test_vector = test_sample.as_vector()

    # initialize cost
    cost = 0.

    # for loop over the graph's vertices
    for v1 in graph.vertices:
        v1_from = v1 * n_features_per_vertex
        v1_to = (v1 + 1) * n_features_per_vertex

        # slice data and test vector
        x = data[:, v1_from:v1_to]
        y = test_vector[v1_from:v1_to]

        # compute mean and precision (inverse of the sample covariance)
        precision = np.linalg.inv(np.cov(x.T))
        mean = np.mean(x, axis=0)

        # compute and sum cost
        if subtract_mean:
            v = y - mean
        else:
            v = y
        cost += v.dot(precision).T.dot(v)
    return cost
def test_as_matrix_long_length_raises_value_error():
    # The generator yields only 4 templates while 5 rows are requested.
    # NOTE(review): going by the test's name a ValueError is expected; the
    # check for it is not visible in this block (presumably a decorator) --
    # confirm.
    four_templates = (template.copy() for _ in range(4))
    as_matrix(four_templates, length=5)
def test_as_matrix_short_length():
    # Request fewer rows (1) than the generator is able to yield
    copies = (template.copy() for _ in range(n_images))
    matrix = as_matrix(copies, length=1)
    # Two rows of the mask are True (10 * 2 = 20)
    expected_shape = (1, 20)
    assert_equal(matrix.shape, expected_shape)
def test_as_matrix_list():
    # With a real list no explicit ``length`` is passed
    copies = [template.copy() for _ in range(n_images)]
    matrix = as_matrix(copies)
    # Two rows of the mask are True (10 * 2 = 20)
    expected_shape = (n_images, 20)
    assert_equal(matrix.shape, expected_shape)