def __init__(self, samples, centre=True, n_samples=None,
             max_n_components=None, inplace=True, verbose=False):
    """Build the model from a list of samples.

    The samples are vectorized into a single data matrix, decomposed with
    Robust PCA (Principal Component Pursuit) into a low-rank part and a
    sparse error matrix, and the low-rank part is used to train the PCA
    model.

    Parameters
    ----------
    samples : `list` of :map:`Vectorizable`
        The samples to build the model from.
    centre : `bool`, optional
        When ``True``, the data is centred before performing PCA.
    n_samples : `int`, optional
        If provided, ``samples`` must be an iterator yielding this many
        samples; otherwise ``samples`` must be a list.
    max_n_components : `int`, optional
        The maximum number of components to keep in the model.
    inplace : `bool`, optional
        When ``True``, the data matrix is modified in place.
    verbose : `bool`, optional
        When ``True``, progress information is printed.
    """
    # build a data matrix from all the samples
    data, template = as_matrix(samples, length=n_samples,
                               return_template=True, verbose=verbose)
    # Decompose into a low-rank part (data) and a sparse error matrix (E).
    data, E = rpca_pcp(data)
    # BUGFIX: was the Python 2 statement `print E` - a SyntaxError under
    # Python 3. NOTE(review): this looks like leftover debugging output -
    # consider removing or routing through proper logging.
    print(E)
    n_samples = data.shape[0]
    PCAVectorModel.__init__(self, data, centre=centre,
                            max_n_components=max_n_components,
                            n_samples=n_samples, inplace=inplace)
    VectorizableBackedModel.__init__(self, template)
def generate_texture_model_from_image_3d_fits(images_and_fits, lambda_=0.01,
                                              n_components=0.99):
    """Build an ITW texture model from a list of images with associated dense
    3D fits (one per image).

    Note that the input images should already have an image feature taken on
    them, and have all been resized to a consistent scale.

    Parameters
    ----------
    images_and_fits : `list` of (image, fit) pairs
        The feature images and their associated dense 2D fits.
    lambda_ : `float`, optional
        The sparsity weight passed to the R-PCA completion.
    n_components : `int` or `float`, optional
        The number of components (or variance ratio) to retain in the
        trimmed PCA model.

    Returns
    -------
    (model, X, M) : (`PCAVectorModel`, `ndarray`, `ndarray`)
        The trimmed texture model, the raw feature matrix and the
        per-feature visibility mask.
    """
    # BUGFIX: removed leftover debug code that wrote a junk file to a
    # hard-coded path (/content/fk.txt) - an unwanted side effect.
    feat_img, fit_2d = images_and_fits[0]
    n_channels = feat_img.n_channels
    n_features = n_channels * fit_2d.n_points
    n_samples = len(images_and_fits)
    X = np.empty((n_samples, n_features), dtype=feat_img.pixels.dtype)
    # BUGFIX: np.bool is deprecated/removed in modern numpy - the builtin
    # bool dtype is equivalent.
    M = np.empty_like(X, dtype=bool)
    proportion_masked = []
    for i, (img, fit_2d) in enumerate(
            print_progress(images_and_fits,
                           prefix='Extracting masks & features')):
        features, mask = extract_per_vertex_colour_with_occlusion(fit_2d, img)
        # The per-vertex visibility mask applies to every channel of that
        # vertex, so repeat it n_channels times.
        mask_repeated = np.repeat(mask.ravel(), n_channels)
        X[i] = features.ravel()
        M[i] = mask_repeated.ravel()
        proportion_masked.append(mask.sum() / mask.shape[0])
    print('Performing R-PCA to complete missing textures')
    A, E = rpca_missing(X, M, verbose=True, lambda_=lambda_)
    print('R-PCA completed. Building PCA model of features on completed '
          'samples.')
    model = PCAVectorModel(A, inplace=True)
    print('Trimming the components to retain only what was required.')
    model.trim_components(n_components=n_components)
    return model, X, M
def generate_texture_model_from_itwmm(images, mm, id_ind, exp_ind,
                                      template_camera, p, qs, cs,
                                      lambda_=0.01, n_components=0.99):
    r"""Build a new texture model from an existing model and fitting
    information to a collection of images.

    Parameters
    ----------
    images : `list`
        The (feature) images to sample textures from.
    mm : morphable model
        The model providing ``n_vertices`` and ``texture_model``.
    id_ind, exp_ind : indices
        Identity and expression parameter indices.
    template_camera : camera
        The camera used to project instances into the images.
    p : shape parameters
        Shared shape parameters.
    qs, cs : `list`
        Per-image expression and camera parameters.
    lambda_ : `float`, optional
        The sparsity weight passed to the R-PCA completion.
    n_components : `int` or `float`, optional
        The number of components (or variance ratio) to retain.

    Returns
    -------
    (model, X, M) : (`PCAVectorModel`, `ndarray`, `ndarray`)
        The trimmed texture model, the raw feature matrix and the
        per-feature visibility mask.
    """
    n_channels = images[0].n_channels
    n_features = n_channels * mm.n_vertices
    n_samples = len(images)
    X = np.empty((n_samples, n_features),
                 dtype=mm.texture_model.mean().dtype)
    # BUGFIX: np.bool is deprecated/removed in modern numpy - the builtin
    # bool dtype is equivalent.
    M = np.empty_like(X, dtype=bool)
    proportion_masked = []
    for i, (img, q, c) in enumerate(zip(print_progress(images, 'Extracting '
                                                               'masks and '
                                                               'features'),
                                        qs, cs)):
        i_in_img = instance_for_params(mm, id_ind, exp_ind,
                                       template_camera,
                                       p, q, c)['instance_in_img']
        features, mask = extract_per_vertex_colour_with_occlusion(i_in_img,
                                                                  img)
        # The per-vertex visibility mask applies to every channel.
        mask_repeated = np.repeat(mask.ravel(), n_channels)
        X[i] = features.ravel()
        M[i] = mask_repeated.ravel()
        proportion_masked.append(mask.sum() / mask.shape[0])
    print('Extraction concluded. Self-occlusions on average masked {:.0%} of '
          'vertices.'.format(np.array(proportion_masked).mean()))
    print('Performing R-PCA to complete missing textures')
    A, E = rpca_missing(X, M, verbose=True, lambda_=lambda_)
    print('R-PCA completed. Building PCA model of features on completed '
          'samples.')
    model = PCAVectorModel(A, inplace=True)
    print('Trimming the components to retain only what was required.')
    model.trim_components(n_components=n_components)
    return model, X, M
def test_pca_orthogonalize_against():
    # A 10-sample, 10-feature PCA model and a 4-basis linear model.
    pca_model = PCAVectorModel(np.random.randn(10, 10))
    basis = np.asarray([np.random.randn(10) for _ in range(4)])
    lm_model = LinearVectorModel(np.asarray(basis))
    # Orthonormalize the PCA model against the linear model's basis.
    pca_model.orthonormalize_against_inplace(lm_model)
    # 9 original active components minus 3 consumed by the 4-vector basis.
    assert_equal(pca_model.n_active_components, 6)
def test_pca_orthogonalize_against():
    # A 10-sample, 10-feature PCA model and a 4-basis linear model.
    pca_model = PCAVectorModel(np.random.randn(10, 10))
    basis = np.asarray([np.random.randn(10) for _ in range(4)])
    lm_model = LinearModel(np.asarray(basis))
    # Orthonormalize the PCA model against the linear model's basis.
    pca_model.orthonormalize_against_inplace(lm_model)
    # The number of active components must end up at 6.
    assert_equal(pca_model.n_active_components, 6)
def test_pca_increment_noncentred():
    data = [np.random.randn(10) for _ in range(10)]
    # Model built incrementally in three batches of samples.
    incremental = PCAVectorModel(data[:3], centre=False)
    incremental.increment(data[3:6])
    incremental.increment(data[6:])
    # Model built from all samples at once.
    batch = PCAVectorModel(data, centre=False)
    # Component signs may flip between the two, so compare magnitudes.
    assert_almost_equal(np.abs(incremental.components),
                        np.abs(batch.components))
    assert_almost_equal(incremental.eigenvalues, batch.eigenvalues)
    assert_almost_equal(incremental.mean(), batch.mean())
def test_pca_n_active_components_too_many():
    model = PCAVectorModel([np.random.randn(10) for _ in range(10)])
    # Requesting more components than exist caps at the maximum (9).
    model.n_active_components = 100
    assert_equal(model.n_active_components, 9)
    # Shrinking to a valid smaller count is honoured exactly.
    model.n_active_components = 5
    assert_equal(model.n_active_components, 5)
    # Over-requesting again caps at the maximum once more.
    model.n_active_components = 100
    assert_equal(model.n_active_components, 9)
def test_pca_variance_after_trim():
    model = PCAVectorModel([np.random.randn(10) for _ in range(10)])
    # Permanently trim the model down to 5 components.
    model.trim_components(5)
    # Kept variance must be smaller than the original total variance.
    assert model.variance() < model.original_variance()
    # Kept variance ratio must be below 1.0.
    assert model.variance_ratio() < 1.0
    # The discarded variance becomes positive noise variance...
    assert model.noise_variance() > 0.0
    # ...and a positive noise variance ratio.
    assert model.noise_variance_ratio() > 0.0
    # Inverse noise variance is its reciprocal.
    assert model.inverse_noise_variance() == 1 / model.noise_variance()
def test_pca_variance_after_change_n_active_components():
    pca = PCAVectorModel([np.random.randn(10) for _ in range(10)])
    # Reduce the number of active components (non-destructively).
    pca.n_active_components = 5
    # Active variance must drop below the original total variance.
    assert pca.variance() < pca.original_variance()
    # The variance ratio therefore falls below 1.0.
    assert pca.variance_ratio() < 1.0
    # The excluded variance shows up as positive noise variance...
    assert pca.noise_variance() > 0.0
    # ...and a positive noise variance ratio.
    assert pca.noise_variance_ratio() > 0.0
    # Inverse noise variance is simply the reciprocal.
    assert pca.inverse_noise_variance() == 1 / pca.noise_variance()
def increment(self, samples, n_samples=None, forgetting_factor=1.0,
              verbose=False):
    r"""
    Incrementally update this model's eigenvectors, eigenvalues and mean
    vector with a batch of new samples.

    Parameters
    ----------
    samples : `list` of :map:`Vectorizable`
        List of new samples to update the model from.
    n_samples : `int`, optional
        If provided then ``samples`` must be an iterator that yields
        ``n_samples``. If not provided then samples has to be a
        list (so we know how large the data matrix needs to be).
    forgetting_factor : ``[0.0, 1.0]`` `float`, optional
        Forgetting factor that weights the relative contribution of new
        samples vs old samples. If 1.0, all samples are weighted equally
        and, hence, the results is the exact same as performing batch
        PCA on the concatenated list of old and new simples. If <1.0,
        more emphasis is put on the new samples. See [1] for details.

    References
    ----------
    .. [1] David Ross, Jongwoo Lim, Ruei-Sung Lin, Ming-Hsuan Yang.
       "Incremental Learning for Robust Visual Tracking". IJCV, 2007.
    """
    # Vectorize the new samples into a single data matrix.
    new_data = as_matrix(samples, length=n_samples, verbose=verbose)
    # Delegate the incremental PCA update to the vector model.
    PCAVectorModel.increment(self, new_data,
                             n_samples=new_data.shape[0],
                             forgetting_factor=forgetting_factor,
                             verbose=verbose)
def init_from_covariance_matrix(cls, C, mean, n_samples, centred=True,
                                max_n_components=None):
    r"""
    Build the Principal Component Analysis (PCA) by eigenvalue
    decomposition of the provided covariance/scatter matrix. For details
    of the implementation of PCA, see :map:`pcacov`.

    Parameters
    ----------
    C : ``(n_features, n_features)`` `ndarray`
        The Covariance/Scatter matrix, where `N` is the number of
        features.
    mean : :map:`Vectorizable`
        The mean instance. It must be a :map:`Vectorizable` and *not* an
        `ndarray`.
    n_samples : `int`
        The number of samples used to generate the covariance matrix.
    centred : `bool`, optional
        When ``True`` we assume that the data were centered before
        computing the covariance matrix.
    max_n_components : `int`, optional
        The maximum number of components to keep in the model. Any
        components above and beyond this one are discarded.
    """
    # Allocate a fresh instance without running __init__.
    model = PCAVectorModel.__new__(cls)
    model.n_samples = n_samples
    # Eigendecompose the covariance matrix.
    eigenvectors, eigenvalues = pcacov(C)
    # _constructor_helper performs the MeanLinearModel.__init__ call.
    model._constructor_helper(eigenvalues=eigenvalues,
                              eigenvectors=eigenvectors,
                              mean=mean.as_vector(), centred=centred,
                              max_n_components=max_n_components)
    VectorizableBackedModel.__init__(model, mean)
    return model
def init_from_components(cls, components, eigenvalues, mean, n_samples,
                         centred, max_n_components=None):
    r"""
    Build the Principal Component Analysis (PCA) using the provided
    components (eigenvectors) and eigenvalues.

    Parameters
    ----------
    components : ``(n_components, n_features)`` `ndarray`
        The eigenvectors to be used.
    eigenvalues : ``(n_components, )`` `ndarray`
        The corresponding eigenvalues.
    mean : :map:`Vectorizable`
        The mean instance. It must be a :map:`Vectorizable` and *not* an
        `ndarray`.
    n_samples : `int`
        The number of samples used to generate the eigenvectors.
    centred : `bool`, optional
        When ``True`` we assume that the data were centered before
        computing the eigenvectors.
    max_n_components : `int`, optional
        The maximum number of components to keep in the model. Any
        components above and beyond this one are discarded.
    """
    # Allocate a fresh instance without running __init__.
    model = PCAVectorModel.__new__(cls)
    model.n_samples = n_samples
    # _constructor_helper performs the MeanLinearModel.__init__ call.
    model._constructor_helper(eigenvalues=eigenvalues,
                              eigenvectors=components,
                              mean=mean.as_vector(), centred=centred,
                              max_n_components=max_n_components)
    VectorizableBackedModel.__init__(model, mean)
    return model
def project_whitened_vector(self, vector_instance):
    """Delegate the whitened projection of a vector to
    :map:`PCAVectorModel.project_whitened`."""
    projected = PCAVectorModel.project_whitened(self, vector_instance)
    return projected
def test_pca_trim_variance_limit():
    model = PCAVectorModel([np.random.randn(10) for _ in range(10)])
    # Keeping more than 100% of the variance is impossible, so a ratio
    # above 1.0 must be rejected.
    with raises(ValueError):
        model.trim_components(2.5)
def test_pca_n_active_components_negative():
    model = PCAVectorModel([np.random.randn(10) for _ in range(10)])
    # A negative number of active components is invalid and must raise.
    with raises(ValueError):
        model.n_active_components = -5
def project_vector(self, instance_vector):
    """Delegate the projection of a vector to
    :map:`PCAVectorModel.project`."""
    weights = PCAVectorModel.project(self, instance_vector)
    return weights
def test_pca_n_active_components():
    model = PCAVectorModel([np.random.randn(10) for _ in range(10)])
    # A valid integer count of active components is honoured exactly.
    model.n_active_components = 5
    assert_equal(model.n_active_components, 5)
def test_pca_n_active_components_negative():
    samples = [np.random.randn(10) for _ in range(10)]
    model = PCAVectorModel(samples)
    # BUGFIX: setting a negative component count raises ValueError, so the
    # bare assignment made this test error out. Assert the exception is
    # raised instead (matching the sibling test that uses raises()).
    with raises(ValueError):
        model.n_active_components = -5
def reconstruct_vector(self, instance_vector):
    """Delegate the reconstruction of a vector to
    :map:`PCAVectorModel.reconstruct`."""
    reconstruction = PCAVectorModel.reconstruct(self, instance_vector)
    return reconstruction
def test_pca_vector_init_from_covariance():
    n_samples, n_features = 30, 10
    for centre in (True, False):
        # Random samples matrix and its per-feature mean.
        samples = np.random.randn(n_samples, n_features)
        mean = np.mean(samples, axis=0)
        # Covariance (about the mean) or scatter (about the origin).
        data = samples - mean if centre else samples
        C = np.dot(data.T, data) / (n_samples - 1)
        # Build one model from the covariance and one from raw samples.
        pca1 = PCAVectorModel.init_from_covariance_matrix(
            C, mean, centred=centre, n_samples=n_samples)
        pca2 = PCAVectorModel(samples, centre=centre, inplace=False)
        # The two construction routes must agree in every respect.
        assert_array_almost_equal(pca1.mean(), pca2.mean())
        assert_array_almost_equal(pca1.component(0, with_mean=False),
                                  pca2.component(0, with_mean=False))
        assert_array_almost_equal(pca1.component(7), pca2.component(7))
        assert_array_almost_equal(pca1.components, pca2.components)
        assert_array_almost_equal(pca1.eigenvalues, pca2.eigenvalues)
        assert_array_almost_equal(pca1.eigenvalues_cumulative_ratio(),
                                  pca2.eigenvalues_cumulative_ratio())
        assert_array_almost_equal(pca1.eigenvalues_ratio(),
                                  pca2.eigenvalues_ratio())
        weights = np.random.randn(pca1.n_active_components - 4)
        assert_array_almost_equal(pca1.instance(weights),
                                  pca2.instance(weights))
        assert pca1.n_active_components == pca2.n_active_components
        assert pca1.n_components == pca2.n_components
        assert pca1.n_features == pca2.n_features
        assert pca1.n_samples == pca2.n_samples
        assert pca1.noise_variance() == pca2.noise_variance()
        assert pca1.noise_variance_ratio() == pca2.noise_variance_ratio()
        assert_allclose(pca1.variance(), pca2.variance())
        assert pca1.variance_ratio() == pca2.variance_ratio()
        assert_array_almost_equal(pca1.whitened_components(),
                                  pca2.whitened_components())
def component_vector(self, index, with_mean=True, scale=1.0):
    """Delegate the retrieval of the ``index``-th component vector to
    :map:`PCAVectorModel.component`."""
    component = PCAVectorModel.component(self, index, with_mean=with_mean,
                                         scale=scale)
    return component
def test_pca_vector_init_from_covariance():
    n_samples = 30
    n_features = 10
    for centred in [True, False]:
        # Random data and its sample mean.
        data = np.random.randn(n_samples, n_features)
        mu = np.mean(data, axis=0)
        # Covariance matrix about the mean, or raw scatter matrix.
        if centred:
            shifted = data - mu
            cov = np.dot(shifted.T, shifted) / (n_samples - 1)
        else:
            cov = np.dot(data.T, data) / (n_samples - 1)
        # One model from the covariance matrix, one straight from data.
        model_cov = PCAVectorModel.init_from_covariance_matrix(
            cov, mu, centred=centred, n_samples=n_samples)
        model_raw = PCAVectorModel(data, centre=centred, inplace=False)
        # Both construction paths must yield identical models.
        assert_array_almost_equal(model_cov.mean(), model_raw.mean())
        assert_array_almost_equal(model_cov.component(0, with_mean=False),
                                  model_raw.component(0, with_mean=False))
        assert_array_almost_equal(model_cov.component(7),
                                  model_raw.component(7))
        assert_array_almost_equal(model_cov.components,
                                  model_raw.components)
        assert_array_almost_equal(model_cov.eigenvalues,
                                  model_raw.eigenvalues)
        assert_array_almost_equal(model_cov.eigenvalues_cumulative_ratio(),
                                  model_raw.eigenvalues_cumulative_ratio())
        assert_array_almost_equal(model_cov.eigenvalues_ratio(),
                                  model_raw.eigenvalues_ratio())
        weights = np.random.randn(model_cov.n_active_components - 4)
        assert_array_almost_equal(model_cov.instance(weights),
                                  model_raw.instance(weights))
        assert model_cov.n_active_components == model_raw.n_active_components
        assert model_cov.n_components == model_raw.n_components
        assert model_cov.n_features == model_raw.n_features
        assert model_cov.n_samples == model_raw.n_samples
        assert model_cov.noise_variance() == model_raw.noise_variance()
        assert (model_cov.noise_variance_ratio() ==
                model_raw.noise_variance_ratio())
        assert_allclose(model_cov.variance(), model_raw.variance())
        assert model_cov.variance_ratio() == model_raw.variance_ratio()
        assert_array_almost_equal(model_cov.whitened_components(),
                                  model_raw.whitened_components())
def test_pca_trim_variance_limit():
    samples = [np.random.randn(10) for _ in range(10)]
    model = PCAVectorModel(samples)
    # BUGFIX: it is impossible to keep more than a 1.0 variance ratio, so
    # trim_components(2.5) raises ValueError and the bare call made this
    # test error out. Assert the exception is raised instead (matching the
    # sibling test that uses raises()).
    with raises(ValueError):
        model.trim_components(2.5)
def instance_vector(self, weights, normalized_weights=False):
    """Delegate the generation of an instance vector from ``weights`` to
    :map:`PCAVectorModel.instance`."""
    instance = PCAVectorModel.instance(
        self, weights, normalized_weights=normalized_weights)
    return instance