# NOTE(review): ``test_pca_init_from_covariance`` is defined a second time
# further down in this file; the later definition rebinds the name, so this
# one is shadowed at import time. Confirm which copy is meant to survive.
def test_pca_init_from_covariance():
    """Compare a covariance-initialised PCAModel with a sample-built one.

    For ``centre`` in ``(True, False)`` a model is created with
    ``PCAModel.init_from_covariance_matrix`` from a hand-computed covariance
    matrix and another with ``PCAModel(samples, centre=centre)``; every
    public quantity of the two models is then required to agree.
    """
    n_samples = 30
    n_features = 10
    n_dims = 2
    centre_values = [True, False]
    for centre in centre_values:
        # generate the list of samples and convert it to a data matrix
        samples = [
            PointCloud(np.random.randn(n_features, n_dims))
            for _ in range(n_samples)
        ]
        data, template = as_matrix(samples, return_template=True)

        # compute covariance matrix and mean
        if centre:
            mean_vector = np.mean(data, axis=0)
            mean = template.from_vector(mean_vector)
            X = data - mean_vector
            C = np.dot(X.T, X) / (n_samples - 1)
        else:
            # uncentred: the first sample is passed in the mean slot
            # (presumably only as a template -- TODO confirm)
            mean = samples[0]
            C = np.dot(data.T, data) / (n_samples - 1)

        # create the 2 pca models
        pca1 = PCAModel.init_from_covariance_matrix(
            C, mean, centred=centre, n_samples=n_samples
        )
        pca2 = PCAModel(samples, centre=centre)

        # compare them
        assert_array_almost_equal(
            pca1.component_vector(0, with_mean=False),
            pca2.component_vector(0, with_mean=False),
        )
        assert_array_almost_equal(
            pca1.component(7).as_vector(), pca2.component(7).as_vector()
        )
        assert_array_almost_equal(pca1.components, pca2.components)
        assert_array_almost_equal(pca1.eigenvalues, pca2.eigenvalues)
        assert_array_almost_equal(
            pca1.eigenvalues_cumulative_ratio(),
            pca2.eigenvalues_cumulative_ratio(),
        )
        assert_array_almost_equal(
            pca1.eigenvalues_ratio(), pca2.eigenvalues_ratio()
        )
        weights = np.random.randn(pca1.n_active_components)
        assert_array_almost_equal(
            pca1.instance(weights).as_vector(),
            pca2.instance(weights).as_vector(),
        )
        weights2 = np.random.randn(pca1.n_active_components - 4)
        assert_array_almost_equal(
            pca1.instance_vector(weights2), pca2.instance_vector(weights2)
        )
        assert_array_almost_equal(
            pca1.mean().as_vector(), pca2.mean().as_vector()
        )
        assert_array_almost_equal(pca1.mean_vector, pca2.mean_vector)

        # integer bookkeeping must match exactly
        assert pca1.n_active_components == pca2.n_active_components
        assert pca1.n_components == pca2.n_components
        assert pca1.n_features == pca2.n_features
        assert pca1.n_samples == pca2.n_samples

        # The two models are built from different inputs, so floating-point
        # quantities are compared with a tolerance rather than with ``==``,
        # in line with the variance checks below.
        assert_almost_equal(pca1.noise_variance(), pca2.noise_variance())
        assert_almost_equal(
            pca1.noise_variance_ratio(), pca2.noise_variance_ratio()
        )
        assert_almost_equal(pca1.variance(), pca2.variance())
        assert_almost_equal(pca1.variance_ratio(), pca2.variance_ratio())
        assert_array_almost_equal(
            pca1.whitened_components(), pca2.whitened_components()
        )
# NOTE(review): an earlier function with this same name appears above in this
# file; this definition rebinds the name and therefore shadows it. Confirm
# whether the duplicate is intentional.
def test_pca_init_from_covariance():
    """PCAModel from a covariance matrix must equal PCAModel from samples.

    Runs once with centring and once without. In each pass the covariance
    matrix is computed by hand from the sample matrix, fed to
    ``PCAModel.init_from_covariance_matrix``, and the resulting model is
    checked against ``PCAModel(samples, centre=centre)``.
    """
    n_samples = 30
    n_features = 10
    n_dims = 2

    for centre in (True, False):
        # random point clouds, flattened into a data matrix by as_matrix
        samples = [PointCloud(np.random.randn(n_features, n_dims))
                   for _ in range(n_samples)]
        data, template = as_matrix(samples, return_template=True)

        # hand-computed covariance matrix and the matching mean object
        if centre:
            mean_vector = np.mean(data, axis=0)
            mean = template.from_vector(mean_vector)
            X = data - mean_vector
            C = np.dot(X.T, X) / (n_samples - 1)
        else:
            # no centring: the first sample is handed over as the mean
            mean = samples[0]
            C = np.dot(data.T, data) / (n_samples - 1)

        # the two models under comparison
        pca1 = PCAModel.init_from_covariance_matrix(C, mean, centred=centre,
                                                    n_samples=n_samples)
        pca2 = PCAModel(samples, centre=centre)

        # components and eigenvalues
        assert_array_almost_equal(pca1.component_vector(0, with_mean=False),
                                  pca2.component_vector(0, with_mean=False))
        assert_array_almost_equal(pca1.component(7).as_vector(),
                                  pca2.component(7).as_vector())
        assert_array_almost_equal(pca1.components, pca2.components)
        assert_array_almost_equal(pca1.eigenvalues, pca2.eigenvalues)
        assert_array_almost_equal(pca1.eigenvalues_cumulative_ratio(),
                                  pca2.eigenvalues_cumulative_ratio())
        assert_array_almost_equal(pca1.eigenvalues_ratio(),
                                  pca2.eigenvalues_ratio())

        # instances generated from the same random weights
        weights = np.random.randn(pca1.n_active_components)
        assert_array_almost_equal(pca1.instance(weights).as_vector(),
                                  pca2.instance(weights).as_vector())
        weights2 = np.random.randn(pca1.n_active_components - 4)
        assert_array_almost_equal(pca1.instance_vector(weights2),
                                  pca2.instance_vector(weights2))

        # means
        assert_array_almost_equal(pca1.mean().as_vector(),
                                  pca2.mean().as_vector())
        assert_array_almost_equal(pca1.mean_vector, pca2.mean_vector)

        # integer-valued sizes have to match exactly
        for size_attr in ("n_active_components", "n_components",
                          "n_features", "n_samples"):
            assert getattr(pca1, size_attr) == getattr(pca2, size_attr)

        # Floating-point summaries: use a tolerance instead of ``==`` since
        # the two models were constructed from different inputs; this also
        # keeps the noise checks consistent with the variance checks.
        assert_almost_equal(pca1.noise_variance(), pca2.noise_variance())
        assert_almost_equal(pca1.noise_variance_ratio(),
                            pca2.noise_variance_ratio())
        assert_almost_equal(pca1.variance(), pca2.variance())
        assert_almost_equal(pca1.variance_ratio(), pca2.variance_ratio())
        assert_array_almost_equal(pca1.whitened_components(),
                                  pca2.whitened_components())
def test_pca_variance():
    """Check the variance bookkeeping of a PCAModel that was not trimmed.

    The model is expected to report its kept variance equal to the original
    variance, a variance ratio of exactly 1.0, and zero noise variance.
    """
    clouds = []
    for _ in range(10):
        clouds.append(PointCloud(np.random.randn(10)))
    pca = PCAModel(clouds)

    # kept variance has to coincide with the total variance
    assert_equal(pca.variance(), pca.original_variance())

    # ... which makes the kept-variance ratio exactly 1.0
    assert_equal(pca.variance_ratio(), 1.0)

    # no variance is attributed to noise
    assert_equal(pca.noise_variance(), 0.0)

    # and the noise-variance ratio is 0.0 as well
    assert_equal(pca.noise_variance_ratio(), 0.0)
def test_pca_variance_after_trim():
    """Check the variance bookkeeping after ``trim_components(5)``.

    Once the model has been trimmed, the kept variance must fall below the
    original variance and the noise variance must become positive.
    """
    clouds = [PointCloud(np.random.randn(10)) for _ in range(10)]
    pca = PCAModel(clouds)

    # restrict the model to 5 components
    pca.trim_components(5)

    # less variance is kept than was originally present
    assert pca.variance() < pca.original_variance()

    # so the kept-variance ratio drops below 1.0
    assert pca.variance_ratio() < 1.0

    # the remainder shows up as strictly positive noise variance
    assert pca.noise_variance() > 0.0

    # with a strictly positive ratio as well
    assert pca.noise_variance_ratio() > 0.0

    # the inverse noise variance is the reciprocal of the noise variance
    assert pca.inverse_noise_variance() == 1 / pca.noise_variance()