def test_converts_input_domain_if_needed(self):
    """Apply a fitted projector to a table built on a narrower domain.

    The projector is fitted on ``self.dataset``; the probe table uses a
    domain made of only the first five features of that dataset. There is
    no explicit assertion: the test passes when the call does not raise.
    """
    self.create_normal_dataset()
    pca_factory = linear.Pca(variance_covered=.99)
    projector = pca_factory(self.dataset)

    # One-row table whose domain is a slice of the training features.
    leading_features = self.dataset.domain.features[:5]
    narrow_domain = data.Domain(leading_features)
    probe_rows = [[1., 2., 3., 4., 5.]]
    new_examples = data.Table(narrow_domain, probe_rows)

    projector(new_examples)
def test_pca_with_max_components(self):
    """Check that ``max_components`` caps the number of projection rows.

    The dataset is created with ``ncomponents`` three larger than the cap,
    and the first dimension of ``pca.projection`` must equal the cap.
    """
    component_limit = 3
    self.create_dataset(ncomponents=component_limit + 3)

    fitted = linear.Pca(max_components=component_limit)(self.dataset)

    # Unpacking also checks that the projection shape has two entries.
    row_count, _ = fitted.projection.shape
    self.assertEqual(row_count, component_limit)
def test_pca_with_variance_covered(self):
    """Check the projection row count when fitting with ``variance_covered``.

    The dataset is created with ``ncomponents=3`` and the PCA is fitted
    with ``variance_covered=.99``; the first dimension of
    ``pca.projection`` must then equal 3.
    """
    expected_rows = 3
    self.create_dataset(ncomponents=expected_rows)

    fitted = linear.Pca(variance_covered=.99)(self.dataset)

    # Unpacking also checks that the projection shape has two entries.
    row_count, _ = fitted.projection.shape
    self.assertEqual(row_count, expected_rows)
def test_projected_domain_can_convert_data_without_class(self):
    """Compare projector output with a table converted via its domain.

    The dataset is projected directly, and separately a table is built
    from the original dataset using the projected data's domain. Both
    must hold the same items.
    """
    self.create_normal_dataset()
    projector = linear.Pca(variance_covered=.99)(self.dataset)

    via_projector = projector(self.dataset)
    target_domain = via_projector.domain
    via_domain = data.Table(target_domain, self.dataset)

    # NOTE(review): assertItemsEqual is the Python 2 unittest name; it is
    # called assertCountEqual on Python 3 - confirm the target version.
    self.assertItemsEqual(via_projector, via_domain)
def test_total_variance_remains_the_same(self):
    """Check ``variance_sum`` against two independently computed totals.

    For datasets created with ``m=10`` and ``m=250``, ``pca.variance_sum``
    must (almost) equal both the sum of ``pca.variances`` and the number
    of non-zero entries in ``self.principal_components``.
    """
    for m_value in (10, 250):
        self.create_dataset(m=m_value)
        fitted = linear.Pca()(self.dataset)

        per_component_total = fitted.variances.sum()
        self.assertAlmostEqual(fitted.variance_sum, per_component_total)

        nonzero_entries = (self.principal_components != 0).sum()
        self.assertAlmostEqual(fitted.variance_sum, nonzero_entries)
def test_pca_with_standardization(self):
    """Check that a standardized PCA weights the non-zero dimensions equally.

    With ``standardize=True`` on a dataset created with ``ncomponents=1``,
    the smallest and largest non-zero entries of the first projection
    vector must be (almost) equal.
    """
    self.create_dataset(ncomponents=1)
    fitted = linear.Pca(standardize=True)(self.dataset)

    first_vector = fitted.projection[0]
    active_weights = first_vector[first_vector.nonzero()]

    # Values in every dimension are normally distributed, so each
    # dimension should be treated as equally important.
    smallest = active_weights.min()
    largest = active_weights.max()
    self.assertAlmostEqual(smallest, largest)
def test_pca(self):
    """Check basic properties of a non-standardized PCA.

    For datasets created with ``m=10`` and ``m=250`` this verifies that:

    * projection rows whose variance exceeds 0.01 pass
      ``assertInCorrectSpace``;
    * every projection row has unit Euclidean norm;
    * ``pca.variances`` is sorted in decreasing order.
    """
    for m_value in (10, 250):
        self.create_dataset(m=m_value)
        fitted = linear.Pca(standardize=False)(self.dataset)

        significant = fitted.variances > 0.01
        self.assertInCorrectSpace(fitted.projection[significant, :])

        # Projection vectors should be normalized.
        for vector in fitted.projection:
            self.assertAlmostEqual(np.linalg.norm(vector), 1.)

        # Components should have decreasing variances.
        observed_order = fitted.variances.tolist()
        descending_order = sorted(fitted.variances, reverse=True)
        self.assertListEqual(observed_order, descending_order)
def test_projected_domain_contains_class_vars(self):
    """Check that projection keeps class variables and their values.

    After projecting ``self.dataset``, the projected domain must contain
    the original ``class_var`` and every entry of ``class_vars``. Each
    projected example must also report the same ``get_class()`` value and
    the same ``get_classes()`` values as its source example.
    """
    self.create_dataset_with_classes()

    projector = linear.Pca(variance_covered=.99)(self.dataset)
    projected_data = projector(self.dataset)

    self.assertIn(self.dataset.domain.class_var, projected_data.domain)
    for class_ in self.dataset.domain.class_vars:
        self.assertIn(class_, projected_data.domain)

    for ex1, ex2 in zip(self.dataset, projected_data):
        self.assertEqual(ex1.get_class(), ex2.get_class())
        for v1, v2 in zip(ex1.get_classes(), ex2.get_classes()):
            # Compare original against projected; the previous code
            # compared v2 with itself, so it could never fail.
            self.assertEqual(v1, v2)
def test_projects_data_table(self):
    """Apply a fitted projector to the table it was fitted on.

    There is no explicit assertion: the test passes when neither fitting
    nor projecting raises.
    """
    self.create_normal_dataset()
    pca_factory = linear.Pca(variance_covered=.99)
    projector = pca_factory(self.dataset)
    projector(self.dataset)
def test_pca_on_only_constant_features(self):
    """Expect ``ValueError`` when PCA is built and fitted on the constant dataset."""
    self.create_constant_dataset()
    # Construction and fitting both stay inside the context, so a
    # ValueError from either step satisfies the test.
    with self.assertRaises(ValueError):
        pca_factory = linear.Pca()
        pca_factory(self.dataset)
def test_pca_on_empty_data(self):
    """Expect ``ValueError`` when PCA is built and fitted on the empty dataset."""
    self.create_empty_dataset()
    # Construction and fitting both stay inside the context, so a
    # ValueError from either step satisfies the test.
    with self.assertRaises(ValueError):
        pca_factory = linear.Pca()
        pca_factory(self.dataset)
def test_pca_handles_unknowns(self):
    """Fit a default PCA on the dataset created with unknowns.

    There is no explicit assertion: the test passes when fitting does not
    raise.
    """
    self.create_dataset_with_unknowns()
    pca_factory = linear.Pca()
    pca_factory(self.dataset)