Beispiel #1
0
 def test_build(self):
     """Verify that repeated ``build`` calls extend the manifold's layers.

     Building with ``MaxDepth(1)`` is expected to leave two layers, and a
     follow-up build with ``MaxDepth(2)`` three layers. The graph population
     must then equal the number of data points.
     """
     manifold = Manifold(self.data, 'euclidean').build(criterion.MaxDepth(1))
     self.assertEqual(2, len(manifold.layers))

     # A second build on the same manifold should add one more layer.
     manifold.build(criterion.MaxDepth(2))
     self.assertEqual(3, len(manifold.layers))

     expected_population = len(self.data)
     self.assertEqual(expected_population, manifold.graph.population)
Beispiel #2
0
 def test_partition_backends(self):
     """Check that the single and threaded partition backends compare equal."""
     points = datasets.random(n=100, dimensions=5)[0]

     # Each backend gets its own manifold and its own criterion instance.
     sequential = Manifold(points, 'euclidean')
     sequential = sequential._partition_single([criterion.MaxDepth(5)])

     threaded = Manifold(points, 'euclidean')
     threaded = threaded._partition_threaded([criterion.MaxDepth(5)])

     self.assertEqual(sequential, threaded)
Beispiel #3
0
    def test_eq(self):
        """Exercise manifold equality against itself and against differing manifolds."""
        # A manifold must compare equal to itself.
        self.assertEqual(self.manifold, self.manifold)

        # Same data and metric, but built from a 0.2 argpoints fraction.
        subsampled = Manifold(self.data, 'euclidean', argpoints=0.2)
        subsampled = subsampled.build(
            criterion.MaxDepth(10),
            criterion.LFDRange(60, 50),
        )
        self.assertNotEqual(self.manifold, subsampled)
        self.assertEqual(subsampled, subsampled)

        # Same data, different metric.
        cosine = Manifold(self.data, 'cosine')
        self.assertNotEqual(self.manifold, cosine)
Beispiel #4
0
    def test_neighbors(self):
        """Compare the graph's edges against a brute-force neighbor search.

        For every cluster in the built graph, another cluster counts as a true
        neighbor when its distance from this cluster (via ``distance_from`` on
        the other cluster's ``argmedoid``) is at most the sum of the two radii.
        The edges recorded in the graph must match that set exactly.
        """
        generators = (
            datasets.bullseye,
        )  # datasets.spiral_2d, datasets.tori, datasets.skewer, datasets.line
        for dataset in generators:
            data, _ = dataset()
            manifold = Manifold(data, 'euclidean')
            manifold.build(
                criterion.MaxDepth(12),
                criterion.LFDRange(60, 50),
            )

            for cluster in manifold.graph.clusters:
                # Every other cluster in the graph is a candidate neighbor.
                candidates = [
                    other for other in manifold.graph.clusters
                    if other.name != cluster.name
                ]
                distances = list(cluster.distance_from(
                    [other.argmedoid for other in candidates]))
                true_neighbors = {
                    other: distance
                    for other, distance in zip(candidates, distances)
                    if distance <= cluster.radius + other.radius
                }
                neighbors = {}
                for edge in manifold.graph.edges[cluster]:
                    neighbors[edge.neighbor] = edge.distance

                # Edges present in the graph that brute force does not confirm.
                extras = set(neighbors) - set(true_neighbors)
                extras_report = (
                    f'got extra neighbors: optimal, true {len(true_neighbors)}, actual {len(neighbors)}\n'
                    + "\n".join([
                        f"{c.name}, {cluster.radius + c.radius:.6f}"
                        for c in extras
                    ]))
                self.assertEqual(0, len(extras), msg=extras_report)

                # Brute-force neighbors that have no edge in the graph.
                missed = set(true_neighbors) - set(neighbors)
                missed_report = (
                    f'missed some neighbors: optimal, true {len(true_neighbors)}, actual {len(neighbors)}\n'
                    + '\n'.join([
                        f'{c.name}, {cluster.radius + c.radius:.6f}'
                        for c in missed
                    ]))
                self.assertEqual(0, len(missed), msg=missed_report)
Beispiel #5
0
 def test_argsamples(self):
     """Check that ``argsamples`` grows as rows with new values are appended.

     Starts from a 100x100 block of zeros and appends one constant row per
     iteration (filled with 0, 1, ..., 9). After appending the row for value
     ``i``, a cluster over all of the manifold's argpoints must report at
     least ``i + 1`` argsamples.
     """
     data = np.zeros((100, 100))
     for value in range(10):
         extra_row = np.full((1, 100), float(value))
         data = np.concatenate([data, extra_row], axis=0)

         manifold = Manifold(data, 'euclidean')
         cluster = Cluster(manifold, manifold.argpoints, '')

         minimum_samples = value + 1
         self.assertLessEqual(minimum_samples, len(cluster.argsamples))
Beispiel #6
0
 def setUpClass(cls) -> None:
     """Create the class-wide fixture: random data, its labels and a built manifold.

     The data comes from ``datasets.random(n=1000, dimensions=3)`` and the
     euclidean manifold is built with ``MaxDepth(8)`` and ``LFDRange(60, 50)``.
     """
     generated = datasets.random(n=1000, dimensions=3)
     cls.data, cls.labels = generated

     cls.manifold = Manifold(cls.data, 'euclidean')
     cls.manifold.build(criterion.MaxDepth(8), criterion.LFDRange(60, 50))
Beispiel #7
0
 def setUpClass(cls) -> None:
     """Create the class-wide fixture: seeded bullseye data and a built manifold.

     NumPy's global RNG is seeded with 42 before the data is generated. The
     euclidean manifold is built with ``MaxDepth(10)`` and ``LFDRange(60, 50)``.
     """
     np.random.seed(42)
     generated = datasets.bullseye(n=1000, num_rings=2)
     cls.data, _ = generated

     cls.manifold = Manifold(cls.data, 'euclidean')
     cls.manifold.build(criterion.MaxDepth(10), criterion.LFDRange(60, 50))
Beispiel #8
0
    def test_init(self):
        """Cover the accepted and rejected forms of the third ``Manifold`` argument."""
        # A freshly constructed manifold has exactly one layer.
        fresh = Manifold(self.data, 'euclidean')
        self.assertEqual(1, len(fresh.layers))

        # An explicit index list is kept as the argpoints.
        indexed = Manifold(self.data, 'euclidean', [1, 2, 3])
        self.assertListEqual([1, 2, 3], indexed.argpoints)

        # A float selects that fraction of the data as argpoints.
        fraction = 0.2
        sampled = Manifold(self.data, 'euclidean', fraction)
        expected_count = int(len(self.data) * fraction)
        self.assertEqual(expected_count, len(sampled.argpoints))

        # Non-numeric lists and plain strings must be rejected.
        for invalid in (['a', 'b', 'c'], 'apples'):
            with self.assertRaises(ValueError):
                # noinspection PyTypeChecker
                Manifold(self.data, 'euclidean', invalid)
Beispiel #9
0
    def test_find_knn(self):
        """Compare ``find_knn`` with a brute-force ranking by euclidean distance.

        The query is the first point of the bullseye dataset. For each ``k``
        the result must contain exactly ``k`` hits whose indices equal the
        ``k`` closest indices from the sorted distance list.
        """
        data = datasets.bullseye()[0]
        query = data[0]

        # Brute-force reference: (distance, index) pairs in ascending order.
        distances = cdist(np.asarray([query]), data, 'euclidean')[0]
        ranked = sorted((dist, index) for index, dist in enumerate(distances))

        manifold = Manifold(data, 'euclidean')
        manifold.build_tree(criterion.MinPoints(10), criterion.MaxDepth(10))

        # Every k below 10, then every 1000th k up to the dataset size.
        ks = [*range(10), *range(10, data.shape[0], 1000)]
        for k in ks:
            expected = {index for _, index in ranked[:k]}
            results = manifold.find_knn(query, k)
            self.assertEqual(k, len(results))
            self.assertSetEqual(expected, {index for index, _ in results})
Beispiel #10
0
 def test_tree_search(self):
     """Check ``tree_search`` against a linear overlap scan of each layer.

     A tree is built on the ``datasets.line()`` data with ``MinPoints(10)`` and
     ``MaxDepth(5)``. For every cluster, the hits of a tree search around its
     medoid must be a subset of the same-layer clusters that ``overlaps``
     reports, and the parents of those clusters must find themselves when
     searched at their own depth. Two searches with negative depths close the
     test: one expecting no hits, one expecting ``ValueError``.
     """
     # Seed NumPy's global RNG before generating the data.
     np.random.seed(42)
     data, labels = datasets.line()
     manifold = Manifold(data, 'euclidean')
     manifold.build_tree(criterion.MinPoints(10), criterion.MaxDepth(5))
     # Finding points that are in data.
     for depth, layer in enumerate(manifold.layers):
         for cluster in layer.clusters:
             # Reference answer: every cluster of this layer that overlaps the
             # ball around this cluster's medoid with this cluster's radius.
             linear = set([
                 c for c in layer
                 if c.overlaps(cluster.medoid, cluster.radius)
             ])
             # Same query answered by tree_search, started from the first
             # cluster of layer 0 and limited to this cluster's depth.
             tree = set(
                 next(iter(manifold.layers[0])).tree_search(
                     cluster.medoid, cluster.radius, cluster.depth).keys())
             # The tree search must not report anything the linear scan missed.
             self.assertSetEqual(set(), tree - linear)
             # NOTE(review): `d` is never used in the loop body, so each
             # iteration appears to repeat the same checks - confirm whether
             # walking further up the ancestors was intended.
             for d in range(depth, 0, -1):
                 # The comprehension's `cluster` is scoped to the comprehension
                 # and does not rebind the outer loop variable.
                 parents = set([
                     manifold.select(cluster.name[:-1])
                     for cluster in linear
                 ])
                 for parent in parents:
                     # Each parent, searched at its own depth, must find itself.
                     results = parent.tree_search(cluster.medoid,
                                                  cluster.radius,
                                                  parent.depth)
                     self.assertIn(
                         parent,
                         results,
                         msg=
                         f'\n{parent.name} not in {[c.name for c in results]}. '
                         f'got {len(results)} hits.')
     # Attempting to find points that *may* be in the data
     results = manifold.root.tree_search(point=np.asarray([0, 1]),
                                         radius=0.,
                                         depth=-1)
     self.assertEqual(0, len(results))
     # A depth of -5 is expected to be rejected with ValueError.
     with self.assertRaises(ValueError):
         _ = manifold.root.tree_search(point=np.asarray([0, 1]),
                                       radius=0.,
                                       depth=-5)
     return
Beispiel #11
0
    def test_build_tree(self):
        """Follow the layer count through successive ``build_tree`` calls."""
        manifold = Manifold(self.data, 'euclidean')
        self.assertEqual(1, len(manifold.layers))

        # Two added levels on top of the initial layer give three layers.
        manifold.build_tree(criterion.AddLevels(2))
        self.assertEqual(3, len(manifold.layers))

        # MaxDepth shouldn't do anything in build_tree if we're beyond that depth already.
        manifold.build_tree(criterion.MaxDepth(1))
        self.assertEqual(3, len(manifold.layers))

        # With no criteria, the last layer's cardinality must equal the data size.
        manifold.build_tree()
        deepest_layer = manifold.layers[-1]
        self.assertEqual(len(self.data), deepest_layer.cardinality)
Beispiel #12
0
    def build_manifolds(self, data: np.ndarray) -> List[Manifold]:
        """Build one manifold per metric in ``self.metrics`` and store the list.

        Each manifold is built with the same criteria list: ``MaxDepth`` from
        ``self.max_depth`` and ``MinPoints`` from ``self.min_points``. The
        result is kept on ``self._manifolds`` and also returned.

        :param data: numpy array of data where the rows are instances and the columns are features.
        :return: The list of manifolds, one per metric, in the order of ``self.metrics``.
        """
        # `np.ndarray` is the array type; `np.array` is only the factory function.
        criteria: List[Criterion] = [
            MaxDepth(self.max_depth),
            MinPoints(self.min_points),
        ]
        self._manifolds = [
            Manifold(data, metric).build(*criteria) for metric in self.metrics
        ]
        return self._manifolds
Beispiel #13
0
    def test_load(self):
        """Round-trip the manifold through ``dump``/``load`` and compare the results.

        The loaded manifold, the clusters of its last layer and its graph must
        all equal the original's, and every loaded cluster must carry the
        expected keys in its cache.
        """
        original = self.manifold
        with TemporaryFile() as fp:
            original.dump(fp)
            # Rewind so that load reads from the start of the dump.
            fp.seek(0)
            loaded = Manifold.load(fp, self.data)

        self.assertEqual(original, loaded)
        self.assertEqual(set(original.layers[-1]), set(loaded.layers[-1]))
        self.assertEqual(original.graph, loaded.graph)

        cached_keys = (
            'radius',
            'argradius',
            'argsamples',
            'argmedoid',
            'local_fractal_dimension',
        )
        for layer in loaded.layers:
            for cluster in layer:
                for key in cached_keys:
                    self.assertIn(key, cluster.cache)
Beispiel #14
0
 def test_partition(self):
     """Partitioning the cluster selected by ``''`` must yield more than one child."""
     xor_points = datasets.xor()[0]
     selected = Manifold(xor_points, 'euclidean').select('')

     # Materialise the children so that they can be counted.
     children = [*selected.partition()]
     self.assertGreater(len(children), 1)
Beispiel #15
0
 def setUpClass(cls) -> None:
     """Create the class-wide fixture: a 1000x100 standard-normal sample and its manifold."""
     samples = np.random.randn(1000, 100)
     cls.data = samples
     cls.manifold = Manifold(samples, 'euclidean')
Beispiel #16
0
 def __init__(self, data: Data, metric: Metric):
     """Store the data and metric and wrap them in a new ``Manifold``.

     The manifold's ``distance`` and ``root`` are also exposed directly on
     this instance.

     :param data: the data handed to the manifold.
     :param metric: the metric handed to the manifold.
     """
     self.data: Data = data
     self.metric: Metric = metric

     manifold: Manifold = Manifold(self.data, self.metric)
     self.manifold: Manifold = manifold
     # Shortcuts into the manifold.
     self.root: Cluster = manifold.root
     self.distance = manifold.distance