Ejemplo n.º 1
0
 def test_call_and_closest_pair(self, PointSet):
     """Check ``fp()`` and ``closest_pair()`` against the brute-force answer."""
     points = PointSet
     pairs = FastPair().build(points)
     fast = pairs.closest_pair()
     brute = pairs.closest_pair_brute_force()
     # Calling the instance must give the same result as closest_pair().
     assert pairs() == fast
     # Distances are floats, so compare them with a tolerance.
     gap = abs(fast[0] - brute[0])
     assert gap < 1e-8
     assert fast[1] == brute[1]
Ejemplo n.º 2
0
 def test_call_and_closest_pair(self):
     """Check ``fp()`` and ``closest_pair()`` against the brute-force answer."""
     points = PointSet()
     pairs = FastPair().build(points)
     fast = pairs.closest_pair()
     brute = pairs.closest_pair_brute_force()
     # Calling the instance must give the same result as closest_pair().
     assert pairs() == fast
     # Distances are floats, so compare them with a tolerance.
     gap = abs(fast[0] - brute[0])
     assert gap < 1e-8
     assert fast[1] == brute[1]
Ejemplo n.º 3
0
 def test_update_point_less_points(self, PointSet):
     """Update a point while the structure holds only nine points."""
     points = PointSet
     pairs = FastPair()
     for point in points[:9]:
         pairs += point
     # Nine additions are expected to leave the structure uninitialized.
     assert pairs.initialized is False
     first = points[0]  # The point that gets replaced
     replacement = rand_tuple(len(first))
     pairs._update_point(first, replacement)
     assert len(pairs) == 1
Ejemplo n.º 4
0
 def test_all_closest_pairs(self):
     """Compare closest_pair() with brute force and an exhaustive pair scan."""
     points = PointSet()
     pairs = FastPair().build(points)
     fast = pairs.closest_pair()
     brute = pairs.closest_pair_brute_force()  # Expected in the same order
     assert abs(fast[0] - brute[0]) < 1e-8
     assert fast[1] == brute[1]  # Compared as tuples
     # Independent reference: score every unordered pair, keep the smallest.
     scored = [
         (pairs.dist(a, b), (a, b))
         for a, b in combinations(points, r=2)
     ]
     reference = min(scored, key=itemgetter(0))
     assert abs(fast[0] - reference[0]) < 1e-8
     # Sorting makes this comparison independent of the order within a pair.
     assert sorted(fast[1]) == sorted(reference[1])
Ejemplo n.º 5
0
 def test_sub(self, PointSet):
     """Check ``-=``: size drops by one and unknown points raise ValueError."""
     points = PointSet
     pairs = FastPair().build(points)
     last = points[-1]
     before = pairs._find_neighbor(last)
     pairs -= last
     # Re-query the former neighbor; it must not point at the removed point.
     after = pairs._find_neighbor(before["neigh"])
     assert after["neigh"] != last
     # Fragile check: the two distances could legitimately coincide.
     assert before["dist"] != after["dist"]
     assert len(pairs) == len(points) - 1
     with pytest.raises(ValueError):
         pairs -= rand_tuple(len(points[0]))
Ejemplo n.º 6
0
 def test_call_and_closest_pair_min_points(self, image_array):
     """Check closest_pair() on a small, still-uninitialized structure."""
     images = image_array
     pairs = FastPair(dist=image_distance)
     for image in images:
         pairs += image
     assert pairs.initialized is False
     assert len(pairs) == 6
     fast = pairs.closest_pair()
     brute = pairs.closest_pair_brute_force()
     # Calling the instance must give the same result as closest_pair().
     assert pairs() == fast
     gap = abs(fast[0] - brute[0])
     assert gap < 1e-8
     assert fast[1] == brute[1]
Ejemplo n.º 7
0
 def test_sub(self):
     """Check ``-=``: size drops by one and unknown points raise ValueError."""
     points = PointSet()
     pairs = FastPair().build(points)
     last = points[-1]
     before = pairs._find_neighbor(last)
     pairs -= last
     # Re-query the former neighbor; it must not point at the removed point.
     after = pairs._find_neighbor(before["neigh"])
     assert after["neigh"] != last
     # Fragile check: the two distances could legitimately coincide.
     assert before["dist"] != after["dist"]
     assert len(pairs) == len(points) - 1
     with pytest.raises(ValueError):
         pairs -= rand_tuple(len(points[0]))
Ejemplo n.º 8
0
 def test_all_closest_pairs(self, PointSet):
     """Compare closest_pair() with brute force and an exhaustive pair scan."""
     points = PointSet
     pairs = FastPair().build(points)
     fast = pairs.closest_pair()
     brute = pairs.closest_pair_brute_force()  # Expected in the same order
     assert abs(fast[0] - brute[0]) < 1e-8
     assert fast[1] == brute[1]  # Compared as tuples
     # Independent reference: score every unordered pair, keep the smallest.
     scored = [
         (pairs.dist(a, b), (a, b))
         for a, b in combinations(points, r=2)
     ]
     reference = min(scored, key=itemgetter(0))
     assert abs(fast[0] - reference[0]) < 1e-8
     # Sorting makes this comparison independent of the order within a pair.
     assert sorted(fast[1]) == sorted(reference[1])
Ejemplo n.º 9
0
 def test_update_point(self, PointSet):
     """Replace one point and check the neighbor recorded for its replacement.

     After ``_update_point(old, new)`` the structure must no longer contain
     ``old``, must contain ``new`` and must keep its size. The neighbor entry
     stored for ``new`` is then compared with a brute-force scan.
     """
     ps = PointSet
     fp = FastPair().build(ps)
     assert len(fp) == len(ps)
     old = ps[0]  # Just grab the first point...
     new = rand_tuple(len(old))
     fp._update_point(old, new)
     assert old not in fp
     assert new in fp
     assert len(fp) == len(ps)  # Size shouldn't change
     # Brute-force reference. `old` is ps[0] and has just been replaced in
     # `fp`, so it is skipped: a removed point must never be the expected
     # neighbor of `new`.
     candidates = [(fp.dist(new, p), p) for p in ps[1:]]
     expected = min(candidates, key=itemgetter(0))
     neigh = fp.neighbors[new]
     assert abs(expected[0] - neigh["dist"]) < 1e-8
     assert expected[1] == neigh["neigh"]
Ejemplo n.º 10
0
 def test_find_neighbor_and_sdist(self):
     """Check _find_neighbor() and sdist() against a brute-force scan."""
     points = PointSet()
     pairs = FastPair().build(points)
     query = rand_tuple(len(points[0]))
     found = pairs._find_neighbor(query)  # Used here purely as a query
     direct = pairs.dist(query, found["neigh"])
     assert abs(direct - found["dist"]) < 1e-8
     assert len(pairs) == len(points)  # The query must not be inserted
     # Reference: distance from the query to every input point.
     scan = [(pairs.dist(query, p), p) for p in points]
     nearest = min(scan, key=itemgetter(0))
     assert abs(nearest[0] - found["dist"]) < 1e-8
     assert nearest[1] == found["neigh"]
     # sdist() must lead to the same nearest point.
     nearest = min(pairs.sdist(query), key=itemgetter(0))
     assert abs(found["dist"] - nearest[0]) < 1e-8
     assert found["neigh"] == nearest[1]
Ejemplo n.º 11
0
 def test_init(self):
     """A freshly constructed FastPair has default settings and no data."""
     pairs = FastPair()
     assert pairs.min_points == 10
     assert isinstance(pairs.dist, FunctionType)
     assert pairs.initialized is False
     # Both containers start out empty.
     n_points = len(pairs.points)
     assert n_points == 0
     n_neighbors = len(pairs.neighbors)
     assert n_neighbors == 0
Ejemplo n.º 12
0
 def test_update_point(self):
     """Replace one point and check the neighbor recorded for its replacement.

     After ``_update_point(old, new)`` the structure must no longer contain
     ``old``, must contain ``new`` and must keep its size. The neighbor entry
     stored for ``new`` is then compared with a brute-force scan.
     """
     ps = PointSet()
     fp = FastPair().build(ps)
     assert len(fp) == len(ps)
     old = ps[0]  # Just grab the first point...
     new = rand_tuple(len(old))
     fp._update_point(old, new)
     assert old not in fp
     assert new in fp
     assert len(fp) == len(ps)  # Size shouldn't change
     # Brute-force reference. `old` is ps[0] and has just been replaced in
     # `fp`, so it is skipped: scanning it made the test fail whenever `old`
     # happened to be the point closest to `new`.
     candidates = [(fp.dist(new, p), p) for p in ps[1:]]
     expected = min(candidates, key=itemgetter(0))
     neigh = fp.neighbors[new]
     assert abs(expected[0] - neigh["dist"]) < 1e-8
     assert expected[1] == neigh["neigh"]
Ejemplo n.º 13
0
    def __init__(self, kmax=100, dist=kernel_dist(gaussian),
                 centroid_factory=KernelCentroid):
        """Set up an empty sketch backed by a FastPair structure.

        Parameters
        ----------
        kmax : int, default=100
            Upper bound on the number of cluster centroids kept in memory.
            It sets the 'scale' of the solution: a larger `kmax` yields a
            higher resolution clustering.
        dist : default=kernel_dist(gaussian)
            Forwarded to the FastPair structure as its `dist` argument.
        centroid_factory : default=KernelCentroid
            Stored on the instance as `centroid_factory`.
        """
        self.centroid_factory = centroid_factory
        self.kmax = kmax
        self.npoints = 0  # No points seen yet
        self.fastpair = FastPair(10, dist=dist)
Ejemplo n.º 14
0
 def test_add(self, PointSet):
     """Add points one at a time and watch the ``initialized`` flag flip."""
     points = PointSet
     pairs = FastPair()
     head, tail = points[:9], points[9:]
     for point in head:
         pairs += point
     # Nine points are not enough to initialize the structure.
     assert pairs.initialized is False
     assert len(pairs) == 9
     for point in tail:
         pairs += point
     assert pairs.initialized is True
Ejemplo n.º 15
0
 def test_cluster(self):
     """Merge closest pairs step by step, mirrored by a brute-force merge."""
     points = PointSet()
     pairs = FastPair().build(points)
     for _ in range(len(pairs) - 1):
         # FastPair side: replace the closest pair with its merged point.
         dist, (a, b) = pairs.closest_pair()
         merged = interact(a, b)
         pairs -= b
         pairs -= a
         pairs += merged
         # Brute-force side; the pair may come back in the opposite order.
         scored = [
             (pairs.dist(p, q), (p, q))
             for p, q in combinations(points, r=2)
         ]
         d, (e, f) = min(scored, key=itemgetter(0))
         expected = interact(e, f)
         assert abs(d - dist) < 1e-8
         assert (a == e or b == e) and (b == f or a == f)
         assert merged == expected
         # Apply the same merge to the plain list and compare contents.
         points.remove(e)
         points.remove(f)
         points.append(expected)
         assert contains_same(pairs.points, points)
     assert len(pairs.points) == len(points) == 1
Ejemplo n.º 16
0
 def test_cluster(self, PointSet):
     """Merge closest pairs step by step, mirrored by a brute-force merge."""
     points = PointSet
     pairs = FastPair().build(points)
     for _ in range(len(pairs) - 1):
         # FastPair side: replace the closest pair with its merged point.
         dist, (a, b) = pairs.closest_pair()
         merged = interact(a, b)
         pairs -= b
         pairs -= a
         pairs += merged
         # Brute-force side; the pair may come back in the opposite order.
         scored = [
             (pairs.dist(p, q), (p, q))
             for p, q in combinations(points, r=2)
         ]
         d, (e, f) = min(scored, key=itemgetter(0))
         expected = interact(e, f)
         assert abs(d - dist) < 1e-8
         assert (a == e or b == e) and (b == f or a == f)
         assert merged == expected
         # Apply the same merge to the plain list and compare contents.
         points.remove(e)
         points.remove(f)
         points.append(expected)
         assert contains_same(pairs.points, points)
     assert len(pairs.points) == len(points) == 1
Ejemplo n.º 17
0
    def __init__(self,
                 kmax=100,
                 dist=kernel_dist(gaussian),
                 centroid_factory=KernelCentroid):
        """Set up an empty sketch backed by a FastPair structure.

        Parameters
        ----------
        kmax : int, default=100
            Upper bound on the number of cluster centroids kept in memory.
            It sets the 'scale' of the solution: a larger `kmax` yields a
            higher resolution clustering.
        dist : default=kernel_dist(gaussian)
            Forwarded to the FastPair structure as its `dist` argument.
        centroid_factory : default=KernelCentroid
            Stored on the instance as `centroid_factory`.
        """
        self.centroid_factory = centroid_factory
        self.kmax = kmax
        self.npoints = 0  # No points seen yet
        self.fastpair = FastPair(10, dist=dist)
Ejemplo n.º 18
0
    def test_iter(self, PointSet):
        """Exercise iteration and item access on a built FastPair.

        Checks the default settings, that iteration yields one of the input
        points, that indexing an existing point exposes a neighbor from the
        input set, and that reading or assigning an unknown key raises
        ``KeyError``.
        """
        import pytest  # Local import keeps this test self-contained

        ps = PointSet
        fp = FastPair().build(ps)
        assert fp.min_points == 10
        assert isinstance(fp.dist, FunctionType)
        my_iter = iter(fp)
        assert next(my_iter) in set(ps)
        neighbor = fp[ps[0]].neigh
        assert neighbor in set(ps)

        # Catching and printing the KeyError would let the test pass even
        # when no error is raised; require it explicitly instead.
        with pytest.raises(KeyError):
            _ = fp[(2, 3, 4)]

        fp[ps[0]] = neighbor
        # The value was captured above, so only the assignment to the unknown
        # key can be the source of the KeyError here.
        with pytest.raises(KeyError):
            fp[(2, 3, 4)] = neighbor
Ejemplo n.º 19
0
 def test_find_neighbor_and_sdist(self, PointSet):
     """Check _find_neighbor() and sdist() against a brute-force scan."""
     points = PointSet
     pairs = FastPair().build(points)
     query = rand_tuple(len(points[0]))
     found = pairs._find_neighbor(query)  # Used here purely as a query
     direct = pairs.dist(query, found["neigh"])
     assert abs(direct - found["dist"]) < 1e-8
     assert len(pairs) == len(points)  # The query must not be inserted
     # Reference: distance from the query to every input point.
     scan = [(pairs.dist(query, p), p) for p in points]
     nearest = min(scan, key=itemgetter(0))
     assert abs(nearest[0] - found["dist"]) < 1e-8
     assert nearest[1] == found["neigh"]
     # sdist() must lead to the same nearest point.
     nearest = min(pairs.sdist(query), key=itemgetter(0))
     assert abs(found["dist"] - nearest[0]) < 1e-8
     assert found["neigh"] == nearest[1]
Ejemplo n.º 20
0
 def test_merge_closest(self):
     """Repeatedly merge the closest pair until a single point remains."""
     # Coverage is thin here; there is a lot more that could be checked.
     random.seed(1234)
     points = PointSet(d=4)
     pairs = FastPair().build(points)
     remaining = len(points)
     while remaining > 1:
         _, (a, b) = pairs.closest_pair()
         merged = interact(a, b)
         pairs -= b  # b is removed outright...
         pairs._update_point(a, merged)  # ...and a becomes the merged point
         remaining -= 1
     assert len(pairs) == 1 == remaining
     # Expected survivor for the seed fixed above.
     expected = (
         0.69903599809571437,
         0.52457534006594131,
         0.7614753848101149,
         0.37011695654655385,
     )
     assert all_close(pairs.points[0], expected)
     # With fewer than two points there is no pair left to report.
     with pytest.raises(ValueError):
         pairs.closest_pair()
Ejemplo n.º 21
0
 def test_merge_closest(self):
     """Repeatedly merge the closest pair until a single point remains."""
     # Coverage is thin here; there is a lot more that could be checked.
     random.seed(1234)
     points = [rand_tuple(4) for _ in range(50)]
     pairs = FastPair().build(points)
     remaining = len(points)
     while remaining > 1:
         _, (a, b) = pairs.closest_pair()
         merged = interact(a, b)
         pairs -= b  # b is removed outright...
         pairs._update_point(a, merged)  # ...and a becomes the merged point
         remaining -= 1
     assert len(pairs) == 1 == remaining
     # Expected survivor for the seed fixed above.
     expected = (0.69903599809571437, 0.52457534006594131,
                 0.7614753848101149, 0.37011695654655385)
     assert all_close(pairs.points[0], expected)
     # With fewer than two points there is no pair left to report.
     with pytest.raises(ValueError):
         pairs.closest_pair()
Ejemplo n.º 22
0
 def test_len(self):
     """len() is zero before build() and equals the input size afterwards."""
     points = PointSet()
     pairs = FastPair()
     size_before = len(pairs)
     assert size_before == 0
     pairs.build(points)
     assert len(pairs) == len(points)
Ejemplo n.º 23
0
class AddC(object):
    """Implements the AddC clustering algorithm.

    For each data point arriving, the closest centroid to the incoming point
    is moved towards the point. If there are more than `kmax` centroids,
    then the two closest centroids are merged. This results in the creation of
    a redundant centroid; the redundant centroid is then set equal to the new
    data point. At any time, the data-structure can be queried for the current
    set of centroids/clusters, or updated with additional data points.
    """
    def __init__(self, kmax=100, dist=kernel_dist(gaussian),
                 centroid_factory=KernelCentroid):
        """Initialize an empty AddC sketch.

        Parameters
        ----------
        kmax : int, default=100
            The maximum number of cluster centroids to store (i.e., size of
            memory). This parameter controls the 'scale' of the desired
            solution, such that larger values of `kmax` will lead to a higher
            resolution cluster solution.
        dist : default=kernel_dist(gaussian)
            Distance passed to the underlying FastPair structure as `dist`.
        centroid_factory : default=KernelCentroid
            Called with each incoming point to create the centroid that is
            inserted into the sketch.
        """
        self.kmax = kmax
        self.npoints = 0  # Number of points added so far
        self.centroid_factory = centroid_factory
        self.fastpair = FastPair(10, dist=dist)

    def __add__(self, p):
        """Add a point to the AddC sketch.

        The sketch is updated in place and returned, which is what lets
        `sketch += p` (as used by `batch`) work through this method.
        """
        c = self.centroid_factory(p)  # Create an 'empty' centroid at point `p`
        self._step_one(c)
        self._step_two()
        self._step_three(c)
        self.npoints += 1  # Update count of points seen so far
        return self

    def __len__(self):
        """Number of points in the AddC sketch."""
        return len(self.fastpair)

    def __call__(self):
        """Return the current set of cluster centroids."""
        return self.centroids

    def __contains__(self, p):
        """Test if a given cluster centroid is in the AddC sketch."""
        return p in self.fastpair

    def __iter__(self):
        """Iterate over the entries of the underlying FastPair structure."""
        return iter(self.fastpair)

    def _step_one(self, c):
        """Step 1: move the centroid closest to `c` towards it."""
        if len(self.fastpair) > 0:
            # Single pass through list of neighbor points... this could also
            # be sped up with a spatial index, though harder to do with
            # kernel-induced distances
            # Alternatively, if it was possible to insert the new data point
            # _before_ querying for the closest pair (`step_two`), then we
            # could do it that way...
            old = min(self.fastpair.sdist(c), key=itemgetter(0))[1]
            self.fastpair._update_point(old, old.add(c))

    def _step_two(self):
        """Step 2: merge the two closest centroids once `kmax` is reached."""
        # At least two centroids are needed for a closest pair to exist.
        if len(self.fastpair) >= self.kmax and len(self.fastpair) > 1:
            _, (a, b) = self.fastpair.closest_pair()
            self.fastpair -= b
            self.fastpair._update_point(a, a.merge(b))  # Update point `a`

    def _step_three(self, c):
        """Step 3: set the redundant centroid equal to the new point."""
        self.fastpair += c

    def batch(self, points):
        """Add every point in `points`, in order, and return the sketch."""
        # No checks, no nothing... just batch processing, pure and simple
        for point in points:
            self += point
        return self

    def trim(self, p=0.01):
        """Return only clusters over threshold.

        The threshold is `p` times the mean size of the clusters whose size
        is positive.

        Returns
        -------
        list
            The clusters whose size is at least the threshold. Empty when no
            cluster has a positive size, which includes an empty sketch.
        """
        sizes = [x.size for x in self if x.size > 0]
        if not sizes:
            # The mean is undefined here; without this guard the division
            # below raises ZeroDivisionError.
            return []
        t = (sum(sizes)/len(sizes)) * p
        return [x for x in self if x.size >= t]

    @property
    def centroids(self):
        """The `center` of every centroid in the sketch (e.g. for plotting)."""
        return [c.center for c in self.fastpair]
Ejemplo n.º 24
0
class AddC(object):
    """Implements the AddC clustering algorithm.

    For each data point arriving, the closest centroid to the incoming point
    is moved towards the point. If there are more than `kmax` centroids,
    then the two closest centroids are merged. This results in the creation of
    a redundant centroid; the redundant centroid is then set equal to the new
    data point. At any time, the data-structure can be queried for the current
    set of centroids/clusters, or updated with additional data points.
    """
    def __init__(self,
                 kmax=100,
                 dist=kernel_dist(gaussian),
                 centroid_factory=KernelCentroid):
        """Initialize an empty AddC sketch.

        Parameters
        ----------
        kmax : int, default=100
            The maximum number of cluster centroids to store (i.e., size of
            memory). This parameter controls the 'scale' of the desired
            solution, such that larger values of `kmax` will lead to a higher
            resolution cluster solution.
        dist : default=kernel_dist(gaussian)
            Distance passed to the underlying FastPair structure as `dist`.
        centroid_factory : default=KernelCentroid
            Called with each incoming point to create the centroid that is
            inserted into the sketch.
        """
        self.kmax = kmax
        self.npoints = 0  # Number of points added so far
        self.centroid_factory = centroid_factory
        self.fastpair = FastPair(10, dist=dist)

    def __add__(self, p):
        """Add a point to the AddC sketch.

        The sketch is updated in place and returned, which is what lets
        `sketch += p` (as used by `batch`) work through this method.
        """
        c = self.centroid_factory(p)  # Create an 'empty' centroid at point `p`
        self._step_one(c)
        self._step_two()
        self._step_three(c)
        self.npoints += 1  # Update count of points seen so far
        return self

    def __len__(self):
        """Number of points in the AddC sketch."""
        return len(self.fastpair)

    def __call__(self):
        """Return the current set of cluster centroids."""
        return self.centroids

    def __contains__(self, p):
        """Test if a given cluster centroid is in the AddC sketch."""
        return p in self.fastpair

    def __iter__(self):
        """Iterate over the entries of the underlying FastPair structure."""
        return iter(self.fastpair)

    def _step_one(self, c):
        """Step 1: move the centroid closest to `c` towards it."""
        if len(self.fastpair) > 0:
            # Single pass through list of neighbor points... this could also
            # be sped up with a spatial index, though harder to do with
            # kernel-induced distances
            # Alternatively, if it was possible to insert the new data point
            # _before_ querying for the closest pair (`step_two`), then we
            # could do it that way...
            old = min(self.fastpair.sdist(c), key=itemgetter(0))[1]
            self.fastpair._update_point(old, old.add(c))

    def _step_two(self):
        """Step 2: merge the two closest centroids once `kmax` is reached."""
        # At least two centroids are needed for a closest pair to exist.
        if len(self.fastpair) >= self.kmax and len(self.fastpair) > 1:
            _, (a, b) = self.fastpair.closest_pair()
            self.fastpair -= b
            self.fastpair._update_point(a, a.merge(b))  # Update point `a`

    def _step_three(self, c):
        """Step 3: set the redundant centroid equal to the new point."""
        self.fastpair += c

    def batch(self, points):
        """Add every point in `points`, in order, and return the sketch."""
        # No checks, no nothing... just batch processing, pure and simple
        for point in points:
            self += point
        return self

    def trim(self, p=0.01):
        """Return only clusters over threshold.

        The threshold is `p` times the mean size of the clusters whose size
        is positive.

        Returns
        -------
        list
            The clusters whose size is at least the threshold. Empty when no
            cluster has a positive size, which includes an empty sketch.
        """
        sizes = [x.size for x in self if x.size > 0]
        if not sizes:
            # The mean is undefined here; without this guard the division
            # below raises ZeroDivisionError.
            return []
        t = (sum(sizes) / len(sizes)) * p
        return [x for x in self if x.size >= t]

    @property
    def centroids(self):
        """The `center` of every centroid in the sketch (e.g. for plotting)."""
        return [c.center for c in self.fastpair]
Ejemplo n.º 25
0
 def test_build(self, PointSet):
     """build() loads all points, one neighbor entry each, and initializes."""
     points = PointSet
     pairs = FastPair().build(points)
     expected = len(points)
     assert len(pairs) == expected
     assert len(pairs.neighbors) == expected
     assert pairs.initialized is True
Ejemplo n.º 26
0
 def test_len(self, PointSet):
     """len() is zero before build() and equals the input size afterwards."""
     points = PointSet
     pairs = FastPair()
     size_before = len(pairs)
     assert size_before == 0
     pairs.build(points)
     assert len(pairs) == len(points)
Ejemplo n.º 27
0
 def test_contains(self, PointSet):
     """A point is reported as a member only after build() has loaded it."""
     points = PointSet
     pairs = FastPair()
     probe = points[0]
     assert probe not in pairs
     pairs.build(points)
     assert probe in pairs
Ejemplo n.º 28
0
 def test_contains(self):
     """A point is reported as a member only after build() has loaded it."""
     points = PointSet()
     pairs = FastPair()
     probe = points[0]
     assert probe not in pairs
     pairs.build(points)
     assert probe in pairs