예제 #1
0
    def cluster(self, data, n_clusters):
        """Cluster ``data`` into ``n_clusters`` groups with a particle swarm search.

        Each of the ``self.n_particles`` particles is one candidate set of
        ``n_clusters`` centroids, initialised on randomly drawn rows of
        ``data``. On every iteration a particle's velocity is rebuilt from
        its previous velocity (weighted by ``self.w``), a randomly scaled pull
        toward its own best position (``self.c1``) and a randomly scaled pull
        toward the swarm's best position (``self.c2``). Lower ``score``
        values are treated as better.

        :param data: 2-D array whose rows are the datapoints.
        :param n_clusters: number of centroids in each candidate solution.
        :return: the result of ``getlabels`` for the best centroid set found.
        """
        n, d = shape(data)
        swarm_shape = (self.n_particles, n_clusters, d)

        # Start every centroid of every particle on a randomly chosen datapoint.
        # Slice assignment copies the row, so no explicit copy is needed.
        locations = zeros(swarm_shape)
        for i in range(self.n_particles):
            for j in range(n_clusters):
                locations[i, j, :] = data[randint(n), :]

        velocities = zeros(swarm_shape)

        # Per-particle bests start at the initial positions.
        bestlocations = copy(locations)
        bestscores = [score(data, centroids=locations[i, :, :], norm=self.norm)
                      for i in range(self.n_particles)]

        # Swarm-wide best; copied so later in-place moves cannot alter it.
        sbestlocation = copy(locations[argmin(bestscores), :, :])
        sbestscore = min(bestscores)

        for i in range(self.n_iterations):
            if i % self.printfreq == 0:
                # Single-argument form prints the same text on Python 2 and 3.
                print("Particle swarm iteration %s best score: %s" % (i, sbestscore))

            for j in range(self.n_particles):
                # Independent random factors for the personal and swarm pulls.
                r = rand(n_clusters, d)
                s = rand(n_clusters, d)

                inertia = self.w * velocities[j, :, :]
                personal_pull = self.c1 * r * (bestlocations[j, :, :] - locations[j, :, :])
                swarm_pull = self.c2 * s * (sbestlocation - locations[j, :, :])

                velocities[j, :, :] = inertia + personal_pull + swarm_pull
                locations[j, :, :] = locations[j, :, :] + velocities[j, :, :]

                currentscore = score(data, centroids=locations[j, :, :], norm=self.norm)
                if currentscore < bestscores[j]:
                    bestscores[j] = currentscore
                    bestlocations[j, :, :] = locations[j, :, :]
                    # The swarm best is refreshed immediately, so particles
                    # processed later in this sweep already see it.
                    if currentscore < sbestscore:
                        sbestscore = currentscore
                        sbestlocation = copy(locations[j, :, :])

        return getlabels(data, centroids=sbestlocation, norm=self.norm)
예제 #2
0
    def cluster(self, data, n_clusters):
        """Cluster ``data`` into ``n_clusters`` groups with an artificial bee colony search.

        Each of the ``self.n_bees`` bees holds one candidate set of
        ``n_clusters`` centroids, initialised on randomly drawn rows of
        ``data``. Every iteration runs three passes, each of which hands a
        candidate to ``self.update``:

        1. every bee in turn proposes a candidate via ``getnewcentroids``
           (``update`` is called with its last argument ``True``);
        2. ``self.n_bees`` bees are drawn at random, weighted by their current
           score, and each proposes a candidate (last argument ``False``);
        3. every bee whose ``changecount`` has reached ``self.limit`` is
           offered a fresh random centroid set (last argument ``False``).

        Lower ``score`` values are treated as better.

        :param data: 2-D array whose rows are the datapoints.
        :param n_clusters: number of centroids in each candidate solution.
        :return: the result of ``getlabels`` for the best centroid set found.
        """
        n, d = shape(data)

        # Start every centroid of every bee on a randomly chosen datapoint.
        # Slice assignment copies the row, so no explicit copy is needed.
        locations = zeros((self.n_bees, n_clusters, d))
        for i in range(self.n_bees):
            for j in range(n_clusters):
                locations[i, j, :] = data[randint(n), :]

        currentscore = array([score(data, centroids=locations[i, :, :], norm=self.norm)
                              for i in range(self.n_bees)])
        # Maintained by self.update; presumably counts unsuccessful trials
        # per bee -- confirm against update().
        changecount = zeros(self.n_bees)
        bestlocation = copy(locations[argmin(currentscore), :, :])
        bestscore = min(currentscore)

        for it in range(self.n_iter):
            if it % self.printfreq == 0:
                # Single-argument form prints the same text on Python 2 and 3.
                print("Artificial Bee iteration %s best score: %s" % (it, bestscore))

            # Pass 1: every bee explores around its own position.
            for k in range(self.n_bees):
                newcentroids, newscore = self.getnewcentroids(data, locations, k)
                locations, currentscore, bestscore, bestlocation, changecount = self.update(
                    k, locations, newscore, newcentroids, currentscore, bestscore, bestlocation, changecount, True)

            # Pass 2: bees are picked at random, weighted by current score.
            # NOTE(review): the weight is proportional to the raw score, while
            # the best solution is the one with the *lowest* score, so this
            # favours the currently worst bees -- confirm this is intended.
            for _ in range(self.n_bees):
                k = choice(self.n_bees, p=currentscore/sum(currentscore))
                newcentroids, newscore = self.getnewcentroids(data, locations, k)
                locations, currentscore, bestscore, bestlocation, changecount = self.update(
                    k, locations, newscore, newcentroids, currentscore, bestscore, bestlocation, changecount, False)

            # Pass 3: re-seed each bee that has reached the limit.
            # nonzero() returns a tuple with one index array per dimension;
            # take element 0 so the loop visits each index individually rather
            # than running once with the whole index array as ``k``.
            for k in nonzero(changecount >= self.limit)[0]:
                newcentroids = array([data[randint(n), :] for _ in range(n_clusters)], copy=True)
                newscore = score(data, centroids=newcentroids, norm=self.norm)
                locations, currentscore, bestscore, bestlocation, changecount = self.update(
                    k, locations, newscore, newcentroids, currentscore, bestscore, bestlocation, changecount, False)

        return getlabels(data, centroids=bestlocation, norm=self.norm)
예제 #3
0
    def cluster(self, data, n_clusters):
        """Cluster ``data`` into ``n_clusters`` groups with an ant colony search.

        A pheromone table of shape ``(n_samples, n_clusters)`` starts at
        ``self.t0``. In every iteration each of the ``self.n_ants`` ants
        builds a full assignment: it visits the samples in random order and
        assigns each one to a cluster, either the highest-scoring one (with
        probability ``self.q0``) or one drawn proportionally to the values
        returned by ``self.centroidscore``. After each assignment the chosen
        cluster's centroid becomes the mean of the samples assigned to it so
        far. Once all ants have finished, the pheromone is decayed by
        ``self.ro`` and reinforced along the best assignment found so far
        (across all iterations), scaled by ``1 / bestscore``.

        :param data: 2-D array whose rows are the datapoints.
        :param n_clusters: number of clusters to form.
        :return: the result of ``getlabels`` for the best centroid set found.
        :raises AssertionError: if ``self.n_ants`` is not lower than the
            number of samples.
        """
        n_samples, n_features = shape(data)
        assert self.n_ants < n_samples, "number of ants must be lower than number of samples"

        bestscore = float('inf')
        bestcentroids = None
        bestweights = None

        pheromone = self.t0 * ones((n_samples, n_clusters))

        for it in range(self.n_iter):

            for _ in range(self.n_ants):

                # weights[i, j] is True once sample i has been put in cluster j.
                weights = zeros((n_samples, n_clusters), dtype='bool')
                # Clusters start on random datapoints until they get a member.
                centroids = array([data[randint(n_samples), :] for _ in range(n_clusters)], copy=True)

                # Running per-cluster sums and counts give the same mean as
                # averaging over the whole dataset after every assignment,
                # without rescanning all samples each time.
                member_sums = zeros((n_clusters, n_features))
                member_counts = zeros(n_clusters)

                for i in permutation(n_samples):
                    scores = self.centroidscore(data[i, :], centroids, pheromone[i, :], self.beta)

                    if randfloat() < self.q0:
                        j = argmax(scores)  # exploit
                    else:
                        j = choice(n_clusters, p=(scores / sum(scores)))  # explore

                    weights[i, j] = True
                    member_sums[j, :] += data[i, :]
                    member_counts[j] += 1
                    centroids[j, :] = member_sums[j, :] / member_counts[j]

                # NOTE(review): self.beta is used both as the last argument to
                # centroidscore and as the norm for scoring/labelling --
                # confirm that sharing one attribute is intended.
                currentscore = score(data, centroids=centroids, norm=self.beta)
                if currentscore < bestscore:
                    bestscore = currentscore
                    bestcentroids = copy(centroids)
                    bestweights = copy(weights)

            # Decay, then reinforce along the best assignment seen so far.
            pheromone = (self.ro * pheromone) + ((1.0 / bestscore) * bestweights)

            if it % self.printfreq == 0:
                # Single-argument form prints the same text on Python 2 and 3.
                print("Ant Colony iteration %s best score: %s" % (it, bestscore))

        return getlabels(data, centroids=bestcentroids, norm=self.beta)
예제 #4
0
 def getnewcentroids(self, data, locations, k):
     """Propose a candidate centroid set for bee ``k`` and score it.

     The candidate is bee ``k``'s current position moved along the line
     through a randomly chosen partner bee's position, by a random factor
     drawn from ``randfloat(-1.0, 1.0)``.

     :param data: datapoints, passed through to ``score``.
     :param locations: array indexed as ``locations[bee, :, :]`` holding
         every bee's current centroids.
     :param k: index of the bee to perturb.
     :return: tuple ``(newcentroids, newscore)``.
     """
     if self.n_bees > 1:
         # Draw the partner among the *other* bees: picking k itself would
         # make the difference term zero and re-score an unchanged position.
         a = randint(self.n_bees - 1)
         if a >= k:
             a += 1
     else:
         # Only one bee exists, so no distinct partner is available.
         a = k
     theta = randfloat(-1.0, 1.0)
     newcentroids = locations[k, :, :] + (theta * (locations[k, :, :] - locations[a, :, :]))
     newscore = score(data, centroids=newcentroids, norm=self.norm)
     return newcentroids, newscore