Beispiel #1
0
 def update(self):
     """
     Promote the pending dataset to the covered dataset.

     Returns False when the pending dataset compares equal to the covered
     one; in that case the pending dataset is replaced by a new empty
     Dataset and nothing else changes. Otherwise a copy of the pending
     dataset becomes the covered dataset, the position is set to that
     dataset's median(), and True is returned.
     """
     unchanged = self.coveredDataset == self.nextDataset
     if not unchanged:
         # Adopt a copy so later changes to nextDataset do not leak in.
         self.coveredDataset = self.nextDataset.copy()
         self.position = self.coveredDataset.median()
         return True
     self.nextDataset = Dataset(data=[], normalized=True)
     return False
Beispiel #2
0
 def __init__(self, id, dimensions, key=lambda x: x):
     """
     Set up a mean with a random starting position.

     id - identifier stored on the instance
     dimensions - number of coordinates of the position; None gives an
                  empty position
     key - callable stored as self.key (defaults to the identity)
     """
     self.id = id
     self.dimensions = dimensions
     self.key = key
     # One random.random() draw per dimension, or no coordinates at all
     # when the dimension count is unknown.
     self.position = (
         [] if self.dimensions is None
         else [random.random() for _ in range(dimensions)]
     )
     # Nothing is covered yet; points are collected in nextDataset.
     self.coveredDataset = None
     self.nextDataset = Dataset(data=[], normalized=True)
     self.dirt = False
     # Error values start out unset.
     self.meanSquaredError = self.totalSquaredError = None
Beispiel #3
0
 def getOffsetsGrouped(self, groups, image, kpsA, kpsB, matches, status):
     """
     Cluster the offsets of the matched keypoints and return the means.

     The offsets come from self.getOffsets(kpsA, kpsB, matches, status) and
     are stored, wrapped in a Dataset, as self.offsetsDataset. They are then
     clustered with BisectingKmeans into `groups` clusters, using elements
     0, 2 and 3 of each offset as the clustering key.

     `image` is accepted but not used by this method.

     Returns the `means` attribute of the BisectingKmeans instance after
     run().
     """
     self.offsetsDataset = Dataset(
         data=self.getOffsets(kpsA, kpsB, matches, status))

     def clusteringKey(offset):
         # Only components 0, 2 and 3 take part in the clustering.
         return [offset[0], offset[2], offset[3]]

     clusterer = BisectingKmeans(dataset=self.offsetsDataset,
                                 k=groups,
                                 trials=5,
                                 maxRounds=10,
                                 key=clusteringKey)
     clusterer.run()
     return clusterer.means
Beispiel #4
0
class Mean():
    """
    A mean of the k-means.

    A mean has a position, the dataset it currently covers
    (coveredDataset) and the dataset being collected for the next round
    (nextDataset). The squared-error values over the covered dataset are
    cached; the `dirt` flag marks the cache as stale.
    """
    def __init__(self, id, dimensions, key=lambda x: x):
        """
        id - identifier stored on the instance (formatted with %i in repr)
        dimensions - number of coordinates of the position; None gives an
                     empty position
        key - callable applied to a point before measuring its distance
              to the position (defaults to the identity)
        """
        self.id = id
        self.dimensions = dimensions
        self.key = key
        if self.dimensions is None:
            self.position = []
        else:
            self.position = [random.random() for i in range(dimensions)]
        self.coveredDataset = None
        self.nextDataset = Dataset(normalized=True, data=[])
        self.dirt = False
        self.meanSquaredError = None
        self.totalSquaredError = None

    def update(self):
        """
        Promote the pending dataset to the covered dataset.

        Returns False (after resetting the pending dataset) when the
        pending dataset equals the covered one. Otherwise adopts a copy of
        the pending dataset, moves the position to its median() and
        returns True.
        """
        if self.coveredDataset == self.nextDataset:
            self.nextDataset = Dataset(normalized=True, data=[])
            return False
        self.coveredDataset = self.nextDataset.copy()
        self.position = self.coveredDataset.median()
        # Both inputs of the error computation just changed, so any cached
        # error value is stale from here on.
        self.dirt = True
        return True

    def clear(self):
        """Replace the pending dataset with a new empty Dataset."""
        self.nextDataset = Dataset(normalized=True, data=[])

    def cover(self, point):
        """Append `point` to the pending dataset and mark the mean dirty."""
        self.nextDataset.append(point)
        self.dirt = True

    def __repr__(self):
        return "<Mean id: %i position:%s dataset:%s>" % (
            self.id, str(self.position), repr(self.coveredDataset))

    def distanceSqrd(self, point):
        """
        Return the squared Euclidean distance between key(point) and the
        position.

        Every coordinate of key(point) is paired by index with the
        position, so a position shorter than key(point) raises IndexError.
        """
        keyed = self.key(point)
        return sum((coordinate - self.position[i]) ** 2
                   for i, coordinate in enumerate(keyed))

    def getMeanSquaredError(self, key=lambda x: x):
        """
        Return the mean squared distance of the covered points to the
        position.

        Returns float('inf') when nothing is covered (coveredDataset is
        None or empty). `key` is accepted for interface compatibility and
        is not used; distances always use self.key.
        """
        if self.coveredDataset is None or len(self.coveredDataset) == 0:
            return float('inf')
        # getTotalSquaredError() drops the cached mean whenever it has to
        # recompute, so the two caches can never disagree.
        total = self.getTotalSquaredError()
        if self.meanSquaredError is None:
            self.meanSquaredError = total / len(self.coveredDataset)
        return self.meanSquaredError

    def getTotalSquaredError(self, key=lambda x: x):
        """
        Return the sum of squared distances of the covered points to the
        position, recomputing it when the mean is dirty or no value is
        cached yet.

        `key` is accepted for interface compatibility and is not used.
        """
        if self.dirt or self.totalSquaredError is None:
            self.totalSquaredError = sum(
                self.distanceSqrd(point) for point in self.coveredDataset)
            # The mean is derived from the total: force it to be rebuilt.
            self.meanSquaredError = None
            self.dirt = False
        return self.totalSquaredError

    def getCoveredDataset(self, limits=None, normalized=True):
        """
        Return the covered dataset.

        When `normalized` is true (the default) the result is
        coveredDataset.genUnnormalized(limits); otherwise the covered
        dataset object itself is returned.
        """
        if normalized:
            return self.coveredDataset.genUnnormalized(limits)
        return self.coveredDataset
Beispiel #5
0
 def clear(self):
     """Drop the pending points by installing a new empty Dataset
     (built with normalized=True) as self.nextDataset."""
     emptyDataset = Dataset(data=[], normalized=True)
     self.nextDataset = emptyDataset
Beispiel #6
0
    def __init__(self, dataset, k, trials, maxRounds, key=lambda x: x):
        """
        Store the configuration of a multi-trial k-means execution.

        dataset - the dataset to be clustered
        k - the number of means
        trials - how many times the algorithm will be executed
        maxRounds - the maximum number of iterations before each execution
                    is stopped
        key - callable stored as self.key (defaults to the identity)
        """
        self.dataset, self.k = dataset, k
        self.trials, self.maxRounds = trials, maxRounds
        self.key = key

    def run(self):
        """
        Build one KmeansSolution per trial and keep them in
        self.solutions, sorted by ascending meanSquaredError.
        """
        candidates = []
        for _ in range(self.trials):
            candidates.append(
                KmeansSolution(self.dataset, self.k, self.maxRounds,
                               key=self.key))
        self.solutions = candidates
        # Lowest error first.
        self.solutions.sort(key=lambda solution: solution.meanSquaredError)

    def getBestSolution(self):
        """
        Return the first entry of self.solutions.

        run() sorts the solutions by ascending meanSquaredError, so after
        run() this is the solution with the lowest error.
        """
        ranked = self.solutions
        return ranked[0]

    def showResults(self):
        """Print every entry of self.solutions, one per print() call, in
        stored order."""
        for candidate in self.solutions:
            print(candidate)

if __name__ == "__main__":
    # Small manual run: two 2-D points, two means, five trials of at most
    # three rounds each, then print every solution.
    demoDataset = Dataset(data=[[0, 0], [1, 1]])
    demoKmeans = Kmeans(dataset=demoDataset, k=2, trials=5, maxRounds=3)
    demoKmeans.run()
    demoKmeans.showResults()
Beispiel #7
0
from kmeans.dataset import Dataset
from kmeans.bisectingKmeans import BisectingKmeans
from utils import DataLoader, MeansVisualizer, KneeFinder

# Field descriptions handed to DataLoader and MeansVisualizer: four
# measurement columns followed by the class column, whose text labels are
# mapped to integer codes.
irisFields = [
    {'name': measurement}
    for measurement in ('sepal length', 'sepal width',
                        'petal length', 'petal width')
]
irisFields.append({
    'name': 'class',
    'types': {
        'Iris-setosa': 0,
        'Iris-versicolor': 1,
        'Iris-virginica': 2,
    },
})

# Load the data file and wrap its rows in a Dataset.
loader = DataLoader('iris.data', irisFields)
ds = Dataset(data=loader.data)

# Run the knee finder over the configured k range and show its result.
kf = KneeFinder(dataset=ds, krange=[1, 10], trials=50, maxRounds=100)
kf.run()
kf.show()

# Bisect the dataset into two clusters and visualise the resulting means.
bisection = BisectingKmeans(dataset=ds, k=2, trials=30, maxRounds=100)
bisection.run()
visualizer = MeansVisualizer(bisection.means, irisFields)
visualizer.show()
                worstDataset = worstCluster.coveredDataset

            bisection = Kmeans(dataset=worstDataset,
                               k=2,
                               trials=self.trials,
                               maxRounds=self.trials,
                               key=self.key)
            bisection.run()
            bisectionSolution = bisection.getBestSolution()

            self.means += bisectionSolution.means

            worstCluster = max(self.means,
                               key=lambda m: m.getMeanSquaredError())

            # If the number of means is not enough, remove the worst cluster
            # found so that it is bisected in the next iteration.
            if len(self.means) < self.k:
                self.means.remove(worstCluster)
        self.setMeanSquaredError()

    def showResults(self):
        """Print the string form of every mean in self.means, separated by
        a blank line."""
        separator = '\n\n'
        print(separator.join(map(str, self.means)))


if __name__ == "__main__":
    # Small manual run: four 2-D points bisected until there are four
    # means, then print each mean.
    demoDataset = Dataset(data=[[0, 0], [1, 1], [0.9, 0.9], [0.5, 0.5]])
    demoBisection = BisectingKmeans(dataset=demoDataset, k=4, trials=20,
                                    maxRounds=3)
    demoBisection.run()
    demoBisection.showResults()