Beispiel #1
0
 def update(self):
     """Adopt the pending dataset and recompute the position.

     When ``coveredDataset`` already equals ``nextDataset`` nothing is
     adopted: ``nextDataset`` is replaced by a new empty normalized
     Dataset and False is returned. Otherwise a copy of ``nextDataset``
     becomes ``coveredDataset``, ``position`` is set to that copy's
     median, and True is returned.
     """
     unchanged = self.coveredDataset == self.nextDataset
     if unchanged:
         self.nextDataset = Dataset(normalized=True, data=[])
         return False

     snapshot = self.nextDataset.copy()
     self.coveredDataset = snapshot
     self.position = snapshot.median()
     return True
Beispiel #2
0
 def getOffsetsGrouped(self, groups, image, kpsA, kpsB, matches, status):
     """Cluster the offsets from ``self.getOffsets`` into ``groups`` means.

     The offsets are obtained from
     ``self.getOffsets(kpsA, kpsB, matches, status)`` and stored, wrapped
     in a Dataset, on ``self.offsetsDataset``. ``image`` is accepted but
     not used in this method.

     Returns the ``means`` attribute of the BisectingKmeans instance after
     it has been run.
     """
     def clusteringKey(offset):
         # Only components 0, 2 and 3 of each offset are handed to the
         # clustering; component 1 is left out.
         return [offset[0], offset[2], offset[3]]

     rawOffsets = self.getOffsets(kpsA, kpsB, matches, status)
     self.offsetsDataset = Dataset(data=rawOffsets)

     clusterer = BisectingKmeans(
         dataset=self.offsetsDataset,
         k=groups,
         trials=5,
         maxRounds=10,
         key=clusteringKey,
     )
     clusterer.run()
     return clusterer.means
Beispiel #3
0
 def __init__(self, id, dimensions, key=lambda x: x):
     """Initialise the instance with an identifier and a start position.

     id - Identifier stored on the instance
     dimensions - Number of coordinates of the start position; when None
                  the position starts as an empty list
     key - Callable stored on the instance (identity by default)
     """
     self.id = id
     self.dimensions = dimensions
     self.key = key

     # One random.random() value per dimension, or no coordinates at all
     # when the dimensionality is not given.
     self.position = (
         [] if self.dimensions is None
         else [random.random() for _ in range(dimensions)]
     )

     # No dataset is covered yet; the pending one starts empty.
     self.coveredDataset = None
     self.nextDataset = Dataset(normalized=True, data=[])

     self.dirt = False
     self.meanSquaredError = None
     self.totalSquaredError = None
Beispiel #4
0
 def clear(self):
     """Replace ``nextDataset`` with a new empty normalized Dataset."""
     emptyDataset = Dataset(normalized=True, data=[])
     self.nextDataset = emptyDataset
Beispiel #5
0
    def __init__(self, dataset, k, trials, maxRounds, key=lambda x: x):
        """
        Store the configuration used later by run().

        dataset - The aim dataset
        k - The number of means
        trials - How many times the algorithm will be executed
        maxRounds - The maximum number of iterations before stop each execution
        key - Callable forwarded to every solution (identity by default)
        """
        self.dataset, self.k = dataset, k
        self.trials, self.maxRounds = trials, maxRounds
        self.key = key

    def run(self):
        """Build one KmeansSolution per trial and rank them.

        Every solution is created from the stored dataset, k, maxRounds and
        key. The solutions are kept in ``self.solutions``, sorted in place
        by ascending ``meanSquaredError``.
        """
        def byError(candidate):
            return candidate.meanSquaredError

        self.solutions = [
            KmeansSolution(self.dataset, self.k, self.maxRounds, key=self.key)
            for _ in range(self.trials)
        ]
        self.solutions.sort(key=byError)

    def getBestSolution(self):
        """Return the first entry of ``self.solutions``.

        run() sorts that list by ascending meanSquaredError, so after run()
        the first entry is the solution with the lowest error.
        """
        best = self.solutions[0]
        return best

    def showResults(self):
        """Print every stored solution, one print call per solution."""
        for candidate in self.solutions:
            print(candidate)

if __name__ == "__main__":
    # Small demo when run as a script: two points, two means, five trials
    # of at most three rounds each, then print every solution.
    demoData = Dataset(data=[[0, 0], [1, 1]])
    demo = Kmeans(dataset=demoData, k=2, trials=5, maxRounds=3)
    demo.run()
    demo.showResults()
Beispiel #6
0
from kmeans.dataset import Dataset
from kmeans.bisectingKmeans import BisectingKmeans
from utils import DataLoader, MeansVisualizer, KneeFinder

# Field descriptions passed to DataLoader and MeansVisualizer. The last
# field additionally maps each class label to an integer code.
irisFields = [
    {'name': 'sepal length'},
    {'name': 'sepal width'},
    {'name': 'petal length'},
    {'name': 'petal width'},
    {
        'name': 'class',
        'types': {
            'Iris-setosa': 0,
            'Iris-versicolor': 1,
            'Iris-virginica': 2,
        },
    },
]

# Load the data file and wrap the loaded rows in a Dataset.
loader = DataLoader('iris.data', irisFields)
ds = Dataset(data=loader.data)

# Run the knee finder over the dataset and display its result.
kf = KneeFinder(dataset=ds, krange=[1, 10], trials=50, maxRounds=100)
kf.run()
kf.show()

# Bisect the dataset into two means and visualise them.
bisection = BisectingKmeans(dataset=ds, k=2, trials=30, maxRounds=100)
bisection.run()
visualizer = MeansVisualizer(bisection.means, irisFields)
visualizer.show()
                worstDataset = worstCluster.coveredDataset

            bisection = Kmeans(dataset=worstDataset,
                               k=2,
                               trials=self.trials,
                               maxRounds=self.trials,
                               key=self.key)
            bisection.run()
            bisectionSolution = bisection.getBestSolution()

            self.means += bisectionSolution.means

            worstCluster = max(self.means,
                               key=lambda m: m.getMeanSquaredError())

            # If there are not yet enough means, remove the worst cluster
            # found so that it is bisected in the next iteration.
            if len(self.means) < self.k:
                self.means.remove(worstCluster)
        self.setMeanSquaredError()

    def showResults(self):
        """Print the string form of every mean, separated by a blank line."""
        print('\n\n'.join(map(str, self.means)))


if __name__ == "__main__":
    # Small demo when run as a script: bisect four 2-D points into four
    # means and print them.
    demoData = Dataset(data=[[0, 0], [1, 1], [0.9, 0.9], [0.5, 0.5]])
    demo = BisectingKmeans(dataset=demoData, k=4, trials=20, maxRounds=3)
    demo.run()
    demo.showResults()