def update(self):
    """Adopt the pending dataset as the covered one, if it differs.

    Returns:
        True when ``self.nextDataset`` differed from ``self.coveredDataset``:
        a copy of it becomes the new covered dataset and ``self.position``
        is set to that copy's ``median()``.
        False when both compare equal: nothing moves, and
        ``self.nextDataset`` is replaced by a new, empty, normalized Dataset.
    """
    changed = not (self.coveredDataset == self.nextDataset)
    if changed:
        # Work on a copy so the pending dataset can be replaced later
        # without touching what this mean currently covers.
        self.coveredDataset = self.nextDataset.copy()
        self.position = self.coveredDataset.median()
        return True

    # Same coverage as before: only reset the pending dataset.
    self.nextDataset = Dataset(normalized=True, data=[])
    return False
def getOffsetsGrouped(self, groups, image, kpsA, kpsB, matches, status):
    """Cluster the match offsets into ``groups`` clusters.

    The offsets are obtained from
    ``self.getOffsets(kpsA, kpsB, matches, status)``, wrapped in a Dataset
    and kept on ``self.offsetsDataset``. A BisectingKmeans (5 trials,
    10 max rounds) is then run over that dataset with a key that selects
    components 0, 2 and 3 of each entry.

    ``image`` is accepted but not used by this method.

    Returns:
        The ``means`` attribute of the BisectingKmeans after ``run()``.
    """
    def clusteringKey(entry):
        # Only components 0, 2 and 3 of an entry are handed to the clusterer.
        return [entry[0], entry[2], entry[3]]

    rawOffsets = self.getOffsets(kpsA, kpsB, matches, status)
    self.offsetsDataset = Dataset(data=rawOffsets)

    clusterer = BisectingKmeans(
        dataset=self.offsetsDataset,
        k=groups,
        trials=5,
        maxRounds=10,
        key=clusteringKey,
    )
    clusterer.run()
    return clusterer.means
def __init__(self, id, dimensions, key=lambda x: x):
    """Create a mean with a random starting position.

    id         - identifier stored on the instance
    dimensions - number of coordinates of the position; when None the
                 position starts out as an empty list
    key        - callable stored on the instance (identity by default)
    """
    self.id = id
    self.dimensions = dimensions
    self.key = key

    # One random.random() draw per dimension, or no coordinates at all
    # when the dimension count is unknown.
    self.position = (
        []
        if dimensions is None
        else [random.random() for _ in range(dimensions)]
    )

    # No dataset is covered yet; points are collected in nextDataset.
    self.coveredDataset = None
    self.nextDataset = Dataset(normalized=True, data=[])

    self.dirt = False
    # Error figures start out unset.
    self.meanSquaredError = self.totalSquaredError = None
def clear(self):
    """Replace ``self.nextDataset`` with a new, empty, normalized Dataset."""
    emptyDataset = Dataset(data=[], normalized=True)
    self.nextDataset = emptyDataset
def __init__(self, dataset, k, trials, maxRounds, key=lambda x: x): """ dataset - The aim dataset k - The number of means trials - How many times the algorithm will be executed maxRounds - The maximum number of iterations before stop each execution """ self.dataset = dataset self.k = k self.trials = trials self.maxRounds = maxRounds self.key = key def run(self): self.solutions = [KmeansSolution(self.dataset, self.k, self.maxRounds, key=self.key) for t in range(self.trials)] self.solutions.sort(key=lambda s: s.meanSquaredError) def getBestSolution(self): return self.solutions[0] def showResults(self): for solution in self.solutions: print(solution) if __name__ == "__main__": ds = Dataset(data=[[0, 0], [1, 1]]) k = Kmeans(dataset=ds, k=2, trials=5, maxRounds=3) k.run() k.showResults()
from kmeans.dataset import Dataset
from kmeans.bisectingKmeans import BisectingKmeans
from utils import DataLoader, MeansVisualizer, KneeFinder

# Field descriptions handed to DataLoader and MeansVisualizer: four named
# measurement columns, then the 'class' column whose 'types' entry maps each
# class label to an integer.
irisFields = [
    {'name': label}
    for label in ('sepal length', 'sepal width',
                  'petal length', 'petal width')
]
irisFields.append({
    'name': 'class',
    'types': {
        'Iris-setosa': 0,
        'Iris-versicolor': 1,
        'Iris-virginica': 2,
    },
})

# Load 'iris.data' and wrap the loaded rows in a Dataset.
loader = DataLoader('iris.data', irisFields)
ds = Dataset(data=loader.data)

# Run KneeFinder with krange [1, 10] and show its result.
kf = KneeFinder(dataset=ds, krange=[1, 10], trials=50, maxRounds=100)
kf.run()
kf.show()

# Bisecting k-means with k=2, then visualise the resulting means.
bisection = BisectingKmeans(dataset=ds, k=2, trials=30, maxRounds=100)
bisection.run()

visualizer = MeansVisualizer(bisection.means, irisFields)
visualizer.show()
worstDataset = worstCluster.coveredDataset bisection = Kmeans(dataset=worstDataset, k=2, trials=self.trials, maxRounds=self.trials, key=self.key) bisection.run() bisectionSolution = bisection.getBestSolution() self.means += bisectionSolution.means worstCluster = max(self.means, key=lambda m: m.getMeanSquaredError()) # if the number of means is not enouth remove the worst cluster # found to bisect it in the next iteration. if len(self.means) < self.k: self.means.remove(worstCluster) self.setMeanSquaredError() def showResults(self): print('\n\n'.join([str(mean) for mean in self.means])) if __name__ == "__main__": ds = Dataset(data=[[0, 0], [1, 1], [0.9, 0.9], [0.5, 0.5]]) bisection = BisectingKmeans(dataset=ds, k=4, trials=20, maxRounds=3) bisection.run() bisection.showResults()