class Mean:
    """
    A mean of the k-means.

    Tracks two datasets: ``coveredDataset`` (the points this mean owned
    after the last successful ``update``) and ``nextDataset`` (the points
    collected through ``cover`` for the upcoming ``update``).  Squared-error
    statistics are computed over ``coveredDataset`` and cached.
    """

    def __init__(self, id, dimensions, key=lambda x: x):
        """
        Create a mean.

        :param id: identifier of this mean (formatted with ``%i`` in repr).
        :param dimensions: number of coordinates; when ``None`` the position
            starts empty, otherwise each coordinate is drawn from
            ``random.random()``.
        :param key: callable mapping a covered point to the coordinate
            sequence compared against ``position``.
        """
        self.id = id
        self.dimensions = dimensions
        self.key = key
        if self.dimensions is None:
            self.position = []
        else:
            self.position = [random.random() for _ in range(dimensions)]
        self.coveredDataset = None
        self.nextDataset = Dataset(normalized=True, data=[])
        # True whenever the cached error values may be out of date.
        self.dirt = False
        self.meanSquaredError = None
        self.totalSquaredError = None

    def update(self):
        """
        Adopt the points gathered in ``nextDataset``.

        :return: ``False`` when ``nextDataset`` equals the currently covered
            dataset (``nextDataset`` is reset and nothing else changes);
            ``True`` when the covered dataset and position were replaced.
        """
        if self.coveredDataset == self.nextDataset:
            self.nextDataset = Dataset(normalized=True, data=[])
            return False
        self.coveredDataset = self.nextDataset.copy()
        self.position = self.coveredDataset.median()
        # Both inputs of the error statistics just changed, so the cached
        # values must not be served any more.
        self.dirt = True
        return True

    def clear(self):
        """Discard the points collected for the next update."""
        self.nextDataset = Dataset(normalized=True, data=[])

    def cover(self, point):
        """Add ``point`` to the dataset used by the next ``update``."""
        self.nextDataset.append(point)
        self.dirt = True

    def __repr__(self):
        return "<Mean id: %i position:%s dataset:%s>" % (
            self.id, str(self.position), repr(self.coveredDataset))

    def distanceSqrd(self, point):
        """
        Return the squared Euclidean distance between ``key(point)`` and
        this mean's position.

        Coordinates are matched by index, so a point with more coordinates
        than ``position`` raises ``IndexError``.
        """
        coords = self.key(point)
        position = self.position
        return sum((value - position[i]) ** 2
                   for i, value in enumerate(coords))

    def getMeanSquaredError(self, key=lambda x: x):
        """
        Return the total squared error divided by the number of covered
        points.

        :param key: unused; kept for backward compatibility.
        :return: ``float('inf')`` when there is no covered dataset yet or it
            is empty, otherwise the mean squared error.
        """
        if self.coveredDataset is None or len(self.coveredDataset) == 0:
            return float('inf')
        # Refreshes the total (and drops the cached mean) when stale.
        total = self.getTotalSquaredError()
        if self.meanSquaredError is None:
            self.meanSquaredError = total / len(self.coveredDataset)
        return self.meanSquaredError

    def getTotalSquaredError(self, key=lambda x: x):
        """
        Return the sum of squared distances from every covered point to
        this mean's position.

        :param key: unused; kept for backward compatibility.
        :return: the cached total, recomputed when the cache is stale;
            ``0`` when there is no covered dataset yet.
        """
        if self.coveredDataset is None:
            # Nothing covered yet: behave like an empty dataset.
            return 0
        if self.dirt or self.totalSquaredError is None:
            self.totalSquaredError = sum(
                self.distanceSqrd(point) for point in self.coveredDataset)
            # The mean is derived from the total; force it to be re-derived
            # so the two caches can never disagree.
            self.meanSquaredError = None
            self.dirt = False
        return self.totalSquaredError

    def getCoveredDataset(self, limits=None, normalized=True):
        """
        Return the covered dataset.

        :param limits: forwarded to ``coveredDataset.genUnnormalized``.
        :param normalized: when true, the result of
            ``coveredDataset.genUnnormalized(limits)`` is returned; when
            false, ``coveredDataset`` itself is returned.
        """
        # NOTE(review): the flag name reads inverted relative to the branch
        # it selects -- presumably it means "the stored data is normalized,
        # so convert it back"; confirm against Dataset and the callers.
        if normalized:
            return self.coveredDataset.genUnnormalized(limits)
        return self.coveredDataset