Example #1
0
class Mean():
    """
    A mean of the k-means: one cluster centre together with the points
    currently assigned to it.

    Points are collected into ``nextDataset`` through ``cover()``; a call to
    ``update()`` promotes that collection to ``coveredDataset`` and moves
    ``position`` to the value returned by ``coveredDataset.median()``.

    The squared-error values are cached in ``totalSquaredError`` and
    ``meanSquaredError``; ``dirt`` marks the caches as out of date.
    """
    def __init__(self, id, dimensions, key=lambda x: x):
        """
        Create a mean.

        id         -- identifier of this mean (formatted with ``%i`` in
                      ``__repr__``).
        dimensions -- number of coordinates; each one is initialised with
                      ``random.random()``. ``None`` gives an empty position.
        key        -- callable mapping a data point to the coordinate
                      sequence compared against ``position``.
        """
        self.id = id
        self.dimensions = dimensions
        self.key = key
        if self.dimensions is None:
            self.position = []
        else:
            self.position = [random.random() for _ in range(dimensions)]
        self.coveredDataset = None
        self.clear()  # start with an empty nextDataset
        self.dirt = False
        self.meanSquaredError = None
        self.totalSquaredError = None

    def update(self):
        """
        Promote the collected points to the covered dataset.

        Returns False (after emptying ``nextDataset``) when the collected
        points compare equal to the dataset already covered; otherwise
        copies them into ``coveredDataset``, recomputes ``position`` and
        returns True.
        """
        if self.coveredDataset == self.nextDataset:
            self.clear()
            return False
        self.coveredDataset = self.nextDataset.copy()
        self.position = self.coveredDataset.median()
        # Both cached error values depend on coveredDataset and position,
        # which have just changed, so they must be recomputed.
        self.dirt = True
        return True

    def clear(self):
        """
        Replace ``nextDataset`` with a fresh, empty dataset.
        """
        self.nextDataset = Dataset(normalized=True, data=[])

    def cover(self, point):
        """
        Add ``point`` to the dataset collected for the next update.
        """
        self.nextDataset.append(point)
        self.dirt = True

    def __repr__(self):
        return "<Mean id: %i position:%s dataset:%s>" % (
            self.id, str(self.position), repr(self.coveredDataset))

    def distanceSqrd(self, point):
        """
        Return the squared Euclidean distance between ``key(point)`` and
        this mean's position.
        """
        coords = self.key(point)
        # Indexing (rather than zip) keeps the IndexError when position has
        # fewer coordinates than the point.
        return sum((value - self.position[i]) ** 2
                   for i, value in enumerate(coords))

    def getMeanSquaredError(self, key=lambda x: x):
        """
        Return the total squared error divided by the number of covered
        points, or ``float('inf')`` when no points are covered.

        ``key`` is accepted for interface compatibility and is not used.
        """
        if self.coveredDataset is None or len(self.coveredDataset) == 0:
            return float('inf')
        # Refreshes the total when stale and, in that case, resets the
        # cached mean to None so it is re-derived below.
        total = self.getTotalSquaredError()
        if self.meanSquaredError is None:
            self.meanSquaredError = total / len(self.coveredDataset)
        return self.meanSquaredError

    def getTotalSquaredError(self, key=lambda x: x):
        """
        Return the sum of ``distanceSqrd`` over all covered points
        (0 when nothing has been covered yet).

        ``key`` is accepted for interface compatibility and is not used.
        """
        if self.coveredDataset is None:
            return 0
        if self.dirt or self.totalSquaredError is None:
            self.totalSquaredError = sum(
                self.distanceSqrd(point) for point in self.coveredDataset)
            # The mean is derived from the total; drop it so that clearing
            # the dirt flag here cannot leave a stale mean behind.
            self.meanSquaredError = None
            self.dirt = False
        return self.totalSquaredError

    def getCoveredDataset(self, limits=None, normalized=True):
        """
        Return the covered dataset.

        With ``normalized`` true the result of
        ``coveredDataset.genUnnormalized(limits)`` is returned, otherwise
        ``coveredDataset`` itself.
        NOTE(review): the flag presumably states that the stored data is
        normalized and should be converted back - confirm against Dataset.
        """
        if normalized:
            return self.coveredDataset.genUnnormalized(limits)
        return self.coveredDataset