Esempio n. 1
0
def kmeansTest(k=2, n=20, verbose=False):
    """Run kmeans on two generated sample batches, plotting before and after.

    Seeds ``random`` with 0, requests two batches from
    ``util.genDistribution`` (the second with its x/y mean arguments
    shifted by +3 and +1), pools them, and plots the pool as one cluster.
    It then runs ``kmeans`` on the pool, plots the returned clusters and
    prints each of them.

    Args:
        k: forwarded to ``kmeans`` (presumably the number of clusters --
            confirm against ``kmeans``).
        n: forwarded to each ``util.genDistribution`` call (presumably the
            number of samples per batch -- confirm against ``util``).
        verbose: forwarded to ``kmeans`` and to the second
            ``util.plot_cluster`` call.
    """
    random.seed(0)
    mean_x, mean_y = 3, 5
    sd_x = sd_y = 1

    first_batch = util.genDistribution(mean_x, sd_x, mean_y, sd_y, n, '1.')
    second_batch = util.genDistribution(
        mean_x + 3, sd_x, mean_y + 1, sd_y, n, '2.')
    pooled = first_batch + second_batch

    # Show everything as one undifferentiated cluster first.
    print("before clustering")
    util.plot_cluster([cluster.Cluster(pooled)])

    print("after clustering")
    found = kmeans(pooled, k, verbose)
    util.plot_cluster(found, verbose)

    print('Final result')
    for group in found:
        print('', group)
Esempio n. 2
0
def make_data(n, scale=1):
    """Return n noisy samples scattered around the line y = x.

    Sample number i starts at (i / scale, i / scale) and is then displaced
    by the i-th item returned from
    ``util.genDistribution(xSD=0.3, ySD=0.3, n=n)``; the two are combined
    with ``+``.  Change ``scale`` to change the range the points cover.
    """
    on_line = []
    for step in range(n):
        on_line.append(
            sample.Sample('', [float(step) / scale, float(step) / scale], ''))

    jitter = util.genDistribution(xSD=0.3, ySD=0.3, n=n)

    noisy = []
    for idx in range(n):
        noisy.append(on_line[idx] + jitter[idx])
    return noisy
Esempio n. 3
0
def make_data(n):
    """Return n randomly labelled, noisy samples around the line y = x.

    A label is drawn from ``LABELS`` for every sample and is used as both
    the first and the third ``sample.Sample`` argument.  Sample number i
    starts at (i / SCALE, i / SCALE) and is displaced by the i-th item
    returned from ``util.genDistribution`` called with 0 and
    ``DEV * SCALE`` for both axes; the two are combined with ``+``.
    """
    # Draw all labels up front so the random calls happen before the
    # noise is generated.
    tags = []
    for _ in range(n):
        tags.append(random.choice(LABELS))

    on_line = []
    for idx, tag in enumerate(tags):
        on_line.append(
            sample.Sample(tag,
                          [idx / (float(SCALE)), idx / (float(SCALE))],
                          tag))

    centre = 0
    spread = DEV * SCALE
    jitter = util.genDistribution(centre, spread, centre, spread, n, '')

    noisy = []
    for idx in range(n):
        noisy.append(on_line[idx] + jitter[idx])
    return noisy
Esempio n. 4
0
    #### Implement the centroid updating function here!
    def update(self, samples):
        """Swap this cluster's samples for ``samples``.

        The work is delegated to ``helper.update``.

        Return: how much the centroid has changed
        """
        centroid_shift = helper.update(self, samples)
        return centroid_shift

    def __str__(self):
        """Describe the cluster: centroid features, then sorted sample names.

        The names are comma-separated on a second, indented line.
        """
        ordered = sorted(member.getName() for member in self.samples)
        header = ('Cluster with centroid '
                  + str(self.centroid.getFeatures()) + ' contains:\n  ')
        if not ordered:
            # With no samples the text ends right after the newline (the
            # two-space indent is dropped).
            return header[:-2]
        return header + ', '.join(ordered)


if __name__ == "__main__":
    # Demo: build a cluster from generated samples and show it.
    initial_batch = util.genDistribution()
    demo_cluster = Cluster(initial_batch)
    print(demo_cluster.centroid)
    print("cluster center: ", demo_cluster.centroid.features)
    util.plot_cluster([demo_cluster])

    # Hand the cluster a second batch, report how far the centroid moved,
    # and plot again (plot_cluster takes a list of clusters).
    replacement_batch = util.genDistribution(1, 1, 1, 1, 30)
    shift = demo_cluster.update(replacement_batch)
    print("center moved: ", shift)
    util.plot_cluster([demo_cluster])
Esempio n. 5
0
    print(p)


if __name__ == "__main__":

    # Build the demo data set: K labelled batches of generated samples.
    # Seeding with a constant makes every run use the same random sequence.
    random.seed(0)
    n = 100  # NOTE(review): not used in the visible part of this block
    K = 3
    LABELS = ('a', 'b', 'c')
    all_cluster = []  # one cl.Cluster per generated batch
    data = []  # every generated sample, all batches pooled
    for i in range(K):
        # Batch i gets 2*i + 1 for the first and third arguments
        # (presumably the x and y means -- confirm against util) and 1 for
        # the second and fourth, with label LABELS[i].
        tmp_data = util.genDistribution(i * 2 + 1,
                                        1,
                                        i * 2 + 1,
                                        1,
                                        n=20,
                                        label=LABELS[i])
        all_cluster.append(cl.Cluster(tmp_data))
        data += tmp_data

    def onclick(event):
        # Creating a new point and finding the k nearest neighbours
        new = sample.Sample('', [event.xdata, event.ydata], '')
        knn(new, data, K)

        # draw the new point
        data.append(new)
        pylab.scatter([new.getFeatures()[0]], \
                      [new.getFeatures()[1]], \
                      label = new.getLabel(), \
Esempio n. 6
0
    """

    max_label = util.LABELS[0]
    p.setLabel(max_label)
    # above forces a fixed label: remove them
    # replace knn_helper.knn(p, data, k) with your own logic
    print(p)
    knn_helper.knn(p, data, k)
    print(p)


if __name__ == "__main__":
    # Demo: label generated samples at random, then run knn on a new point.
    random.seed(0)
    n = 100
    K = 3

    data = util.genDistribution(n=10)
    for point in data:
        point.setLabel(random.choice(util.LABELS))

    print("before....")
    util.plot_data(data)

    # The query point starts with empty name and label arguments.
    query = sample.Sample('', [0.2, 0.3], '')
    knn(query, data, K)

    # Add the query to the data set so it shows up in the second plot.
    data.append(query)
    print("\nafter....")
    util.plot_data(data)