コード例 #1
0
def kmeans(samples, k, verbose):
    """Cluster samples into k groups, iterating until convergence is reported.

    Assumes samples is a list of Sample instances, k is a positive int
    and verbose is a Boolean.
    Returns a list containing k clusters.
    """
    # Pick k samples at random and start one singleton cluster from each.
    clusters = [cluster.Cluster([seed]) for seed in random.sample(samples, k)]

    # Repeat the update step until it returns a truthy "converged" flag.
    rounds = 0
    done = False
    while not done:
        rounds += 1

        # Exercise note: this call is meant to be replaced by a local
        # kmeans_iter(samples, clusters, k) implemented in this file.
        done = helper.kmeans_iter(samples, clusters, k)

        if not verbose:
            continue

        # Verbose mode: dump every cluster after each iteration.
        print('Iteration #' + str(rounds))
        for current in clusters:
            print(current)
        print('\n')  # add blank line
    return clusters
コード例 #2
0
def kmeansTest(k=2, n=20, verbose=False):
    """Generate two labelled sample sets, plot them, cluster them and report.

    k is the number of clusters passed to kmeans, n is the sample count
    passed to each util.genDistribution call, and verbose is forwarded to
    both kmeans and util.plot_cluster.  Nothing is returned; results are
    plotted and printed.
    """
    random.seed(0)  # fixed seed so every run starts from the same random state

    x_mean, x_sd = 3, 1
    y_mean, y_sd = 5, 1

    # Second set is shifted by +3 / +1 in the mean arguments relative to the first.
    first_set = util.genDistribution(x_mean, x_sd, y_mean, y_sd, n, '1.')
    second_set = util.genDistribution(x_mean + 3, x_sd, y_mean + 1, y_sd, n, '2.')
    combined = first_set + second_set

    print("before clustering")
    # Show all samples as one big cluster for the "before" picture.
    everything = cluster.Cluster(combined)
    util.plot_cluster([everything])

    print("after clustering")
    result = kmeans(combined, k, verbose)
    util.plot_cluster(result, verbose)

    print('Final result')
    for group in result:
        print('', group)
コード例 #3
0
#                    's', 'p', '*', 'h', 'H', 'D', 'd')
#     colors = ('b', 'g', 'c', 'm', 'y', 'k')
#     return [c + m for m in markers for c in colors]

def plot_cluster(clusters, verbose = False, centroid = True):
    """Plot each cluster with its own colour/marker style, then show the figure.

    clusters is an indexable sequence of cluster objects exposing
    getMembers() and a centroid attribute.  verbose is forwarded to
    plotSamples for the member points.  When centroid is true, each
    cluster's centroid is additionally plotted with the 'sr' style.
    """
    marker_codes = make_cmarkers()
    color_codes = make_cmap()
    for position, group in enumerate(clusters):
        # Style string is the colour code followed by the marker code
        # found at the cluster's position in each table.
        style = color_codes[position] + marker_codes[position]
        plotSamples(group.getMembers(), style, verbose)
        if centroid:
            plotSamples([group.centroid], 'sr')
    plt.show()



if __name__ == "__main__":
    # Manual smoke test: generate samples with the default settings, wrap
    # them in a single cluster and plot it.
    #
    # Other ad-hoc checks that were used here previously:
    #   print(minkowskiDist([0, 0], [1, 1], 1))
    #   print(minkowskiDist([0, 0], [1, 1], 2))
    #   plotSamples(samples, 'o'); plotSamples([samples[0]], 'sk'); plt.show()
    demo_cluster = cluster.Cluster(genDistribution())
    plot_cluster([demo_cluster])
コード例 #4
0
ファイル: knn.py プロジェクト: CaoWanQing/Exam-Review
    # Build the demo data set: K generated groups, one per entry of LABELS.
    random.seed(0)  # fixed seed so every run starts from the same random state
    # NOTE(review): n is not referenced in the visible code; the
    # genDistribution call below passes the literal n=20 -- confirm intent.
    n = 100
    K = 3
    LABELS = ('a', 'b', 'c')
    all_cluster = []  # one cl.Cluster per generated group
    data = []  # all generated samples, concatenated across groups
    for i in range(K):
        # Group i uses i * 2 + 1 (1, 3, 5) for the first and third arguments
        # and 1 for the second and fourth -- presumably x/y mean and spread;
        # confirm against util.genDistribution.
        tmp_data = util.genDistribution(i * 2 + 1,
                                        1,
                                        i * 2 + 1,
                                        1,
                                        n=20,
                                        label=LABELS[i])
        all_cluster.append(cl.Cluster(tmp_data))
        data += tmp_data

    def onclick(event):
        """Add a sample at the clicked position, run knn on it and draw it.

        event must expose xdata and ydata, which become the new sample's
        two features.  The sample is appended to the enclosing data list,
        so later clicks also see it as a candidate neighbour.
        """
        # Creating a new point and finding the k nearest neighbours
        # NOTE(review): if this is a matplotlib mouse event, xdata/ydata are
        # None for clicks outside the axes -- confirm whether that needs a guard.
        new = sample.Sample('', [event.xdata, event.ydata], '')
        # The code below looks up new.getLabel() in LABELS, so knn presumably
        # assigns one of those labels to `new` -- confirm against knn.
        knn(new, data, K)

        # draw the new point
        data.append(new)
        # Marker and colour are chosen by the position of the sample's label
        # within LABELS.
        pylab.scatter([new.getFeatures()[0]], \
                      [new.getFeatures()[1]], \
                      label = new.getLabel(), \
                      marker = util.make_cmarkers()[LABELS.index(new.getLabel())], \
                      color = util.make_cmap()[LABELS.index(new.getLabel())])
        pylab.draw()