コード例 #1
0
def kmeans(X, k):
    """Cluster the rows of ``X`` into ``k`` groups with k-means.

    The previous body never updated the centroids, indexed ``X`` with a
    hard-coded ``range(100)`` instead of iterating over the samples, and
    ended with a Python 2 ``print`` statement. This version runs the
    standard assign/update loop until the centroids stop moving or 100
    iterations have been performed.

    Args:
        X: 2-D array-like with one sample per row.
        k: Number of clusters.

    Returns:
        1-D integer ndarray of length ``X.shape[0]``; entry ``i`` is the
        index of the centroid closest to sample ``i``.
    """
    # Local import so the block does not depend on how the surrounding
    # module imports NumPy (the original relied on star-imported names).
    import numpy as np

    points = np.asarray(X, dtype=float)
    # Copy as float so the update step never writes into the array
    # returned by simpleInitialization (defined elsewhere in the project).
    centroids = np.array(simpleInitialization(X, k), dtype=float)

    max_iterations = 100
    labels = np.zeros(points.shape[0], dtype=int)

    for _ in range(max_iterations):
        # distances[i, j] = Euclidean distance from sample i to centroid j.
        deltas = points[:, np.newaxis, :] - centroids[np.newaxis, :, :]
        distances = np.linalg.norm(deltas, axis=2)
        labels = np.argmin(distances, axis=1)

        old_centroids = centroids
        centroids = old_centroids.copy()
        for j in range(k):
            members = points[labels == j]
            # An empty cluster keeps its previous centroid instead of
            # turning into NaN (mean of an empty slice).
            if len(members) > 0:
                centroids[j] = members.mean(axis=0)

        if np.allclose(old_centroids, centroids):
            break

    return labels
コード例 #2
0
def kmeans(X, k):
    """Assign every row of ``X`` to one of ``k`` clusters using k-means.

    Starting centroids come from ``simpleInitialization`` and the loop
    runs until ``shouldStop`` (defined elsewhere) says to stop.

    Returns:
        ndarray holding, for each row of ``X``, the index of its nearest
        centroid as computed in the last iteration.
    """
    # Starting centroids (kmeansppInitialization(X, k) is the alternative).
    centroids = simpleInitialization(X, k)

    # Book-keeping for the stopping criterion.
    oldCentroids = None
    iterations = 0

    while not shouldStop(oldCentroids, centroids, iterations):
        # Remember the centroids used in this pass for the convergence test.
        iterations += 1
        oldCentroids = centroids

        # One row per sample, one column per centroid.
        rows = []
        for sample in X:
            rows.append([euclideanDistance(sample, center)
                         for center in centroids])
        distances = np.array(rows)

        # Index of the closest centroid for each sample.
        nearest = []
        for row in distances:
            nearest.append(np.argmin(row))
        labels = np.array(nearest)

        # New centroid = per-feature average of the samples assigned to it.
        updated = []
        for cluster in range(k):
            members = X[labels == cluster]
            updated.append([np.sum(feature) / len(feature)
                            for feature in np.transpose(members)])
        centroids = updated

    return labels
コード例 #3
0
def kmeans(X, k):
    """Cluster the rows of ``X`` into ``k`` groups with k-means.

    Each iteration assigns every sample to its nearest centroid
    (Euclidean norm) and then moves each centroid to the mean of its
    members. The loop ends when the centroids no longer change or after
    ``n_it`` (100) iterations.

    Fixes over the previous version:
      * A cluster that ends up with no members keeps its previous
        centroid; before, ``np.mean`` of an empty slice made it NaN,
        which also made the equality-based convergence test unreachable.
      * The printed iteration count is the number of iterations actually
        executed (it used to be the 0-based loop index, i.e. one too few).

    Args:
        X: 2-D ndarray with one sample per row.
        k: Number of clusters.

    Returns:
        1-D integer ndarray with the centroid index of each sample.
    """
    # Float copy: the update below writes rows in place and must not
    # touch the array handed back by simpleInitialization (defined
    # elsewhere in the project).
    centroids = np.array(simpleInitialization(X, k), dtype=float)
    labels = np.zeros(X.shape[0])
    n_it = 100
    iterations = 0

    for iterations in range(1, n_it + 1):
        # distances[i, j] = distance from sample i to centroid j.
        distances = np.array(
            [np.linalg.norm(X - centroid, axis=1) for centroid in centroids]).T

        # Nearest centroid index for every sample.
        labels = np.argmin(distances, axis=1)

        # Move each non-empty cluster's centroid to the mean of its members.
        before_centroids = centroids
        centroids = before_centroids.copy()
        for cluster in range(k):
            members = X[labels == cluster]
            if len(members) > 0:
                centroids[cluster] = np.mean(members, axis=0)

        if np.array_equal(before_centroids, centroids):
            break
    print('We found the solution in ' + str(iterations) + ' iterations!')

    return labels
コード例 #4
0
def kmeans(X, k):
    """Cluster the rows of ``X`` into ``k`` groups with k-means.

    Fixes over the previous version:
      * Samples are assigned to the NEAREST centroid (``argmin``); the
        old code used ``argmax`` and picked the farthest one.
      * Cluster sizes are derived from the current labels on every
        iteration; the old ``C_sizes`` counter was never reset, so the
        means were divided by cumulative counts.
      * Convergence compares the absolute centroid movement with the
        threshold, so moves in the negative direction are detected too.
      * The loop always runs at least once and is capped at 100
        iterations, so it can neither be skipped nor spin forever.
      * Centroids are updated on a private float copy, so the array
        returned by ``simpleInitialization`` is never mutated.

    Args:
        X: 2-D ndarray with one sample per row.
        k: Number of clusters.

    Returns:
        1-D integer ndarray of length ``X.shape[0]`` with the centroid
        index assigned to each sample.
    """
    # Local import: the surrounding module appears to rely on
    # star-imported NumPy names, so do not assume ``np`` is defined.
    import numpy as np

    C = np.array(simpleInitialization(X, k), dtype=float)

    print(C.shape)
    print(X.shape)

    n = X.shape[0]  # Size of the sample
    max_iterations = 100
    threshold = 0.1  # Largest per-coordinate move still counted as converged

    iterations = 0
    labels = np.zeros(n, dtype=int)

    while iterations < max_iterations:
        # Assignment step: nearest centroid for every sample.
        # euclideanDistance is defined elsewhere in the project.
        for i in range(n):
            distances = [euclideanDistance(X[i, :], C[j]) for j in range(k)]
            labels[i] = np.argmin(distances)

        oldC = C.copy()

        # Update step: mean of the members; an empty cluster keeps its
        # previous centroid.
        for j in range(k):
            members = X[labels == j]
            if len(members) > 0:
                C[j] = members.mean(axis=0)

        iterations += 1

        if not (np.abs(C - oldC) > threshold).any():
            break

    print("k-means exectued in {} iterations".format(iterations))
    return labels
コード例 #5
0
ファイル: kmeans.py プロジェクト: omoindrot/INF582
def kmeans(X, k):
    """Cluster the rows of ``X`` into ``k`` groups with k-means.

    Repeats the assign/update steps until the labels stop changing or
    more than 500 iterations have run, then prints the iteration count.

    Fixes over the previous version:
      * ``labels`` is an integer array. It is used to index the centroid
        accumulators, and current NumPy raises ``IndexError`` for float
        indices.
      * ``print`` is called as a function, so the code runs on Python 3
        (and prints the same thing on Python 2).
      * Means are exact and an empty cluster keeps its previous
        centroid; the old ``1e-8`` guard biased every mean slightly and
        moved empty clusters to the origin.

    Args:
        X: 2-D ndarray with one sample per row.
        k: Number of clusters.

    Returns:
        1-D integer ndarray with the centroid index of each sample.
    """
    # simpleInitialization and euclideanDistance are defined elsewhere
    # in the project.
    centroids = simpleInitialization(X, k)

    n_samples = X.shape[0]
    n_features = X.shape[1]
    iterations = 0
    labels = zeros(n_samples, dtype=int)

    notConverged = True
    while notConverged:
        # Assignment step: pick the closest centroid for each sample.
        old_labels = labels.copy()
        for i in range(n_samples):
            dist = inf
            for j in range(k):
                d = euclideanDistance(X[i], centroids[j])
                if d < dist:
                    labels[i] = j
                    dist = d

        # Update step: accumulate per-cluster sums and member counts.
        sums = zeros((k, n_features))
        counts = zeros(k)
        for i in range(n_samples):
            sums[labels[i]] += X[i]
            counts[labels[i]] += 1

        # Build the new centroids in a fresh array so the initialiser's
        # output is never modified in place.
        new_centroids = zeros((k, n_features))
        for j in range(k):
            if counts[j] > 0:
                new_centroids[j] = sums[j] / counts[j]
            else:
                new_centroids[j] = centroids[j]
        centroids = new_centroids

        iterations += 1
        if (old_labels == labels).all() or iterations > 500:
            notConverged = False
            print(iterations)

    return labels
コード例 #6
0
ファイル: kmeans.py プロジェクト: omoindrot/INF582
def kmeans(X, k):
    """Run k-means on the rows of ``X`` and return a cluster index per row.

    The loop alternates between assigning each sample to its closest
    centroid and recomputing the centroids. It stops once an iteration
    leaves the labels unchanged or the iteration counter exceeds 500,
    and prints the number of iterations performed.

    Fixes over the previous version:
      * Labels are stored as integers; float labels cannot be used as
        array indices on current NumPy (``IndexError``).
      * ``print`` is a function call, so the block is valid Python 3.
      * Each centroid is the exact mean of its members (no ``1e-8``
        bias), and a cluster with no members keeps its previous
        centroid instead of being moved to the origin.

    Args:
        X: 2-D ndarray with one sample per row.
        k: Number of clusters.

    Returns:
        1-D integer ndarray of length ``X.shape[0]``.
    """
    # Helpers simpleInitialization / euclideanDistance live elsewhere in
    # the project.
    centroids = simpleInitialization(X, k)

    num_points, num_dims = X.shape[0], X.shape[1]
    iterations = 0
    labels = zeros(num_points, dtype=int)

    while True:
        previous = labels.copy()

        # Assignment step.
        for i in range(num_points):
            best = inf
            for j in range(k):
                d = euclideanDistance(X[i], centroids[j])
                if d < best:
                    best = d
                    labels[i] = j

        # Update step: per-cluster coordinate sums and member counts.
        totals = zeros((k, num_dims))
        members = zeros(k)
        for i in range(num_points):
            totals[labels[i]] += X[i]
            members[labels[i]] += 1

        # Write into a fresh array; empty clusters carry their old
        # centroid forward.
        moved = zeros((k, num_dims))
        for j in range(k):
            moved[j] = totals[j] / members[j] if members[j] > 0 else centroids[j]
        centroids = moved

        iterations += 1
        if (previous == labels).all() or iterations > 500:
            print(iterations)
            break

    return labels