Ejemplo n.º 1
0
def train(image, k):
    """
    Run k-means on the pixels of an image and rebuild the image from the
    resulting centroids.

    Performs ``EPOCH + 1`` rounds. Each round prints the current centroids,
    assigns every pixel to the centroid chosen by ``choose_centroid`` and then
    replaces each centroid with the mean of the pixels assigned to it. A
    centroid that received no pixels keeps its previous value instead of
    turning into NaN through a division by zero.

    :param image: sized iterable of pixels (assumed to be length-3 numeric
        vectors, because the running sums start from ``np.zeros(3)`` --
        TODO confirm)
    :param k: number of centroids
    :return: tuple ``(new_image, loss)``; ``new_image`` is an array holding,
        for every pixel, the centroid chosen for it after the last round, and
        ``loss`` is a list with one entry per round: the sum of squared
        ``distance`` values divided by the number of pixels
    """
    print("k={}:".format(k))
    data_len = len(image)
    centroids = init_centroids(image, k)
    loss = []
    for epoch in range(EPOCH + 1):
        print("iter {}: {}".format(
            epoch, ", ".join(format_centroid(c) for c in centroids)))
        sums = [np.zeros(3) for _ in range(k)]
        nums = [0] * k
        iter_loss = 0
        for pixel in image:
            centroid, i = choose_centroid(centroids, pixel)
            nums[i] += 1
            sums[i] = sums[i] + pixel
            iter_loss += np.power(distance(pixel, centroid), 2)
        # Empty clusters keep their previous centroid: total / 0 would give
        # NaN and poison every later distance computation.
        centroids = [
            total / count if count else previous
            for total, count, previous in zip(sums, nums, centroids)
        ]
        loss.append(iter_loss / data_len)

    new_img = []
    for pix in image:
        centroid, _ = choose_centroid(centroids, pix)
        new_img.append(centroid)

    return np.asarray(new_img), loss
Ejemplo n.º 2
0
 def __init__(self, k, picture):
     """Set up the clustering state for ``picture`` with ``k`` centroids.

     :param k: number of centroids; forwarded to ``init_centroids`` as ``K``
     :param picture: pixel data; forwarded to ``init_centroids`` as ``X``
     """
     self.k = k
     self.picture = picture
     # Starting centroids come from the external init_centroids helper.
     self.centroids = init_centroids(X=picture, K=k)
     # Called only after k, picture and centroids are set; presumably builds
     # an empty centroid -> pixels mapping -- confirm in reset_centroids_dict.
     self.centroid_to_pixels = self.reset_centroids_dict()
     # Starts empty; filled elsewhere in the class (not visible here).
     self.pixels_to_centroids = {}
     # Starts empty; presumably one average-loss value is appended per
     # iteration -- confirm against the training method.
     self.average_loss = []
Ejemplo n.º 3
0
def main():
    """Run the centroid-update loop on the loaded image for k = 2, 4, 8, 16.

    For every k the centroids come from ``init_centroids.init_centroids`` and
    are then refined in 11 passes. Each pass prints the current centroids,
    feeds every pixel through ``accumulate_centroids`` while counting how many
    pixels landed on each centroid index, and finally asks
    ``calculate_new_centroids`` for the next set of centroids.
    """
    img, img_size = read_image()

    for k in (2, 4, 8, 16):
        print("k=%d:" % k)
        centroids = init_centroids.init_centroids(img, k)
        loss_array = []
        # Passes are numbered 0..10; pass 0 prints the initial centroids.
        for step in range(11):
            print_centroids(step, centroids.copy())
            # Per-centroid accumulator and per-centroid pixel counter.
            accumulated = np.zeros((k, 3))
            pixel_counts = np.zeros(k)
            for pixel in img:
                accumulated, nearest = accumulate_centroids(
                    pixel, centroids, accumulated)
                pixel_counts[nearest] += 1
            centroids = calculate_new_centroids(accumulated, pixel_counts)
Ejemplo n.º 4
0
def k_means(k):
    """
    Run 11 rounds of k-means over the pixels in ``load.X`` and pass the final
    grouping to ``show_and_save_image``.

    Every round prints the current centroids, puts each pixel into the bucket
    of its closest centroid (the first one wins on ties) and then replaces
    every centroid with ``average`` of its bucket.

    :param k: the number of the centroids
    :return: None
    """
    centroids = init_centroids.init_centroids(0, k)
    print("k={}:".format(k))
    for round_no in range(11):
        pixels = load.X.copy()
        buckets = initial_dictionary(k)
        print("iter {}: ".format(round_no), end='')
        print_array(centroids, k)
        for pixel in pixels:
            # Start with centroid 0 and only switch on a strictly smaller
            # distance.
            nearest = 0
            shortest = distance_vectors(pixel, centroids[0])
            for candidate in range(1, k):
                gap = distance_vectors(pixel, centroids[candidate])
                if gap < shortest:
                    nearest, shortest = candidate, gap
            buckets[nearest].append(pixel)
        for slot in range(k):
            centroids[slot] = average(buckets[slot])
    show_and_save_image(buckets, centroids, k)
Ejemplo n.º 5
0
def k_mean(K):
    """Cluster the prepared pixel data into ``K`` colours and save two figures.

    Runs ``NUM_OF_ITER`` rounds. Each round assigns every pixel to its nearest
    centroid (Euclidean norm), accumulates the distance of every pixel to its
    chosen centroid, recomputes the centroids as the mean of their members and
    prints the centroids that were used for the assignment. A cluster without
    members keeps its previous centroid instead of becoming NaN.

    Two files are written: ``figure_<K>_1.png`` with the recoloured image and
    ``figure_<K>_2.png`` with the accumulated distance per round.

    :param K: number of centroids
    """
    X = data_prep()
    centroids = init_centroids(X, K)
    clusters = np.zeros(len(X))
    centroids_new = deepcopy(centroids)  # working copy updated in each round

    print("k=" + str(K) + ":")
    counter = []
    for j in range(NUM_OF_ITER):
        count = 0
        for i, pixel in enumerate(X):
            distances = np.linalg.norm(pixel - centroids, axis=-1)
            cluster = np.argmin(distances)
            clusters[i] = cluster
            count = count + distances[cluster]
        for i in range(K):
            members = X[clusters == i]
            # np.mean of an empty selection is NaN; leaving centroids_new[i]
            # untouched keeps the centroid from the previous round.
            if len(members):
                centroids_new[i] = np.mean(members, axis=0)
        print("iter{0}:{1}".format(str(j), print_cent(centroids)))
        centroids = deepcopy(centroids_new)
        counter.append(count)

    # Replace every pixel by the centroid of the cluster it was assigned to.
    final_image = np.zeros((clusters.shape[0], 3))
    final_image[:] = np.asarray(centroids)[clusters.astype(int)]

    # Save the recoloured image, then the per-round distance plot.
    # NOTE(review): the 128x128 shape is hard-coded; data_prep() does not
    # expose the image size here -- confirm it always yields 128*128 pixels.
    plt.imshow(final_image.reshape(128, 128, 3))
    plt.savefig('figure_%d_%d.png' % (K, 1))
    plt.figure()
    plt.plot(range(NUM_OF_ITER), counter)
    plt.savefig('figure_%d_%d.png' % (K, 2))
Ejemplo n.º 6
0
def main():
    """Load 'dog.jpeg' and run ``k_means`` on it for k = 2, 4, 8 and 16.

    For every k the starting centroids come from
    ``init_centroids.init_centroids`` and a ``k=<k>:`` header is printed
    before the clustering starts.
    """
    dataset, img_size = load_image('dog.jpeg')

    for k in (2, 4, 8, 16):
        start_centroids = init_centroids.init_centroids(dataset, k)
        print("k={}:".format(k))
        classification, loss = k_means(dataset, start_centroids, k)
Ejemplo n.º 7
0
def main():
    """Cluster the image named on the command line for k = 2, 4, 8, 16.

    The image path is read from ``sys.argv[FIRST_ARG]``. For every k a
    ``k_means.KMeans`` model is run for ``EPOCH`` epochs, and its output is
    reshaped to ``(sys.argv[SEC_ARG], sys.argv[THIRD_ARG], RGB_SIZE)`` and
    handed to ``plot.plot``.
    """
    img = loader.load_data(sys.argv[FIRST_ARG])
    for exponent in (1, 2, 3, 4):
        start_centroids = init_centroids.init_centroids(np.power(2, exponent))
        model = k_means.KMeans(start_centroids, img)
        flat_img = model.algorithm(EPOCH)
        # The target shape is parsed from the command line on every pass.
        target_shape = (int(sys.argv[SEC_ARG]), int(sys.argv[THIRD_ARG]),
                        RGB_SIZE)
        plot.plot(np.reshape(flat_img, target_shape))
Ejemplo n.º 8
0
def run_k_means(X, k):
    """Print the centroids of 10 k-means updates on ``X``.

    Prints a ``k=<k>:`` header and the initial centroids as iteration 0, then
    calls ``find_next_cent`` ten times and prints the centroids after each
    call as iterations 1..10. The loss returned by ``find_next_cent`` is not
    used.

    :param X: data passed to ``init_centroids`` and ``find_next_cent``
    :param k: number of centroids
    """
    print("k=%s:" % (k,))
    centroid = init_centroids(X, k)
    print("iter 0: %s" % print_cent(centroid))
    for step in range(1, 11):
        centroid, _ = find_next_cent(centroid, X)
        print("iter %d: %s" % (step, print_cent(centroid)))
Ejemplo n.º 9
0
def main():
    """Run k-means colour clustering on 'dog.jpeg' for k = 2, 4, 8, 16.

    For every k the image is loaded, divided by 255 and flattened to one row
    per pixel. Eleven rounds follow: each prints the current centroids
    (values floored to two decimals), assigns every pixel to a cluster via
    ``classify``/``assign`` and moves each non-empty cluster's centroid to the
    mean of its red/green/blue sums. Finally every pixel is overwritten with
    the colour of its centroid.
    """
    path = 'dog.jpeg'
    for k in [2, 4, 8, 16]:
        # Loaded inside the loop: the recolouring pass at the bottom writes
        # into the pixel rows, so each k needs fresh data.
        A = imread(path)
        A = A.astype(float) / 255.
        img_size = A.shape
        X = A.reshape(img_size[0] * img_size[1], img_size[2])
        print('k=' + repr(k) + ':')
        centroids = init_centroids(X, k)

        for i in range(11):
            cluster_array = create_array(k)
            print('iter ' + repr(i) + ': ', end="")

            # Format every channel as text floored to two decimals; a plain
            # zero is written as '0.'.
            formatted = []
            for centroid in centroids:
                channels = []
                for value in centroid:
                    text = str(floor(value * 100) / 100)
                    channels.append('0.' if text == '0.0' else text)
                formatted.append(channels)

            # str() of each inner list adds quotes around the values; strip
            # them and emit the whole line at once.
            print(", ".join(map(str, formatted)).replace("'", ""))

            # Put every pixel into the cluster of the centroid it is
            # classified to.
            for pixel in X:
                assign(pixel, cluster_array[classify(centroids, pixel)])

            # Move each centroid to the mean colour of its cluster; empty
            # clusters leave their centroid unchanged.
            for index, cluster in enumerate(cluster_array):
                if cluster.numOfPixels == 0:
                    continue
                target = centroids[index]
                target[0] = cluster.redSum / cluster.numOfPixels
                target[1] = cluster.greenSum / cluster.numOfPixels
                target[2] = cluster.blueSum / cluster.numOfPixels

        # Recolour every pixel with the colour of its centroid.
        for pixel in X:
            chosen = centroids[classify(centroids, pixel)]
            pixel[0] = chosen[0]
            pixel[1] = chosen[1]
            pixel[2] = chosen[2]
Ejemplo n.º 10
0
def main():
    """Run ``k_means`` on 'dog.jpeg' for k = 2, 4, 8 and 16.

    For every k a ``k=<k>:`` header is printed, the image is loaded, divided
    by 255 and flattened to one row per pixel, and the starting centroids come
    from ``init_centroids``.
    """
    path = 'dog.jpeg'
    for k in (2, 4, 8, 16):
        print("k={}:".format(k))
        # Data preparation: load, normalise, reshape to (pixels, channels).
        A = imread(path).astype(float) / 255.
        img_size = A.shape
        X = A.reshape(img_size[0] * img_size[1], img_size[2])
        k_means(X, init_centroids(X, k), k)
Ejemplo n.º 11
0
def kmeans(X, k):
    """Run 10 k-means updates and print the centroids of every iteration.

    The initial centroids are printed as iteration 0 with ``print_iter``;
    after each ``divide_clusters``/``update_centroids`` step the new centroids
    are printed with ``print_iter2`` as iterations 1..10.

    :param X: data passed to ``init_centroids``
    :param k: number of centroids
    """
    centroids = init_centroids(X, k)
    print_iter(0, centroids)

    num_iters = 10
    for step in range(1, num_iters + 1):
        # Regroup around the current centroids, then move the centroids.
        clusters = divide_clusters(centroids)
        centroids = update_centroids(centroids, clusters)
        print_iter2(step, centroids)
Ejemplo n.º 12
0
def _print_centroid_line(centroid):
    """Print every entry of ``centroid`` via ``print_cent``, separated by ', '."""
    last = len(centroid) - 1
    for position, value in enumerate(centroid):
        print(print_cent(value), end="")
        if position < last:
            print(", ", end="")
    print("")


def createImgByK(k):
    """Run 10 k-means iterations on 'dog.jpeg' and recolour it with k centroids.

    The image is loaded and divided by 255. The initial centroids are printed
    as ``iter 0``. Each of the 10 iterations stores, for every pixel, the
    value ``findMin`` returns for its list of Euclidean distances to the
    centroids, then updates every centroid through ``findAvg`` and prints the
    result. Finally every pixel of the normalised image is replaced by its
    centroid.

    Height and width are read separately from the image shape, so the pixel
    loops no longer assume a square image.
    NOTE(review): ``findAvg`` receives the same per-pixel grid as before; it
    is not visible here, so confirm it also copes with non-square images.

    :param k: number of centroids
    """
    # data preparation (loading, normalizing, reshaping)
    path = 'dog.jpeg'
    A = imread(path)
    A_norm = A.astype(float) / 255.
    img_size = A_norm.shape
    X = A_norm.reshape(img_size[0] * img_size[1], img_size[2])
    height, width = img_size[0], img_size[1]

    centroid = init_centroids(X, k)
    # centroidArr[row][col] holds the findMin result for that pixel.
    centroidArr = [[0] * width for _ in range(height)]
    lenCent = len(centroid)

    print("iter 0 :", end="")
    _print_centroid_line(centroid)

    for i in range(10):
        # Assignment step: one distance per centroid for every pixel.
        for row in range(height):
            for col in range(width):
                distances = [
                    distance.euclidean(A_norm[row][col], centroid[index])
                    for index in range(lenCent)
                ]
                centroidArr[row][col] = findMin(distances)
        print("iter", i + 1, ":", end="")
        # Update step.
        for c in range(lenCent):
            centroid[c] = findAvg(A_norm, c, centroidArr)
        _print_centroid_line(centroid)

    # Recolour: every pixel takes the value of its centroid.
    for row in range(height):
        for col in range(width):
            A_norm[row][col] = centroid[centroidArr[row][col]]
Ejemplo n.º 13
0
def divide(X, k):
    """Run 11 k-means rounds on ``X`` and pass the centroids to ``print_image``.

    Each round assigns every point to the centroid with the smallest value
    returned by ``distance``, prints the centroids used for that assignment
    via ``printIter`` and then moves every centroid to the mean of its points.
    A centroid without points is left where it is instead of becoming NaN.

    :param X: sequence of points
    :param k: number of centroids
    """
    C = init_centroids(X, k)
    clusters = np.zeros(len(X))
    # Fixed budget of 11 rounds (0..10); there is no convergence check.
    for b in range(11):
        # Assign each point to its closest centroid.
        for i, point in enumerate(X):
            clusters[i] = np.argmin(distance(point, C))
        printIter(b, C)
        # Move each centroid to the average of the points assigned to it.
        for i in range(k):
            points = [point for point, label in zip(X, clusters) if label == i]
            # np.mean of an empty list is NaN, so skip empty clusters.
            if points:
                C[i] = np.mean(points, axis=0)
    print_image('dog.jpeg', C)
Ejemplo n.º 14
0
def KMeans(X, k):
    """Run 11 k-means rounds on ``X``, printing the centroids of every round.

    Each round assigns every point to the centroid with the smallest value
    returned by ``distance``, prints the centroids used for that assignment
    via ``print_iter`` and then moves every centroid to the mean of its
    points. A centroid without points is left where it is instead of becoming
    NaN.

    :param X: sequence of points
    :param k: number of centroids
    """
    # init centroids and clusters
    C = init_centroids(X, k)
    clusters = np.zeros(len(X))
    # Rounds are numbered 0..10; round 0 prints the initial centroids.
    for round_no in range(11):
        # Assign each point to its closest centroid.
        for i, point in enumerate(X):
            clusters[i] = np.argmin(distance(point, C))
        print_iter(round_no, C)
        # Move each centroid to the average of the points assigned to it.
        for i in range(k):
            points = [point for point, label in zip(X, clusters) if label == i]
            # np.mean of an empty list is NaN, so skip empty clusters.
            if points:
                C[i] = np.mean(points, axis=0)
Ejemplo n.º 15
0
def k_means_algo(pixels, k):
    """
    K means algorithm - refine the centroids over 11 epochs.

    Every epoch prints the current centroids, assigns each pixel to the
    centroid with the smallest squared Euclidean distance (the first one wins
    on ties) and replaces each centroid that received pixels with the average
    of those pixels. Centroids without pixels are left unchanged.

    :param pixels: pixels array
    :param k: number of means
    :return: list with one value per epoch: the sum of the minimal squared
        distances divided by the number of pixels
    """
    centroids_initialize = init_centroids(pixels, k)
    print('k=' + str(k) + ':')
    loss_map = []

    # run 11 epochs
    for epoch in range(11):
        print(print_centroids(centroids_initialize, epoch))
        dict_centroids = {}
        loss = 0

        for pixel in pixels:
            # Reset the choice for every pixel so it can never carry over
            # from the previous pixel when no distance compares below inf.
            index_min = 0
            minimum = float('inf')
            for index in range(k):
                # squared euclidean distance between the pixel and centroid
                dist = pow(
                    np.linalg.norm(centroids_initialize[index] - pixel), 2)
                if dist < minimum:
                    minimum = dist
                    index_min = index

            loss += minimum
            # group the pixel under the index of its closest centroid
            dict_centroids.setdefault(index_min, []).append(pixel)

        # move every centroid that received pixels to their average
        for key, members in dict_centroids.items():
            centroids_initialize[key] = np.average(members, axis=0)
        loss_map.append(loss / float(len(pixels)))

    return loss_map
Ejemplo n.º 16
0
def main():
    """Run 10 k-means iterations on 'dog.jpeg' for k = 2, 4, 8 and 16.

    The image is loaded once, divided by 255 and flattened to one row per
    pixel. For every k the initial centroids are wrapped in ``Centroid``
    objects and printed as iteration 0. Each following iteration assigns
    every pixel to its nearest centroid, updates all centroid locations,
    prints them and clears the assigned pixels.
    """
    # data preparation (loading, normalizing, reshaping)
    path = 'dog.jpeg'
    A = imread(path)
    A_norm = A.astype(float) / 255.
    img_size = A_norm.shape
    X = A_norm.reshape(img_size[0] * img_size[1], img_size[2])

    for exponent in range(1, 5):
        k = pow(2, exponent)
        initial_centroids = init_centroids.init_centroids(X, k)
        centroids = [Centroid(cent) for cent in initial_centroids]

        print("k=" + str(k) + ":")
        print("iter 0:", end='')
        print_centroids_locations(centroids)

        for j in range(1, 11):
            for pixel in X:
                # Start from the first centroid; switch only on a strictly
                # smaller distance.
                nearest = centroids[0]
                shortest = distance(pixel, nearest.get_location())
                for candidate in centroids:
                    gap = distance(pixel, candidate.get_location())
                    if gap < shortest:
                        shortest, nearest = gap, candidate
                nearest.assign_pixel(pixel)

            # move every centroid according to its assigned pixels
            for cent in centroids:
                cent.update_location()

            print("iter " + str(j) + ":", end='')
            print_centroids_locations(centroids)

            # forget the assignments before the next iteration
            for cent in centroids:
                cent.clear_pixels()
Ejemplo n.º 17
0
def KMeans(pic, k):
    """Run 10 k-means iterations on ``pic`` and print the centroids each time.

    The initial centroids are printed as iteration 0. Every iteration groups
    the pixels by the index ``classify`` returns, replaces the centroid of
    each non-empty group with the average of its pixels and prints the
    centroids.

    :param pic: iterable of pixels
    :param k: number of centroids
    """
    print("k = ", k)
    centroids = init_centroids(pic, k)
    printIter(0, centroids)
    for iteration in range(1, 11):
        # one (initially empty) group per centroid
        clusters = [[] for _ in range(k)]
        for pixel in pic:
            clusters[classify(pixel, centroids)].append(pixel)
        # the new centroid is the average of its group; empty groups are
        # skipped so their centroid stays as it is
        for idx, members in enumerate(clusters):
            if len(members) == 0:
                continue
            total = 0
            for pixel in members:
                total += pixel
            centroids[idx] = total / len(members)
        printIter(iteration, centroids)
Ejemplo n.º 18
0
def k_means(X,K):
    """Run ``MAX_ITERS + 1`` k-means updates on ``X``.

    The initial centroids are printed as iteration 0. Every pass stacks one
    ``norm(X - c)`` result per centroid, labels each pixel with the row index
    of the smallest stacked value, recomputes every centroid as the mean of
    its pixels and records the average stacked value at the chosen label.
    The centroids are printed after every pass except the last one.
    NOTE(review): this only labels pixels individually if ``norm`` returns one
    value per row of ``X`` -- confirm which ``norm`` is in scope.

    :param X: pixel array, indexed as ``X[mask, :]``
    :param K: number of centroids
    :return: tuple ``(centroids, pixel_centroids, mean_distance)``
    """
    centroids = init_centroids(K)
    print("iter 0: {}".format(arr2str2(centroids)))
    mean_distance = []
    for i in range(MAX_ITERS + 1):
        per_centroid = [norm(X - c) for c in centroids]
        distances_to_centroids = np.vstack(per_centroid)
        pixel_centroids = np.argmin(distances_to_centroids, axis=0)

        updated = []
        for label in range(K):
            updated.append(np.mean(X[pixel_centroids == label, :], axis=0))
        centroids = updated

        # Sum the stacked value at each pixel's chosen label; these were
        # computed against the centroids from before the update.
        total_dist = 0.
        for col, row in enumerate(pixel_centroids):
            total_dist += distances_to_centroids[row, col]
        mean_distance.append(total_dist / pixel_centroids.shape[0])

        centroid_str = arr2str2(centroids)
        if i >= MAX_ITERS:
            # the last pass is not printed
            continue
        print("iter {}: {}".format(i + 1, centroid_str))

    return centroids, pixel_centroids, mean_distance
Ejemplo n.º 19
0
def k_means(X, k):
    """Run 10 k-means iterations on ``X`` and print the centroids each time.

    The initial centroids are printed as iteration 0 and wrapped in
    ``Cluster`` objects. Every iteration registers each pixel with the
    cluster whose ``getRGB()`` value is nearest (Euclidean norm, first one
    wins on ties), then calls ``update()`` and ``clear_pix()`` on every
    cluster and prints the clusters' ``getRGB()`` values.

    The triple-quoted blocks inside the body are disabled code kept as string
    literals (optional loss plot and pixel bookkeeping); they have no effect.

    :param X: iterable of pixels
    :param k: number of centroids
    """
    # print the header and the initial centroids (iteration 0)
    print("k=" + str(k) + ":")
    centroids = init_centroids(X, k)
    print("iter 0: " + print_cent(centroids))
    clusters = []
    # if you want to compute loss
    #loss = []
    '''
    #array that match pixel and index in clusters array.
    #if you want to show the pic - take out of notes
    pixels = []
    '''
    # one Cluster object per initial centroid
    for clust in centroids:
        clusters.append(Cluster(clust))
    for iter in range(10):
        for pix in X:
            # distance to the first cluster is the starting minimum
            min = np.linalg.norm(pix - clusters[0].getRGB())
            min_k = 0
            # count tracks the index of the cluster currently being compared
            count = 0
            for cl in clusters:
                #find minimal distance
                dist = np.linalg.norm(pix - cl.getRGB())
                if dist < min:
                    min = dist
                    min_k = count
                count += 1
            # register the pixel with its nearest cluster
            clusters[min_k].add_pixel()
            clusters[min_k].add_rgb(pix)

            #if you want to compute loss
            #clusters[min_k].add_to_loss(pix)
            '''
            #if you want to see pic - take out of notes
            if iter is 9:
                pixels.append(min_k)
            '''
        #if you want to see the loss
        #loss.append(compute_loss(clusters,len(X)))
        # update every cluster, then clear its pixel bookkeeping
        # (presumably update() averages the added pixels -- confirm in Cluster)
        for c in clusters:
            c.update()
            c.clear_pix()
            '''
            #if you want to see the loss
            if iter < 9:
                c.clear_loss()
            '''
        # print the clusters' current RGB values
        clusters_rgb = []
        for rgb in clusters:
            clusters_rgb.append(rgb.getRGB())
        print("iter " + str(iter + 1) + ": " + print_cent(clusters_rgb))
    '''
    #if you want to see the loss
    plt.plot(loss)
    plt.title('K = %d' %k)
    plt.ylabel('loss')
    plt.xlabel('iteration')
    plt.show()
    '''
    '''
Ejemplo n.º 20
0
            break

        updateCentroids(centroids, algorithmData)

    # plt.figure()
    # plt.plot(averageLoss)
    # plt.title("k = " + str(k))
    # plt.ylabel("avaragel loss")
    # plt.xlabel("iteration")
    # plt.show()


# Print numpy values with two digits of precision.
numpy.set_printoptions(precision=2)

# For every k: fetch the initial centroids, build one bookkeeping record per
# centroid, print the header and hand everything to printData. For k == 16 the
# image is additionally recoloured with the closest centroid colours and shown.
for k in [2, 4, 8, 16]:
    centroids = init_centroids.init_centroids(load.A_norm, k)
    algorithmData = []

    # One record per centroid: [centroid, 0, [0, 0, 0]]
    # (presumably pixel count and running colour sum -- confirm in printData).
    for centroid in centroids:
        algorithmData.append([centroid, 0, [0, 0, 0]])

    print("k=", k, sep="", end=":\n")
    printData(centroids, algorithmData, k)

    # Take a real copy: a plain assignment would only alias load.A_norm, so
    # the recolouring below would overwrite the source image.
    # NOTE(review): assumes load.A_norm is a numpy array, whose .copy()
    # duplicates the pixel data -- confirm.
    imageCopy = load.A_norm.copy()
    if k == 16:
        for row in range(load.img_size[0]):
            for col in range(load.img_size[1]):
                imageCopy[row][col] = closestColor(centroids,
                                                   load.A_norm[row][col])
        load.plt.imshow(imageCopy)
Ejemplo n.º 21
0
#    plt.xlabel("Iteration Number")
#    plt.show()


# each cell in this array is the centroid of the pixel
# for example: cell 0 will contain the centroid of pixel number 0
centerToPixel = []
for k in[2,4,8,16]:
    # data preparation (loading, normalizing, reshaping)
    # (the image is loaded again for every k)
    path = 'dog.jpeg'
    A = imread(path)
    A = A.astype(float) / 255.
    img_size = A.shape
    # one row per pixel: (rows * cols, channels)
    X = A.reshape(img_size[0] * img_size[1], img_size[2])
    # initializing centroids
    centroids = init_centroids(X, k)
    print("k=%d:" % k)
    # array to store the loss of each iteration
    lossArray = []
    # iterations are numbered 0..10; iteration 0 prints the initial centroids
    for i in range(0,11):
        # start a fresh pixel -> centroid mapping for this iteration
        centerToPixel = []
        print("iter %d: " % i, end='')
        print(print_cent(centroids))
        # distances sum of each iteration
        lossSum = 0
        # assign each pixel to its closest centroid
        for pixel in X:
            # start with centroid 0 as the closest one; the distance is
            # squared (norm * norm)
            minDistance = np.linalg.norm(pixel - centroids[0])
            minDistance = minDistance * minDistance
            minCentroid = centroids[0]
Ejemplo n.º 22
0
        tmp_mean = tmp_arr.mean(axis=0)
        new_cents.append(tmp_mean)
    return np.asarray(new_cents)


if __name__ == '__main__':
    # Script entry point: for every K in k_arr, start from the centroids
    # returned by cent.init_centroids and print the centroids of iteration 0
    # and of each of the following `iterations` update rounds.
    # define variables
    pic_path = 'dog.jpeg'
    k_arr = [2, 4, 8, 16]  # list of K values.
    iterations = 10  # num. of iterations for each K.
    # Get and normalize pixels
    A = imread(pic_path)
    A = A.astype(float) / 255.
    img_size = A.shape
    # one row per pixel: (rows * cols, channels)
    pixles = A.reshape(img_size[0] * img_size[1], img_size[2])
    # copy pixels matrix for plotting compressed image.
    compressed_img = np.array(pixles)

    # Perform the k_means algorithm for `iterations` rounds per K and print
    # the centroids.
    for i in range(len(k_arr)):
        centroids = cent.init_centroids(pixles, k_arr[i])
        avg_losses = []
        # print iteration 0 (the given initial centroids)
        print('k={0}:'.format(k_arr[i]))
        print_cents(centroids, 0)
        for j in range(1, iterations + 1):
            # update new centroids
            # NOTE(review): k_mean also receives compressed_img and
            # avg_losses; presumably it fills them in place -- confirm
            # against k_mean.
            cents_list = k_mean(centroids, pixles, compressed_img, avg_losses)
            centroids = centroid_update(cents_list)
            print_cents(centroids, j)