def kmean_statistics(images, options, kmax=10, nsamples=250):
    """Plot mean K-Means score, iteration count and run time for K = 1..kmax.

    For each of the first ``nsamples`` entries of ``images`` one ``km.KMeans``
    object is created and then re-fitted for every K from 1 to ``kmax``.
    The value returned by ``perform_score()``, the ``num_iter`` attribute and
    the wall-clock time of ``fit()`` + ``perform_score()`` are accumulated per
    K, averaged over the images actually processed, and shown as three
    separate line plots (score, iterations, time).

    Args:
        images: Sequence of images; each item is passed as-is to ``km.KMeans``.
        options: Options object forwarded unchanged to ``km.KMeans``.
        kmax: Largest K to evaluate (inclusive).
        nsamples: Maximum number of images (taken from the start) to evaluate.

    Returns:
        None. The results are only displayed through ``plt.show()``.
    """
    samples = images[:nsamples]
    n_samples = len(samples)
    if n_samples == 0:
        # Nothing to average; avoid a division by zero and empty plots.
        return

    ks = list(range(1, kmax + 1))

    # One slot per evaluated K (K = 1..kmax  ->  index K-1).
    global_times = np.zeros(kmax)
    global_scores = np.zeros(kmax)
    global_iterations = np.zeros(kmax)

    for image in samples:
        # The same KMeans object is reused; only its K changes between fits.
        kms = km.KMeans(image, 1, options)

        for k in ks:
            start = time.time()
            kms.K = k
            kms.fit()
            score = kms.perform_score()
            elapsed = time.time() - start

            global_times[k - 1] += elapsed
            global_scores[k - 1] += score
            global_iterations[k - 1] += kms.num_iter

    # Average over the images that were actually processed, not over the
    # whole data set (only the first ``nsamples`` contribute to the sums).
    global_scores /= n_samples
    global_iterations /= n_samples
    global_times /= n_samples

    for values, name in ((global_scores, "Score"),
                         (global_iterations, "Iterations"),
                         (global_times, "Time")):
        pd.Series(values, index=ks, name=name).plot(legend=True)
        plt.show()
def kmeans_statistics(images, KMax):
    """Collect fit time, fit result and within-class distance for each K.

    A fresh ``Kmeans.KMeans`` model is built for every K in ``range(2, KMax)``
    (``KMax`` itself is excluded). Only the ``fit()`` call is timed.

    Args:
        images: Data handed unchanged to ``Kmeans.KMeans``.
        KMax: Exclusive upper bound for K.

    Returns:
        A tuple ``(times, iterations, wcds)`` of three lists with one entry
        per K: elapsed seconds of ``fit()``, the value returned by ``fit()``
        (recorded as the iterations needed), and the value returned by
        ``whitinClassDistance()``.
    """
    times, iterations, wcds = [], [], []

    for n_clusters in range(2, KMax):
        model = Kmeans.KMeans(images, n_clusters)

        started = time.time()
        fit_result = model.fit()
        elapsed = time.time() - started

        times.append(elapsed)
        iterations.append(fit_result)
        wcds.append(model.whitinClassDistance())

    return times, iterations, wcds
def get_color_predictions(images, max_k):
    """Return the colour labels K-Means produces for each image.

    For every image a ``km.KMeans`` model is created with a fixed option set,
    ``find_bestK(max_k)`` is called (presumably to select K -- confirm against
    the ``km`` module), the model is fitted, and its centroids are converted
    with ``km.get_colors``.

    Args:
        images: Iterable of images; each item is passed to ``km.KMeans``.
        max_k: Value forwarded to ``find_bestK``; also sets ``max_iter`` to
            ``max_k * 5``.

    Returns:
        ``np.array`` built from the per-image ``km.get_colors`` results.
    """
    predictions = []

    for image in images:
        settings = {
            "km_init": "kmeans++",
            # It was observed that the number of iterations needed stays
            # close to k*5; going beyond that means the fit is not efficient.
            "max_iter": max_k*5,
            "threshold": 0.35,
            "fitting": "DB",
            # A tolerance of 0.05 would also be possible, but is not worth it.
            "tolerance": 0.1,
            "background_mask": 250,
        }
        model = km.KMeans(image, 1, settings)
        model.find_bestK(max_k)
        model.fit()
        predictions.append(km.get_colors(model.centroids))

    return np.array(predictions)
def get_kmeans_accuracy(kmeans_labels_test, images, KMax, max_images_to_use,
                        options):
    """Scatter-plot K-Means colour accuracy against K for sampled images.

    The module-level list ``used_kmeans_images`` is topped up with random
    image indices until it holds ``max_images_to_use`` entries (it is shared
    state, so indices chosen by an earlier call are reused). For each sampled
    image and each K in ``range(2, KMax)`` a ``Kmeans.KMeans`` model is fitted,
    its centroids are converted with ``Kmeans.get_colors`` and compared with
    the reference labels through ``get_color_accuracy``. The results are drawn
    as one scatter series per image and saved as a PNG under the module-level
    ``output_folder``.

    Args:
        kmeans_labels_test: Reference colour labels, indexable by image index.
        images: Array of images; ``images.shape[0]`` is the number of images.
        KMax: Exclusive upper bound for K.
        max_images_to_use: Target size of the shared sample of image indices.
        options: Options passed to ``Kmeans.KMeans``; ``options["km_init"]``
            is also used in the plot title and the output file name.

    Returns:
        None. Prints a time estimate and the elapsed seconds, and writes the
        figure to disk.
    """
    plt.clf()
    print("estimated time: 1 minute")

    # Top up the shared sample. randrange() excludes its upper bound, so every
    # drawn index is valid; randint(0, n) could return n and overflow images.
    n_images = images.shape[0]
    while len(used_kmeans_images) < max_images_to_use:
        used_kmeans_images.append(random.randrange(n_images))

    k_values = list(range(2, KMax))

    time1 = time.time()
    accerted_ratios_for_all_images = []
    for image_index in used_kmeans_images:
        accerted_ratios = []

        for k in k_values:
            model = Kmeans.KMeans(images[image_index], k, options)
            model.fit()
            predicted_labels = Kmeans.get_colors(model.centroids)
            accerted_ratios.append(
                get_color_accuracy(kmeans_labels_test[image_index],
                                   predicted_labels))

        accerted_ratios_for_all_images.append(accerted_ratios)

    for image_index, accerted_ratios in zip(used_kmeans_images,
                                            accerted_ratios_for_all_images):
        plt.scatter(k_values, accerted_ratios,
                    label="image " + str(image_index))

    # Decorate and save once, after all series are drawn; skipped when there
    # is nothing to plot.
    if used_kmeans_images:
        plt.legend()
        plt.title("KMeans accerted % " + options["km_init"] + " ratio")
        plt.xlabel("K")
        plt.ylabel("accerted % ratios kmeans")
        plt.savefig(output_folder + "kmeans " + options["km_init"] +
                    " Accerted.png")

    print(time.time() - time1)
Ejemplo n.º 5
0
    start = time.time()

    # color_chosen = input('Choose a color')
    # query_color = Retrieval_by_color(test_imgs, test_color_labels, 'White')
    # visualize_retrieval(query_color, len(query_color))

    # query_shape = Retrieval_by_shape(test_imgs, test_class_labels, 'Dresses')
    # visualize_retrieval(query_shape, len(query_shape))

    # query_combined = Retrieval_combined(test_imgs, test_color_labels, test_class_labels, 'White', 'Dresses')
    # visualize_retrieval(query_combined, 20)

    elem_kmeans = []
    elem_colors_kmeans = []
    for img in test_imgs:
        elem_kmeans.append(km.KMeans(img))
        elem_kmeans[-1].find_bestK_improved(4, 'FISHER_REAL_CENT')
        elem_colors_kmeans.append(km.get_colors(elem_kmeans[-1].centroids))

    # Kmean_statistics(km.KMeans(test_imgs[0]), 4)
    '''

    elem_knn = KNN.KNN(train_imgs, train_class_labels)
    # start = time.time()
    porcentaje_shape = Get_shape_accuracy(elem_knn.predict(test_imgs, 4), test_class_labels)
    # print("tiempo: ", time.time() - start)
    print(porcentaje_shape)
    
    ''' '''
    for propio, solucion in zip(sorted(elem_colors_kmeans), sorted(test_color_labels)):
        print(propio)