def kmean_statistics(images, options, kmax=10, nsamples=250):
    """Fit KMeans for k = 2..kmax on up to *nsamples* images and plot,
    for each k, the average fitting score, iteration count and wall time.

    Args:
        images: indexable collection of image arrays.
        options: options dict forwarded to ``km.KMeans``.
        kmax: largest number of clusters to evaluate (inclusive).
        nsamples: process at most this many images from ``images``.

    Side effects:
        Shows three matplotlib figures (score / iterations / time vs. k).
    """
    # One accumulator slot per k in [2, kmax].
    global_times = np.zeros(kmax - 1)
    global_scores = np.zeros(kmax - 1)
    global_iterations = np.zeros(kmax - 1)

    sample = images[:nsamples]
    for image in sample:
        kms = km.KMeans(image, 1, options)
        # BUG FIX: the original looped k from 1, so k=1 wrote to index -1
        # (the last slot), which therefore accumulated both k=1 and k=kmax
        # results. Start at k=2 so each slot maps to exactly one k.
        for k in range(2, kmax + 1):
            start = time.time()
            kms.K = k
            kms.fit()
            score = kms.perform_score()
            elapsed = time.time() - start
            global_times[k - 2] += elapsed
            global_scores[k - 2] += score
            global_iterations[k - 2] += kms.num_iter

    # BUG FIX: average over the number of images actually processed
    # (the nsamples slice), not over the full dataset size.
    n_used = len(sample)
    if n_used:
        global_scores /= n_used
        global_iterations /= n_used
        global_times /= n_used

    # BUG FIX: the x-axis must have kmax-1 entries (k = 2..kmax) to match
    # the accumulator length; the original used range(1, kmax+1), which
    # makes pandas raise on a length mismatch.
    ks = list(range(2, kmax + 1))
    for values, name in ((global_scores, "Score"),
                         (global_iterations, "Iterations"),
                         (global_times, "Time")):
        pd.Series(values, index=ks, name=name).plot(legend=True)
        plt.show()
def kmeans_statistics(images, KMax):
    """Run KMeans for every k in [2, KMax) and collect per-k statistics.

    Args:
        images: data passed straight to ``Kmeans.KMeans``.
        KMax: exclusive upper bound on the number of clusters tried.

    Returns:
        Tuple of three parallel lists: (fit times in seconds, iterations
        returned by ``fit``, within-class distances).
    """
    elapsed_times, iteration_counts, distances = [], [], []
    for k in range(2, KMax):
        model = Kmeans.KMeans(images, k)
        started = time.time()
        needed = model.fit()
        elapsed_times.append(time.time() - started)
        iteration_counts.append(needed)
        distances.append(model.whitinClassDistance())
    return elapsed_times, iteration_counts, distances
def get_color_predictions(images, max_k):
    """Predict the colour labels of each image with KMeans.

    For every image: pick the best K (up to *max_k*), fit, and collect the
    colour names of the resulting centroids.

    Args:
        images: iterable of image arrays.
        max_k: upper bound passed to ``find_bestK``; also scales ``max_iter``.

    Returns:
        ``np.ndarray`` with one entry of centroid colour labels per image.
    """
    preds = []
    # IDIOM FIX: the original used enumerate() but never used the index,
    # and named the loop variable `input`, shadowing the builtin.
    for image in images:
        # (translated) It was observed that the number of iterations needed
        # was close to k*5; if it exceeds that, the fit is not efficient.
        # The tolerance could be 0.05, but it is not worth it.
        kms = km.KMeans(image, 1, {"km_init": "kmeans++",
                                   "max_iter": max_k * 5,
                                   "threshold": 0.35,
                                   "fitting": "DB",
                                   "tolerance": 0.1,
                                   "background_mask": 250})
        kms.find_bestK(max_k)
        kms.fit()
        preds.append(km.get_colors(kms.centroids))
    return np.array(preds)
def get_kmeans_accuracy(kmeans_labels_test, images, KMax, max_images_to_use, options):
    """Measure KMeans colour-label accuracy on a random image subset and
    save a scatter plot of accuracy vs. k.

    Args:
        kmeans_labels_test: ground-truth colour labels, indexed per image.
        images: array of images; first axis is the image index.
        KMax: exclusive upper bound on the number of clusters tried.
        max_images_to_use: total size of the sampled image pool
            (module-level ``used_kmeans_images`` is topped up to this size).
        options: options dict forwarded to ``Kmeans.KMeans``; its
            ``km_init`` value is used in the plot title and filename.

    Side effects:
        Mutates the module-level ``used_kmeans_images`` list, saves a PNG
        under ``output_folder`` and prints the elapsed time.
    """
    plt.clf()
    accerted_ratios_for_all_images = []
    print("estimated time: 1 minute")
    if len(used_kmeans_images) != max_images_to_use:
        for i in range(len(used_kmeans_images), max_images_to_use):
            # BUG FIX: random.randint(0, n) is INCLUSIVE of n, so the
            # original could yield images.shape[0] and raise IndexError
            # below. randrange(n) samples [0, n) as intended.
            # NOTE(review): duplicates are still possible here; if the
            # sample must be unique, use random.sample — confirm intent.
            number_to_use = random.randrange(images.shape[0])
            used_kmeans_images.append(number_to_use)
    time1 = time.time()
    for number_to_use in used_kmeans_images:
        accerted_ratios = []
        for j in range(2, KMax):
            km = Kmeans.KMeans(images[number_to_use], j, options)
            km.fit()
            returned_from_kmeans_color_labels = Kmeans.get_colors(km.centroids)
            accerted = get_color_accuracy(kmeans_labels_test[number_to_use],
                                          returned_from_kmeans_color_labels)
            accerted_ratios.append(accerted)
        accerted_ratios_for_all_images.append(accerted_ratios)
    for i in range(len(used_kmeans_images)):
        plt.scatter(list(range(2, KMax)), accerted_ratios_for_all_images[i],
                    label="image " + str(used_kmeans_images[i]))
    plt.legend()
    plt.title("KMeans accerted % " + options["km_init"] + " ratio")
    plt.xlabel("K")
    plt.ylabel("accerted % ratios kmeans")
    plt.savefig(output_folder + "kmeans " + options["km_init"] + " Accerted.png")
    print(time.time() - time1)
start = time.time() # color_chosen = input('Choose a color') # query_color = Retrieval_by_color(test_imgs, test_color_labels, 'White') # visualize_retrieval(query_color, len(query_color)) # query_shape = Retrieval_by_shape(test_imgs, test_class_labels, 'Dresses') # visualize_retrieval(query_shape, len(query_shape)) # query_combined = Retrieval_combined(test_imgs, test_color_labels, test_class_labels, 'White', 'Dresses') # visualize_retrieval(query_combined, 20) elem_kmeans = [] elem_colors_kmeans = [] for img in test_imgs: elem_kmeans.append(km.KMeans(img)) elem_kmeans[-1].find_bestK_improved(4, 'FISHER_REAL_CENT') elem_colors_kmeans.append(km.get_colors(elem_kmeans[-1].centroids)) # Kmean_statistics(km.KMeans(test_imgs[0]), 4) ''' elem_knn = KNN.KNN(train_imgs, train_class_labels) # start = time.time() porcentaje_shape = Get_shape_accuracy(elem_knn.predict(test_imgs, 4), test_class_labels) # print("tiempo: ", time.time() - start) print(porcentaje_shape) ''' ''' for propio, solucion in zip(sorted(elem_colors_kmeans), sorted(test_color_labels)): print(propio)