def main_kmeans(X, U_init):
    """
    Cluster X with k-means starting from U_init and record the training time.

    The minibatch variant is used when the ``--minibatch`` parameter is
    truthy (its value is forwarded as the batch size); the regular k-means
    is used otherwise. The elapsed ``time.process_time()`` is handed to
    ``resprinter`` under the key ``"traintime"`` and the objective values
    are handed to ``objprinter`` under ``"kmeans_objective"``.

    :param X: The input data in which to find the clusters.
    :param U_init: The initialization of the clusters.
    :return: The final centroids, the indicator vector
    """
    tic = time.process_time()

    minibatch_size = paraman["--minibatch"]
    # Arguments common to both k-means flavours.
    common_kwargs = {
        "X_data": X,
        "K_nb_cluster": paraman["--nb-cluster"],
        "nb_iter": paraman["--nb-iteration"],
        "initialization": U_init,
    }
    if minibatch_size:
        outcome = kmeans_minibatch(batch_size=minibatch_size, **common_kwargs)
    else:
        outcome = kmeans(**common_kwargs)
    objective_values, centroids, indicator_vector = outcome

    elapsed = time.process_time() - tic

    objprinter.add("kmeans_objective", ("after t", ), objective_values)
    resprinter.add({"traintime": elapsed})

    return centroids, indicator_vector
def main_kmeans(X, U_init):
    """
    Cluster X with k-means starting from U_init and record the training time.

    The wall-clock duration of the ``kmeans`` call is handed to
    ``resprinter`` under the key ``"traintime"`` and the objective values
    are handed to ``objprinter`` under ``"kmeans_objective"``.

    :param X: The input data in which to find the clusters.
    :param U_init: The initialization of the clusters.
    :return: The final centroids, the indicator vector
    """
    # perf_counter is monotonic and high resolution; time.time() follows the
    # system clock and can jump (NTP, manual adjustment), which would corrupt
    # the measured duration.
    start_kmeans = time.perf_counter()
    objective_values_k, final_centroids, indicator_vector_final = kmeans(
        X_data=X,
        K_nb_cluster=paraman["--nb-cluster"],
        nb_iter=paraman["--nb-iteration"],
        initialization=U_init)
    stop_kmeans = time.perf_counter()

    kmeans_traintime = stop_kmeans - start_kmeans
    kmeans_results = {"traintime": kmeans_traintime}

    objprinter.add("kmeans_objective", ("after t", ), objective_values_k)
    resprinter.add(kmeans_results)

    return final_centroids, indicator_vector_final
def main_kmeans(X, U_init):
    """
    Cluster X with k-means starting from U_init.

    The number of clusters is taken from the first dimension of U_init.
    The minibatch variant is used when the ``--minibatch`` parameter is
    truthy (its value is forwarded as the batch size); the regular k-means
    is used otherwise.

    :param X: The input data in which to find the clusters.
    :param U_init: The initialization of the clusters.
    :return: The final centroids, the indicator vector
    """
    minibatch_size = paraman["--minibatch"]
    # Arguments common to both k-means flavours.
    common_kwargs = {
        "X_data": X,
        "K_nb_cluster": U_init.shape[0],
        "nb_iter": paraman["--nb-iteration"],
        "initialization": U_init,
    }
    if minibatch_size:
        outcome = kmeans_minibatch(batch_size=minibatch_size, **common_kwargs)
    else:
        outcome = kmeans(**common_kwargs)

    # The objective values are returned by both variants but not used here.
    _objective_values, centroids, indicator_vector = outcome
    return centroids, indicator_vector
nb_iter_kmeans, nb_factors, hierarchical_palm_init, initialization=U_centroids_hat, hierarchical_inside=True) logger.info('Running QuicK-means with Palm') objective_function_with_palm, op_centroids_palm, indicator_palm, lst_objective_function_palm = \ qmeans(X, nb_clusters, nb_iter_kmeans, nb_factors, hierarchical_palm_init, initialization=U_centroids_hat) try: logger.info('Running K-means') objective_values_k, centroids_finaux, indicator_kmean = \ kmeans(X, nb_clusters, nb_iter_kmeans, initialization=U_centroids_hat) except SystemExit as e: logger.info("There have been a problem in kmeans: {}".format(str(e))) logger.info('Display') plt.figure() plt.plot(np.arange(len(objective_function_with_hier_palm)), objective_function_with_hier_palm, marker="x", label="hierarchical") plt.plot(np.arange(len(objective_function_with_palm)), objective_function_with_palm, marker="x", label="palm") plt.plot(np.arange(len(objective_values_k)), objective_values_k, marker="x", label="kmeans") handles, labels = plt.gca().get_legend_handles_labels() by_label = OrderedDict(zip(labels, handles)) plt.legend(by_label.values(), by_label.keys()) plt.show()
graphical_display=False, hierarchical_inside=True) # objective_values_q_hier, centroids_finaux_q_hier, indicator = qmeans(X, nb_clusters, nb_iter_kmeans, nb_factors, hierarchical_palm_init, initialization=U_centroids_hat, graphical_display=True, hierarchical_inside=True) objective_values_q, centroids_finaux_q, indicator = qmeans( X, nb_clusters, nb_iter_kmeans, nb_factors, hierarchical_palm_init, initialization=U_centroids_hat, graphical_display=False) # except Exception as e: # logger.info("There have been a problem in qmeans: {}".format(str(e))) try: objective_values_k, centroids_finaux, indicator = kmeans( X, nb_clusters, nb_iter_kmeans, initialization=U_centroids_hat) except SystemExit as e: logger.info("There have been a problem in kmeans: {}".format(str(e))) plt.figure() # plt.yscale("log") plt.scatter(np.arange(len(objective_values_q) - 1) + 0.5, objective_values_q[1:, 0], marker="x", label="qmeans after palm(0)", color="b") plt.scatter((2 * np.arange(len(objective_values_q)) + 1) / 2 - 0.5, objective_values_q[:, 1], marker="x", label="qmeans after t (1)",
def main_kmeans(X, U_init):
    """
    Cluster X with k-means (optionally L1-projected) starting from U_init.

    When ``--l1-proj`` is set and ``--lambda-l1-proj`` is not, a default
    lambda is looked up from the dataset option that is active (it may be
    None for some datasets); otherwise ``--lambda-l1-proj`` is used as is.
    The minibatch variant is used when ``--minibatch`` is truthy. The last
    objective value is printed, the elapsed ``time.process_time()`` and the
    lambda actually used are handed to ``resprinter``, and the objective
    values are handed to ``objprinter``.

    :param X: The input data in which to find the clusters.
    :param U_init: The initialization of the clusters.
    :return: The final centroids, the indicator vector
    :raises NotImplementedError: If a default lambda is needed and no known
        dataset option is active.
    """
    # Ordered (option, match on "is not None", default lambda). Options with
    # False in the second slot are matched on truthiness instead. The order
    # matters: the first matching option wins.
    dataset_defaults = (
        ("--blobs", True, None),
        ("--caltech256", True, 868),
        ("--census", False, None),
        ("--kddcup04", False, None),
        ("--kddcup99", False, 4.9),
        ("--plants", False, None),
        ("--breast-cancer", False, 600),
        ("--covtype", False, 964),
        ("--mnist", False, 2550),
        ("--fashion-mnist", False, 1485),
        ("--light-blobs", False, None),
        ("--lfw", False, None),
        ("--million-blobs", True, None),
        ("--coil20", True, 5.6),
    )

    needs_default = (paraman["--l1-proj"] is not None
                     and paraman["--lambda-l1-proj"] is None)
    if not needs_default:
        param_lambda = paraman["--lambda-l1-proj"]
    else:
        for option, by_presence, default_lambda in dataset_defaults:
            option_value = paraman[option]
            if by_presence:
                is_active = option_value is not None
            else:
                is_active = bool(option_value)
            if is_active:
                param_lambda = default_lambda
                break
        else:
            raise NotImplementedError("Unknown dataset.")

    tic = time.process_time()

    minibatch_size = paraman["--minibatch"]
    # Arguments common to both k-means flavours.
    common_kwargs = {
        "X_data": X,
        "K_nb_cluster": paraman["--nb-cluster"],
        "nb_iter": paraman["--nb-iteration"],
        "initialization": U_init,
    }
    if minibatch_size:
        common_kwargs["batch_size"] = minibatch_size
        solver = kmeans_minibatch
    else:
        solver = kmeans
    objective_values, centroids, indicator_vector = solver(
        proj_l1=paraman["--l1-proj"],
        _lambda=param_lambda,
        epsilon=paraman["--epsilon-tol-proj"],
        **common_kwargs)

    elapsed = time.process_time() - tic

    print(objective_values[-1])

    summary = {
        "traintime": elapsed,
        "actual_param_lambda": param_lambda,
    }
    objprinter.add("kmeans_objective", ("after t", ), objective_values)
    resprinter.add(summary)

    return centroids, indicator_vector
def main(small_dim):
    """
    Run QuicK-means (with Palm) and then K-means on the same synthetic blobs.

    The data come from ``datasets.make_blobs`` and both algorithms start from
    the same centroids, picked at random among the samples after seeding
    numpy's global random generator with 0. Nothing is returned.

    :param small_dim: If truthy, use the small problem sizes; otherwise use
        the large ones.
    """
    np.random.seed(0)

    # Problem sizes.
    if small_dim:
        nb_clusters, nb_iter_kmeans = 10, 10
        n_samples, n_features, n_centers = 1000, 20, 50
        nb_factors = 5
    else:
        nb_clusters, nb_iter_kmeans = 256, 10
        n_samples, n_features, n_centers = 10000, 2048, 4096
        nb_factors = int(np.log2(min(nb_clusters, n_features)))

    X, _ = datasets.make_blobs(n_samples=n_samples,
                               n_features=n_features,
                               centers=n_centers)

    # kmeans++ initialization is not feasible because complexity is O(ndk)...
    shuffled_indices = np.random.permutation(X.shape[0])
    U_centroids_hat = X[shuffled_indices[:nb_clusters]]

    # Palm4MSA settings.
    residual_on_right = nb_clusters < n_features
    sparsity_factor = 2
    nb_iter_palm = 300
    delta_objective_error_threshold = 1e-6

    n_rows, n_cols = U_centroids_hat.shape[0], U_centroids_hat.shape[1]
    lst_constraints, lst_constraints_vals = build_constraint_set_smart(
        n_rows, n_cols, nb_factors,
        sparsity_factor=sparsity_factor,
        residual_on_right=residual_on_right,
        fast_unstable_proj=True)
    logger.info("constraints: {}".format(pformat(lst_constraints_vals)))

    palm_params = {
        "init_lambda": 1.,
        "nb_iter": nb_iter_palm,
        "lst_constraint_sets": lst_constraints,
        "residual_on_right": residual_on_right,
        "delta_objective_error_threshold": delta_objective_error_threshold,
        "track_objective": False,
    }

    logger.info('Running QuicK-means with Palm')
    qmeans_outcome = qmeans(
        X_data=X,
        K_nb_cluster=nb_clusters,
        nb_iter=nb_iter_kmeans,
        nb_factors=nb_factors,
        params_palm4msa=palm_params,
        initialization=U_centroids_hat,
        delta_objective_error_threshold=delta_objective_error_threshold)
    # Results are unpacked but not used any further in this function.
    objective_function_palm, op_centroids_palm, indicator_palm, _ = \
        qmeans_outcome

    try:
        logger.info('Running K-means')
        kmeans_outcome = kmeans(X, nb_clusters, nb_iter_kmeans,
                                initialization=U_centroids_hat)
        objective_values_k, centroids_finaux, indicator_kmean = kmeans_outcome
    except SystemExit as e:
        # A SystemExit raised during the k-means run is logged, not propagated.
        logger.info("There have been a problem in kmeans: {}".format(str(e)))