def main_qmeans(X, U_init):
    """
    Run the qmeans algorithm on X, starting from U_init, and record the results.

    The wall-clock duration of the ``qmeans`` call is added to ``resprinter``
    under the key "traintime", and the objective values returned by ``qmeans``
    are added to ``objprinter`` under the name "qmeans_objective".

    :param X: The data handed to qmeans as ``X_data``.
    :param U_init: The initialization handed to qmeans; its two dimensions are
        also used as the left and right dimensions of the constraint sets.
    :return: The final centroids and the final indicator vector returned by qmeans.
    """
    # Both the constraint builder and qmeans are given "--nb-factors" + 1.
    nb_factors = paraman["--nb-factors"] + 1
    residual_on_right = paraman["--residual-on-right"]

    # Only the constraint sets are needed here; their description is discarded.
    lst_constraint_sets, _ = build_constraint_set_smart(
        left_dim=U_init.shape[0],
        right_dim=U_init.shape[1],
        nb_factors=nb_factors,
        sparsity_factor=paraman["--sparsity-factor"],
        residual_on_right=residual_on_right,
    )

    parameters_palm4msa = {
        "init_lambda": 1.,
        "nb_iter": paraman["--nb-iteration-palm"],
        "lst_constraint_sets": lst_constraint_sets,
        "residual_on_right": residual_on_right,
    }

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments happening while qmeans is running.
    start_qmeans = time.perf_counter()
    objective_values_q, final_centroids, indicator_vector_final = qmeans(
        X_data=X,
        K_nb_cluster=paraman["--nb-cluster"],
        nb_iter=paraman["--nb-iteration"],
        nb_factors=nb_factors,
        params_palm4msa=parameters_palm4msa,
        initialization=U_init,
        hierarchical_inside=paraman["--hierarchical"],
    )
    stop_qmeans = time.perf_counter()

    objprinter.add("qmeans_objective", ("after t",), objective_values_q)
    resprinter.add({"traintime": stop_qmeans - start_qmeans})

    return final_centroids, indicator_vector_final
def main_qmeans(X, U_init):
    """
    Run the qmeans algorithm on X, using U_init as the initialization.

    The processor time spent in the qmeans call (``time.process_time``) is
    reported to ``resprinter`` as "traintime"; the objective values are
    reported to ``objprinter`` as "qmeans_objective".

    :param X: The input data in which to find the clusters.
    :param U_init: The initialization of the clusters.
    :return: The final centroids as sparse factors, the indicator vector
    """
    # The same factor count and residual side are used for the constraints and for qmeans.
    total_nb_factors = paraman["--nb-factors"] + 1
    keep_residual_on_right = paraman["--residual-on-right"]

    # The second returned element (description of the constraints) is not used.
    constraint_sets, _ = build_constraint_set_smart(
        left_dim=U_init.shape[0],
        right_dim=U_init.shape[1],
        nb_factors=total_nb_factors,
        sparsity_factor=paraman["--sparsity-factor"],
        residual_on_right=keep_residual_on_right,
    )

    palm_settings = {
        "init_lambda": 1.,
        "nb_iter": paraman["--nb-iteration-palm"],
        "lst_constraint_sets": constraint_sets,
        "residual_on_right": keep_residual_on_right,
    }

    tic = time.process_time()
    objective_values, centroids, indicator_vector = qmeans(
        X_data=X,
        K_nb_cluster=paraman["--nb-cluster"],
        nb_iter=paraman["--nb-iteration"],
        nb_factors=total_nb_factors,
        params_palm4msa=palm_settings,
        initialization=U_init,
        hierarchical_inside=paraman["--hierarchical"],
    )
    toc = time.process_time()

    objprinter.add("qmeans_objective", ("after t",), objective_values)
    resprinter.add({"traintime": toc - tic})

    return centroids, indicator_vector
def main_qmeans(X, U_init):
    """
    Run the qmeans algorithm on X, using U_init as the initialization.

    When the "--minibatch" parameter is truthy, ``qkmeans_minibatch`` is called
    with that value as ``batch_size``; otherwise ``qmeans`` is called. The
    processor time spent in that call (``time.process_time``) is reported to
    ``resprinter`` as "traintime". The qmeans objective values and the palm
    objective values are reported to ``objprinter`` as "qmeans_objective" and
    "palm_objectives".

    :param X: The input data in which to find the clusters.
    :param U_init: The initialization of the clusters.
    :return: The final centroids as sparse factors, the indicator vector
    """
    # The same factor count and residual side are used for the constraints and for the solver.
    total_nb_factors = paraman["--nb-factors"] + 1
    keep_residual_on_right = paraman["--residual-on-right"]

    # The second returned element (description of the constraints) is not used.
    constraint_sets, _ = build_constraint_set_smart(
        left_dim=U_init.shape[0],
        right_dim=U_init.shape[1],
        nb_factors=total_nb_factors,
        sparsity_factor=paraman["--sparsity-factor"],
        residual_on_right=keep_residual_on_right,
        fast_unstable_proj=True,
    )

    palm_settings = {
        "init_lambda": 1.,
        "nb_iter": paraman["--nb-iteration-palm"],
        "lst_constraint_sets": constraint_sets,
        "residual_on_right": keep_residual_on_right,
        "delta_objective_error_threshold": paraman["--delta-threshold"],
        "track_objective": False,
    }

    tic = time.process_time()

    use_hierarchical = paraman["--hierarchical"]
    # Keyword arguments common to the full-batch and the minibatch solver.
    solver_kwargs = {
        "X_data": X,
        "K_nb_cluster": paraman["--nb-cluster"],
        "nb_iter": paraman["--nb-iteration"],
        "nb_factors": total_nb_factors,
        "params_palm4msa": palm_settings,
        "initialization": U_init,
        "hierarchical_inside": use_hierarchical,
        "hierarchical_init": paraman["--hierarchical-init"],
    }

    batch_size = paraman["--minibatch"]
    if batch_size:
        objective_values, centroids, indicator_vector, palm_objectives = qkmeans_minibatch(
            batch_size=batch_size, **solver_kwargs
        )
    else:
        objective_values, centroids, indicator_vector, palm_objectives = qmeans(**solver_kwargs)

    toc = time.process_time()

    objprinter.add("qmeans_objective", ("after t",), objective_values)

    # The hierarchical variant is registered with two extra axis names.
    palm_axes = (
        ("nb_iter_qmeans", "nb_factor-1", "split-finetune", "nb_iter_palm", "nb_factor_tracked")
        if use_hierarchical
        else ("nb_iter_qmeans", "nb_iter_palm", "nb_factor_tracked")
    )
    objprinter.add("palm_objectives", palm_axes, palm_objectives)

    resprinter.add({"traintime": toc - tic})

    return centroids, indicator_vector
"init_lambda": 1., "nb_iter": nb_iter_palm, "lst_constraint_sets": lst_constraints, "residual_on_right": True, "delta_objective_error_threshold": delta_objective_error_threshold_in_palm, "track_objective": False } logger.info('Running QuicK-means with H-Palm') # QKmeans with hierarchical palm4msa objective_function_with_hier_palm, op_centroids_hier, indicator_hier, lst_objective_function_hier_palm = \ qmeans(X, nb_clusters, nb_iter_kmeans, nb_factors, hierarchical_palm_init, initialization=U_centroids_hat, hierarchical_inside=True) # QKmeans with simple palm4msa logger.info('Running QuicK-means with Palm') objective_function_with_palm, op_centroids_palm, indicator_palm, lst_objective_function_palm = \ qmeans(X, nb_clusters, nb_iter_kmeans, nb_factors, hierarchical_palm_init, initialization=U_centroids_hat) # Kmeans with lloyd algorithm logger.info('Running K-means')
def main(small_dim):
    """
    Run qmeans (with palm) and then kmeans on data generated by ``datasets.make_blobs``.

    The numpy random generator is seeded with 0 before anything is generated.
    Both algorithms start from the same initialization: ``nb_clusters`` rows of
    the data picked through a random permutation.

    :param small_dim: If truthy, use the small setting (1000 samples, 20 features,
        10 clusters, 5 factors); otherwise the large one (10000 samples,
        2048 features, 256 clusters, log2-based number of factors).
    :return: Nothing; the outputs of both runs are only bound to local names.
    """
    np.random.seed(0)

    # Problem sizes; the number of kmeans iterations is the same in both settings.
    nb_iter_kmeans = 10
    if small_dim:
        nb_clusters, n_samples, n_features, n_centers = 10, 1000, 20, 50
        nb_factors = 5
    else:
        nb_clusters, n_samples, n_features, n_centers = 256, 10000, 2048, 4096
        nb_factors = int(np.log2(min(nb_clusters, n_features)))

    X, _ = datasets.make_blobs(n_samples=n_samples,
                               n_features=n_features,
                               centers=n_centers)

    # kmeans++ initialization is not feasible because complexity is O(ndk)...
    shuffled_indices = np.random.permutation(X.shape[0])
    U_centroids_hat = X[shuffled_indices[:nb_clusters]]

    residual_on_right = nb_clusters < n_features
    sparsity_factor = 2
    nb_iter_palm = 300
    delta_objective_error_threshold = 1e-6

    lst_constraints, lst_constraints_vals = build_constraint_set_smart(
        U_centroids_hat.shape[0],
        U_centroids_hat.shape[1],
        nb_factors,
        sparsity_factor=sparsity_factor,
        residual_on_right=residual_on_right,
        fast_unstable_proj=True,
    )
    logger.info("constraints: {}".format(pformat(lst_constraints_vals)))

    palm_params = {
        "init_lambda": 1.,
        "nb_iter": nb_iter_palm,
        "lst_constraint_sets": lst_constraints,
        "residual_on_right": residual_on_right,
        "delta_objective_error_threshold": delta_objective_error_threshold,
        "track_objective": False,
    }

    # NOTE: a qmeans run with hierarchical_inside=True used to sit here but is
    # disabled, as is the exception handler that wrapped the qmeans calls.
    logger.info('Running QuicK-means with Palm')
    objective_function_palm, op_centroids_palm, indicator_palm, _ = qmeans(
        X_data=X,
        K_nb_cluster=nb_clusters,
        nb_iter=nb_iter_kmeans,
        nb_factors=nb_factors,
        params_palm4msa=palm_params,
        initialization=U_centroids_hat,
        delta_objective_error_threshold=delta_objective_error_threshold,
    )

    # Only SystemExit is intercepted below; any other exception propagates.
    try:
        logger.info('Running K-means')
        objective_values_k, centroids_finaux, indicator_kmean = kmeans(
            X, nb_clusters, nb_iter_kmeans, initialization=U_centroids_hat
        )
    except SystemExit as e:
        logger.info("There have been a problem in kmeans: {}".format(str(e)))