def main_qmeans(X, U_init):
    """
    Run qmeans on X from the initial centroids U_init and record the outcome.

    The elapsed time (measured with ``time.time``) is sent to ``resprinter``
    and the qmeans objective values are sent to ``objprinter``.

    :param X: The data handed to qmeans as ``X_data``.
    :param U_init: The initial centroids; its first two dimensions size the constraint sets.
    :return: The final centroids and the final indicator vector returned by qmeans.
    """
    # The constraint sets and qmeans both use one more factor than the
    # value given by the "--nb-factors" parameter.
    total_nb_factors = paraman["--nb-factors"] + 1
    residual_on_right = paraman["--residual-on-right"]

    constraint_sets, _constraint_sets_desc = build_constraint_set_smart(
        left_dim=U_init.shape[0],
        right_dim=U_init.shape[1],
        nb_factors=total_nb_factors,
        sparsity_factor=paraman["--sparsity-factor"],
        residual_on_right=residual_on_right)

    palm_settings = {
        "init_lambda": 1.,
        "nb_iter": paraman["--nb-iteration-palm"],
        "lst_constraint_sets": constraint_sets,
        "residual_on_right": residual_on_right,
    }

    tic = time.time()
    objective_values, centroids, indicator_vector = qmeans(
        X_data=X,
        K_nb_cluster=paraman["--nb-cluster"],
        nb_iter=paraman["--nb-iteration"],
        nb_factors=total_nb_factors,
        params_palm4msa=palm_settings,
        initialization=U_init,
        hierarchical_inside=paraman["--hierarchical"],
    )
    toc = time.time()

    objprinter.add("qmeans_objective", ("after t", ), objective_values)
    resprinter.add({"traintime": toc - tic})

    return centroids, indicator_vector
def process_palm_on_top_of_kmeans(kmeans_centroids):
    """
    Factorize given kmeans centroids into sparse factors with hierarchical palm4msa.

    The target given to palm is ``identity @ kmeans_centroids`` and the initial
    lambda is scaled by ``sqrt(number of centroids)``; the returned lambda is
    divided by that same value before being folded into the result.

    :param kmeans_centroids: The centroid matrix; ``shape[0]`` and ``shape[1]`` are used as left and right dimensions.
    :return: A SparseFactors object built from the factors at index 1 and above
        (the factor at index 0 is left out), the first of them multiplied by lambda.
    """
    total_nb_factors = paraman["--nb-factors"] + 1
    nb_centroids = kmeans_centroids.shape[0]
    residual_on_right = paraman["--residual-on-right"]

    constraint_sets, _constraint_sets_desc = build_constraint_set_smart(
        left_dim=nb_centroids,
        right_dim=kmeans_centroids.shape[1],
        nb_factors=total_nb_factors,
        sparsity_factor=paraman["--sparsity-factor"],
        residual_on_right=residual_on_right)

    init_factors = init_lst_factors(*kmeans_centroids.shape, total_nb_factors)
    eye_norm = np.sqrt(nb_centroids)

    # Only lambda and the factors are used; the three other outputs are ignored.
    scaled_lambda, op_factors, _, _, _ = hierarchical_palm4msa(
        arr_X_target=np.eye(nb_centroids) @ kmeans_centroids,
        lst_S_init=init_factors,
        lst_dct_projection_function=constraint_sets,
        f_lambda_init=1. * eye_norm,
        nb_iter=paraman["--nb-iteration-palm"],
        update_right_to_left=True,
        residual_on_right=paraman["--residual-on-right"],
        graphical_display=False)

    # Undo the scaling applied to the initial lambda.
    final_lambda = scaled_lambda / eye_norm

    factors = op_factors.get_list_of_factors()
    first_kept_factor = factors[1] * final_lambda
    return SparseFactors([first_kept_factor] + factors[2:])
def pyqalm_hierarchical_hadamard(H, d):
    """
    Factorize H with hierarchical_palm4msa using ``int(log2(d))`` factors of size d x d.

    :param H: The target matrix passed to the algorithm as ``arr_X_target``.
    :param d: The dimension used for every (square) factor and for the constraint sets.
    :return: The final lambda, the list of initial factors, the final factors and the final X.
    """
    # Palm settings
    palm_nb_iter = 30
    palm_sparsity_factor = 2
    palm_nb_factors = int(np.log2(d))
    start_lambda = 1.  # the scaling factor starts at 1

    # Initial factors: identities everywhere, then the last one replaced by zeros.
    init_factors = []
    for _ in range(palm_nb_factors):
        init_factors.append(np.eye(d))
    init_factors[-1] = np.zeros((d, d))

    # Projection operators for each factor
    projection_ops, _projection_ops_desc = build_constraint_set_smart(
        left_dim=d,
        right_dim=d,
        nb_factors=palm_nb_factors,
        sparsity_factor=palm_sparsity_factor,
        residual_on_right=True,
        fast_unstable_proj=False,
        constant_first=False)

    # Run the algorithm; the last two outputs are not used.
    final_lambda, final_factors, final_X, _, _ = hierarchical_palm4msa(
        arr_X_target=H,
        lst_S_init=init_factors,
        lst_dct_projection_function=projection_ops,
        f_lambda_init=start_lambda,
        nb_iter=palm_nb_iter,
        update_right_to_left=True,
        residual_on_right=True)

    return final_lambda, init_factors, final_factors, final_X
def test_compare_qmeans(self):
    """
    Check that qmeans_fast and qmeans_slow agree, with and without hierarchical palm inside.

    The objective values of the fast version are compared with column 1 of the
    objective values of the slow version, and the third outputs are compared directly.
    """
    init_centroids = self.U_centroids_hat

    constraint_sets, _constraint_sets_vals = build_constraint_set_smart(
        init_centroids.shape[0],
        init_centroids.shape[1],
        self.nb_factors,
        sparsity_factor=self.sparsity_factor,
        residual_on_right=self.residual_on_right)

    palm_settings = {
        "init_lambda": 1.,
        "nb_iter": self.nb_iter_palm,
        "lst_constraint_sets": constraint_sets,
        "residual_on_right": self.residual_on_right,
    }

    # Positional arguments shared by the two implementations.
    shared_args = (self.X, self.nb_clusters, self.nb_iter_kmeans,
                   self.nb_factors, palm_settings)

    for use_hierarchical in (True, False):
        print(use_hierarchical)

        objectives_fast, _op_centroids, t_fast = qmeans_fast(
            *shared_args,
            initialization=init_centroids,
            hierarchical_inside=use_hierarchical)

        objectives_slow, _U, t_slow = qmeans_slow(
            *shared_args,
            initialization=init_centroids,
            graphical_display=False,
            hierarchical_inside=use_hierarchical)

        np.testing.assert_array_almost_equal(objectives_fast,
                                             objectives_slow[:, 1])
        np.testing.assert_array_almost_equal(t_fast, t_slow)
def test_run_qmeans_fast(self):
    """
    Run qmeans_fast with objective tracking and check the sizes of its outputs.

    Checked for both values of ``hierarchical_inside``:

    - the centroids operator holds ``nb_factors - 1`` factors;
    - there are at most ``nb_iter_kmeans`` objective values;
    - the third output has one entry per row of ``self.X``;
    - there is one more tracked palm objective than qmeans objective values;
    - each tracked palm objective has ``nb_factors - 1`` entries of length 2
      when hierarchical, and ``nb_iter_palm`` entries otherwise.
    """
    lst_constraints, _lst_constraints_vals = build_constraint_set_smart(
        self.U_centroids_hat.shape[0],
        self.U_centroids_hat.shape[1],
        self.nb_factors,
        sparsity_factor=self.sparsity_factor,
        residual_on_right=self.residual_on_right)

    hierarchical_palm_init = {
        "init_lambda": 1.,
        "nb_iter": self.nb_iter_palm,
        "lst_constraint_sets": lst_constraints,
        "residual_on_right": self.residual_on_right,
        "delta_objective_error_threshold": self.delta_objective_error_threshold,
        "track_objective": True
    }

    for hierarchical_inside in (True, False):
        print(hierarchical_inside)

        objective_values_q_fast, op_centroids, t_fast, lst_obj_palm = \
            qmeans_fast(self.X,
                        self.nb_clusters,
                        self.nb_iter_kmeans,
                        self.nb_factors,
                        hierarchical_palm_init,
                        initialization=self.U_centroids_hat,
                        hierarchical_inside=hierarchical_inside)

        self.assertEqual(len(op_centroids), self.nb_factors - 1)
        # Dedicated assertion so that a failure reports both values.
        self.assertLessEqual(len(objective_values_q_fast),
                             self.nb_iter_kmeans)
        self.assertEqual(len(t_fast), len(self.X))
        self.assertEqual(len(lst_obj_palm),
                         len(objective_values_q_fast) + 1)

        if hierarchical_inside:
            for obj_palm in lst_obj_palm:
                self.assertEqual(len(obj_palm), self.nb_factors - 1)
                for sub_palm_split_fine in obj_palm:
                    self.assertEqual(len(sub_palm_split_fine), 2)
        else:
            for obj_palm in lst_obj_palm:
                self.assertEqual(len(obj_palm), self.nb_iter_palm)
def main_qmeans(X, U_init):
    """
    Run the qmeans algorithm on X, starting from the centroids U_init.

    The elapsed process time goes to ``resprinter``; the qmeans objective values
    and the palm objectives returned by qmeans go to ``objprinter``.

    :param X: The input data in which to find the clusters.
    :param U_init: The initialization of the clusters.
    :return: The final centroids and the final indicator vector returned by qmeans.
    """
    total_nb_factors = paraman["--nb-factors"] + 1
    residual_on_right = paraman["--residual-on-right"]

    constraint_sets, _constraint_sets_desc = build_constraint_set_smart(
        left_dim=U_init.shape[0],
        right_dim=U_init.shape[1],
        nb_factors=total_nb_factors,
        sparsity_factor=paraman["--sparsity-factor"],
        residual_on_right=residual_on_right)

    palm_settings = {
        "init_lambda": 1.,
        "nb_iter": paraman["--nb-iteration-palm"],
        "lst_constraint_sets": constraint_sets,
        "residual_on_right": residual_on_right,
        "delta_objective_error_threshold": paraman["--delta-threshold"],
        "track_objective": False,
    }

    tic = time.process_time()
    objective_values, centroids, indicator_vector, palm_objectives = qmeans(
        X_data=X,
        K_nb_cluster=paraman["--nb-cluster"],
        nb_iter=paraman["--nb-iteration"],
        nb_factors=total_nb_factors,
        params_palm4msa=palm_settings,
        initialization=U_init,
        hierarchical_inside=paraman["--hierarchical"],
    )
    toc = time.process_time()

    objprinter.add("qmeans_objective", ("after t", ), objective_values)

    # The axis names of the palm objectives depend on whether palm ran
    # hierarchically inside qmeans.
    if paraman["--hierarchical"]:
        palm_axes = ("nb_iter_qmeans", "nb_factor-1", "split-finetune",
                     "nb_iter_palm", "nb_factor_tracked")
    else:
        palm_axes = ("nb_iter_qmeans", "nb_iter_palm", "nb_factor_tracked")
    objprinter.add("palm_objectives", palm_axes, palm_objectives)

    resprinter.add({"traintime": toc - tic})

    return centroids, indicator_vector
def process_palm_on_top_of_kmeans(kmeans_centroids):
    """
    Factorize given kmeans centroids into sparse factors with palm.

    Depending on the "--hierarchical" parameter, either hierarchical_palm4msa or
    plain palm4msa (with the "finetune" projections of the last constraint set)
    is used. The target is ``identity @ kmeans_centroids`` and the initial lambda
    is scaled by ``sqrt(number of centroids)``; the returned lambda is divided by
    that same value before being folded into the result.

    :param kmeans_centroids: The centroid matrix; ``shape[0]`` and ``shape[1]`` are used as left and right dimensions.
    :return: A SparseFactors object built from the factors at index 1 and above
        (the factor at index 0 is left out), the first of them multiplied by lambda.
    """
    total_nb_factors = paraman["--nb-factors"] + 1
    nb_centroids = kmeans_centroids.shape[0]

    constraint_sets, _constraint_sets_desc = build_constraint_set_smart(
        left_dim=nb_centroids,
        right_dim=kmeans_centroids.shape[1],
        nb_factors=total_nb_factors,
        sparsity_factor=paraman["--sparsity-factor"],
        residual_on_right=paraman["--residual-on-right"],
        fast_unstable_proj=True)

    init_factors = init_lst_factors(*kmeans_centroids.shape, total_nb_factors)
    eye_norm = np.sqrt(nb_centroids)

    if paraman["--hierarchical"]:
        # Only lambda and the factors are used; the other outputs are ignored.
        scaled_lambda, op_factors, _, _, _ = hierarchical_palm4msa(
            arr_X_target=np.eye(nb_centroids) @ kmeans_centroids,
            lst_S_init=init_factors,
            lst_dct_projection_function=constraint_sets,
            f_lambda_init=1. * eye_norm,
            nb_iter=paraman["--nb-iteration-palm"],
            update_right_to_left=True,
            residual_on_right=paraman["--residual-on-right"],
            delta_objective_error_threshold_palm=paraman["--delta-threshold"],
            track_objective_palm=False)
    else:
        scaled_lambda, op_factors, _, _, _ = palm4msa(
            arr_X_target=np.eye(nb_centroids) @ kmeans_centroids,
            lst_S_init=init_factors,
            nb_factors=len(init_factors),
            lst_projection_functions=constraint_sets[-1]["finetune"],
            f_lambda_init=1. * eye_norm,
            nb_iter=paraman["--nb-iteration-palm"],
            update_right_to_left=True,
            delta_objective_error_threshold=paraman["--delta-threshold"],
            track_objective=False)

    log_memory_usage(
        "Memory after palm on top of kmeans in process_palm_on_top_of_kmeans")

    # Undo the scaling applied to the initial lambda.
    final_lambda = scaled_lambda / eye_norm

    factors = op_factors.get_list_of_factors()
    first_kept_factor = factors[1] * final_lambda
    return SparseFactors([first_kept_factor] + factors[2:])
def test_build_constraint_set(self):
    """
    Build a non-hierarchical constraint set for a 10 x 32 problem and print its description.

    Nothing is asserted: the test only checks that the call completes.
    """
    dims = (10, 32)
    nb_factors = 4

    _constraints, constraints_description = build_constraint_set_smart(
        dims[0],
        dims[1],
        nb_factors,
        sparsity_factor=2,
        residual_on_right=False,
        constant_first=True,
        hierarchical=False)

    print(constraints_description)
# Parameters for palm nb_iter = 300 nb_factors = int(np.log2(32)) sparsity_factor = 2 # Create init sparse factors as identity (the first sparse matrix will remain constant) lst_factors = [np.eye(d) for _ in range(nb_factors)] lst_factors[-1] = np.zeros((d, d)) _lambda = 1. # init the scaling factor at 1 # Create the projection operators for each factor lst_proj_op_by_fac_step, lst_proj_op_by_fac_step_desc = build_constraint_set_smart( left_dim=d, right_dim=d, nb_factors=nb_factors, sparsity_factor=sparsity_factor, residual_on_right=True, fast_unstable_proj=False, constant_first=False) logger.info( "Description of projection operators for each iteration of hierarchical_palm: \n{}" .format(pprint.pformat(lst_proj_op_by_fac_step_desc))) print(np.__version__) # Call the algorithm final_lambda, final_factors, final_X, _, _ = hierarchical_palm4msa( arr_X_target=H, lst_S_init=lst_factors, lst_dct_projection_function=lst_proj_op_by_fac_step, f_lambda_init=_lambda,
nb_factors = int(np.log2(min(nb_clusters, n_features))) X, _ = datasets.make_blobs(n_samples=n_samples, n_features=n_features, centers=n_centers) U_centroids_hat = X[np.random.permutation(X.shape[0])[:nb_clusters]] # kmeans++ initialization is not feasible because complexity is O(ndk)... residual_on_right = True sparsity_factor = 2 nb_iter_palm = 30 delta_objective_error_threshold_in_palm = 1e-6 track_objective_in_palm = True lst_constraints, lst_constraints_vals = build_constraint_set_smart( U_centroids_hat.shape[0], U_centroids_hat.shape[1], nb_factors, sparsity_factor=sparsity_factor, residual_on_right=residual_on_right) logger.info("constraints: {}".format(pformat(lst_constraints_vals))) hierarchical_palm_init = { "init_lambda": 1., "nb_iter": nb_iter_palm, "lst_constraint_sets": lst_constraints, "residual_on_right": residual_on_right, "delta_objective_error_threshold": delta_objective_error_threshold_in_palm, "track_objective": track_objective_in_palm } logger.info('Running QuicK-means with H-Palm') objective_function_with_hier_palm, op_centroids_hier, indicator_hier, lst_objective_function_hier_palm = \
X.shape[0] )[: nb_clusters]] # kmeans++ initialization is not feasible because complexity is O(ndk)... nb_factors = 5 sparsity_factor = 2 nb_iter_palm = 300 residual_on_right = False # lst_constraints, lst_constraints_vals = build_constraint_sets(U_centroids_hat.shape[0], U_centroids_hat.shape[1], nb_factors, sparsity_factor=sparsity_factor) K = U_centroids_hat.shape[0] d = U_centroids_hat.shape[1] lst_constraints, lst_constraints_vals = build_constraint_set_smart( K, d, nb_factors, sparsity_factor=sparsity_factor, residual_on_right=residual_on_right) logger.info("constraints: {}".format(pformat(lst_constraints_vals))) hierarchical_palm_init = { "init_lambda": 1., "nb_iter": nb_iter_palm, "lst_constraint_sets": lst_constraints, "residual_on_right": residual_on_right } # try: objective_values_q_hier, centroids_finaux_q_hier, indicator_hier = qmeans( X, nb_clusters,
def main(small_dim):
    """
    Run QuicK-means (with Palm) and then K-means on synthetic blobs.

    Both algorithms start from the same initial centroids: ``nb_clusters`` rows
    of the data picked through a random permutation. The numpy random seed is
    set to 0 first.

    :param small_dim: If truthy, use small problem sizes; otherwise use the large ones.
    :return: None.
    """
    np.random.seed(0)

    if small_dim:
        nb_clusters = 10
        nb_iter_kmeans = 10
        n_samples = 1000
        n_features = 20
        n_centers = 50
        nb_factors = 5
    else:
        nb_clusters = 256
        nb_iter_kmeans = 10
        n_samples = 10000
        n_features = 2048
        n_centers = 4096
        nb_factors = int(np.log2(min(nb_clusters, n_features)))

    X, _ = datasets.make_blobs(n_samples=n_samples,
                               n_features=n_features,
                               centers=n_centers)

    # kmeans++ initialization is not feasible because complexity is O(ndk)...
    U_centroids_hat = X[np.random.permutation(X.shape[0])[:nb_clusters]]

    residual_on_right = nb_clusters < n_features
    sparsity_factor = 2
    nb_iter_palm = 300
    delta_objective_error_threshold = 1e-6

    lst_constraints, lst_constraints_vals = build_constraint_set_smart(
        U_centroids_hat.shape[0],
        U_centroids_hat.shape[1],
        nb_factors,
        sparsity_factor=sparsity_factor,
        residual_on_right=residual_on_right,
        fast_unstable_proj=True)
    logger.info("constraints: {}".format(pformat(lst_constraints_vals)))

    hierarchical_palm_init = {
        "init_lambda": 1.,
        "nb_iter": nb_iter_palm,
        "lst_constraint_sets": lst_constraints,
        "residual_on_right": residual_on_right,
        # Same threshold as the one given to qmeans below, so that the two
        # cannot drift apart.
        "delta_objective_error_threshold": delta_objective_error_threshold,
        "track_objective": False,
    }

    logger.info('Running QuicK-means with Palm')
    objective_function_palm, op_centroids_palm, indicator_palm, _ = \
        qmeans(X_data=X,
               K_nb_cluster=nb_clusters,
               nb_iter=nb_iter_kmeans,
               nb_factors=nb_factors,
               params_palm4msa=hierarchical_palm_init,
               initialization=U_centroids_hat,
               delta_objective_error_threshold=delta_objective_error_threshold)

    try:
        logger.info('Running K-means')
        objective_values_k, centroids_finaux, indicator_kmean = \
            kmeans(X, nb_clusters, nb_iter_kmeans,
                   initialization=U_centroids_hat)
    except SystemExit as e:
        # NOTE(review): only SystemExit is intercepted here, presumably because
        # kmeans may call exit() on failure -- confirm against kmeans.
        logger.info("There have been a problem in kmeans: {}".format(str(e)))
dims = [(d, d), (d, d // 2), (d // 2, d)] results = {} for n_fac in n_facs: for dim in dims: pair = dict() lst_factors = init_lst_factors(dim[0], dim[1], n_fac, first_square=False) # construit les contraintes de projection dans une liste lst_constraints, lst_constraints_vals = build_constraint_set_smart( dim[0], dim[1], n_fac, sparsity_factor=sparsity_fac, residual_on_right=True, fast_unstable_proj=False, constant_first=False) lst_constraints_palm = lst_constraints[-1]["finetune"] # construit la matrice cible X_target = np.random.rand(dim[0], dim[1]) # op_factor est en quelque sortes la liste des facteurs sparses _lambda, op_factors, _, _, _ = \ palm4msa( arr_X_target=X_target, lst_S_init=lst_factors, nb_factors=len(lst_factors), lst_projection_functions=lst_constraints_palm,