def main_qmeans(X, U_init):
    """
    Run the qmeans algorithm on X, starting from the centroids U_init.

    The constraint sets and the palm4msa settings are built from the options
    stored in ``paraman``. The elapsed time (measured with ``time.time``) is
    pushed to ``resprinter`` and the objective values to ``objprinter``.

    :param X: The input data in which to find the clusters.
    :param U_init: The initial centroids; its first two dimensions size the constraint sets.

    :return: The final centroids and the final indicator vector, as returned by ``qmeans``.
    """
    total_nb_factors = paraman["--nb-factors"] + 1
    keep_residual_on_right = paraman["--residual-on-right"]

    # The textual description of the constraints is not needed here.
    constraint_sets, _ = build_constraint_set_smart(
        left_dim=U_init.shape[0],
        right_dim=U_init.shape[1],
        nb_factors=total_nb_factors,
        sparsity_factor=paraman["--sparsity-factor"],
        residual_on_right=keep_residual_on_right)

    palm_settings = dict()
    palm_settings["init_lambda"] = 1.
    palm_settings["nb_iter"] = paraman["--nb-iteration-palm"]
    palm_settings["lst_constraint_sets"] = constraint_sets
    palm_settings["residual_on_right"] = keep_residual_on_right

    tic = time.time()
    objective_values, centroids, indicator_vector = qmeans(
        X_data=X,
        K_nb_cluster=paraman["--nb-cluster"],
        nb_iter=paraman["--nb-iteration"],
        nb_factors=total_nb_factors,
        params_palm4msa=palm_settings,
        initialization=U_init,
        hierarchical_inside=paraman["--hierarchical"],
    )
    elapsed = time.time() - tic

    objprinter.add("qmeans_objective", ("after t", ), objective_values)
    resprinter.add({"traintime": elapsed})

    return centroids, indicator_vector
Beispiel #2
0
def process_palm_on_top_of_kmeans(kmeans_centroids):
    """
    Factorize already computed kmeans centroids with hierarchical palm4msa.

    The target handed to ``hierarchical_palm4msa`` is the identity matrix
    times the centroids, and the initial lambda is multiplied by
    ``sqrt(number of centroids)``. The lambda returned by the algorithm is
    divided by the same quantity before being folded into the factors.

    :param kmeans_centroids: The centroid matrix to factorize.

    :return: A ``SparseFactors`` object built from the resulting factors with
        the first one left out; the second factor is scaled by lambda.
    """
    nb_centroids = kmeans_centroids.shape[0]
    total_nb_factors = paraman["--nb-factors"] + 1

    # The textual description of the constraints is not needed here.
    constraint_sets, _ = build_constraint_set_smart(
        left_dim=nb_centroids,
        right_dim=kmeans_centroids.shape[1],
        nb_factors=total_nb_factors,
        sparsity_factor=paraman["--sparsity-factor"],
        residual_on_right=paraman["--residual-on-right"])

    init_factors = init_lst_factors(*kmeans_centroids.shape, total_nb_factors)

    identity_norm = np.sqrt(nb_centroids)
    target = np.eye(nb_centroids) @ kmeans_centroids

    # Only the scaling factor and the factor operator are used afterwards.
    lambda_scaled, op_factors, _, _, _ = hierarchical_palm4msa(
        arr_X_target=target,
        lst_S_init=init_factors,
        lst_dct_projection_function=constraint_sets,
        f_lambda_init=1. * identity_norm,
        nb_iter=paraman["--nb-iteration-palm"],
        update_right_to_left=True,
        residual_on_right=paraman["--residual-on-right"],
        graphical_display=False)

    lambda_final = lambda_scaled / identity_norm
    factors = op_factors.get_list_of_factors()

    # Skip factors[0] and carry lambda in the first factor that is kept.
    scaled_head = factors[1] * lambda_final
    return SparseFactors([scaled_head] + factors[2:])
def pyqalm_hierarchical_hadamard(H, d):
    """
    Factorize the target matrix H with ``hierarchical_palm4msa``.

    ``int(log2(d))`` factors of size d x d are used, with 30 palm iterations
    and a sparsity factor of 2. All initial factors are identities, except
    the last one which is a zero matrix.

    :param H: The target matrix passed as ``arr_X_target``.
    :param d: The dimension used for every factor and for the constraint sets.

    :return: The final lambda, the list of initial factors, the final factors
        and the final X, as returned by ``hierarchical_palm4msa``.
    """
    palm_nb_iter = 30
    factor_count = int(np.log2(d))
    sparsity = 2
    initial_lambda = 1.  # the scaling factor starts at 1

    # Identity everywhere, zeros for the last factor.
    initial_factors = [np.eye(d) for _ in range(factor_count)]
    initial_factors[-1] = np.zeros((d, d))

    # One set of projection operators per factor; the description is unused.
    projection_sets, _ = build_constraint_set_smart(
        left_dim=d,
        right_dim=d,
        nb_factors=factor_count,
        sparsity_factor=sparsity,
        residual_on_right=True,
        fast_unstable_proj=False,
        constant_first=False)

    lambda_out, factors_out, X_out, _, _ = hierarchical_palm4msa(
        arr_X_target=H,
        lst_S_init=initial_factors,
        lst_dct_projection_function=projection_sets,
        f_lambda_init=initial_lambda,
        nb_iter=palm_nb_iter,
        update_right_to_left=True,
        residual_on_right=True)

    return lambda_out, initial_factors, factors_out, X_out
Beispiel #4
0
    def test_compare_qmeans(self):
        """
        Check that ``qmeans_fast`` and ``qmeans_slow`` agree.

        Both implementations are run with the same data, the same palm
        parameters and the same initialization, with and without
        ``hierarchical_inside``. The objective values of the fast version are
        compared to column 1 of the slow version's objective values, and the
        third returned value of both versions is compared as well.
        """
        centroids_init = self.U_centroids_hat

        # The textual description of the constraints is not needed here.
        constraint_sets, _ = build_constraint_set_smart(
            centroids_init.shape[0],
            centroids_init.shape[1],
            self.nb_factors,
            sparsity_factor=self.sparsity_factor,
            residual_on_right=self.residual_on_right)

        palm_params = dict()
        palm_params["init_lambda"] = 1.
        palm_params["nb_iter"] = self.nb_iter_palm
        palm_params["lst_constraint_sets"] = constraint_sets
        palm_params["residual_on_right"] = self.residual_on_right

        # Positional arguments shared by the fast and the slow implementation.
        shared_args = (self.X, self.nb_clusters, self.nb_iter_kmeans,
                       self.nb_factors, palm_params)

        for use_hierarchical in (True, False):
            print(use_hierarchical)

            objectives_fast, _, indicator_fast = qmeans_fast(
                *shared_args,
                initialization=centroids_init,
                hierarchical_inside=use_hierarchical)

            objectives_slow, _, indicator_slow = qmeans_slow(
                *shared_args,
                initialization=centroids_init,
                graphical_display=False,
                hierarchical_inside=use_hierarchical)

            np.testing.assert_array_almost_equal(objectives_fast,
                                                 objectives_slow[:, 1])
            np.testing.assert_array_almost_equal(indicator_fast,
                                                 indicator_slow)
Beispiel #5
0
    def test_run_qmeans_fast(self):
        """
        Check the sizes of everything returned by ``qmeans_fast``.

        ``qmeans_fast`` is run with objective tracking enabled, with and
        without ``hierarchical_inside``. The test verifies that:

        - the centroid operator has ``nb_factors - 1`` elements;
        - there are at most ``nb_iter_kmeans`` objective values;
        - the third returned value has as many entries as ``self.X``;
        - there is one more palm objective record than objective values;
        - each palm record has ``nb_factors - 1`` entries of length 2 in the
          hierarchical case, and ``nb_iter_palm`` entries otherwise.
        """
        lst_constraints, _ = build_constraint_set_smart(
            self.U_centroids_hat.shape[0],
            self.U_centroids_hat.shape[1],
            self.nb_factors,
            sparsity_factor=self.sparsity_factor,
            residual_on_right=self.residual_on_right)

        hierarchical_palm_init = {
            "init_lambda": 1.,
            "nb_iter": self.nb_iter_palm,
            "lst_constraint_sets": lst_constraints,
            "residual_on_right": self.residual_on_right,
            "delta_objective_error_threshold":
            self.delta_objective_error_threshold,
            "track_objective": True
        }

        for hierarchical_inside in (True, False):
            print(hierarchical_inside)
            objective_values_q_fast, op_centroids, t_fast, lst_obj_palm = \
                qmeans_fast(self.X,
                            self.nb_clusters,
                            self.nb_iter_kmeans,
                            self.nb_factors,
                            hierarchical_palm_init,
                            initialization=self.U_centroids_hat,
                            hierarchical_inside=hierarchical_inside)

            self.assertEqual(len(op_centroids), self.nb_factors - 1)
            # Dedicated assertion so that a failure reports both values.
            self.assertLessEqual(len(objective_values_q_fast),
                                 self.nb_iter_kmeans)
            self.assertEqual(len(t_fast), len(self.X))
            self.assertEqual(len(lst_obj_palm),
                             len(objective_values_q_fast) + 1)

            # Per-item assertions report the offending length on failure,
            # unlike assertTrue(all(...)).
            if hierarchical_inside:
                for obj_palm in lst_obj_palm:
                    self.assertEqual(len(obj_palm), self.nb_factors - 1)
                for obj_palm in lst_obj_palm:
                    for sub_palm_split_fine in obj_palm:
                        self.assertEqual(len(sub_palm_split_fine), 2)
            else:
                for obj_palm in lst_obj_palm:
                    self.assertEqual(len(obj_palm), self.nb_iter_palm)
def main_qmeans(X, U_init):
    """
    Run the qmeans algorithm on X, using U_init as initialization.

    The elapsed time (measured with ``time.process_time``) is pushed to
    ``resprinter``; the qmeans objective values and the palm objective values
    are pushed to ``objprinter``.

    :param X: The input data in which to find the clusters.
    :param U_init: The initialization of the clusters.

    :return: The final centroids as sparse factors, the indicator vector
    """
    total_nb_factors = paraman["--nb-factors"] + 1
    keep_residual_on_right = paraman["--residual-on-right"]
    use_hierarchical = paraman["--hierarchical"]

    # The textual description of the constraints is not needed here.
    constraint_sets, _ = build_constraint_set_smart(
        left_dim=U_init.shape[0],
        right_dim=U_init.shape[1],
        nb_factors=total_nb_factors,
        sparsity_factor=paraman["--sparsity-factor"],
        residual_on_right=keep_residual_on_right)

    palm_settings = dict()
    palm_settings["init_lambda"] = 1.
    palm_settings["nb_iter"] = paraman["--nb-iteration-palm"]
    palm_settings["lst_constraint_sets"] = constraint_sets
    palm_settings["residual_on_right"] = keep_residual_on_right
    palm_settings["delta_objective_error_threshold"] = paraman["--delta-threshold"]
    palm_settings["track_objective"] = False

    tic = time.process_time()
    qmeans_output = qmeans(
        X_data=X,
        K_nb_cluster=paraman["--nb-cluster"],
        nb_iter=paraman["--nb-iteration"],
        nb_factors=total_nb_factors,
        params_palm4msa=palm_settings,
        initialization=U_init,
        hierarchical_inside=use_hierarchical,
    )
    objective_values, centroids, indicator_vector, palm_objectives = qmeans_output
    elapsed = time.process_time() - tic

    objprinter.add("qmeans_objective", ("after t", ), objective_values)

    # The axis labels of the palm objectives depend on the hierarchical option.
    if use_hierarchical:
        palm_axes = ("nb_iter_qmeans", "nb_factor-1", "split-finetune",
                     "nb_iter_palm", "nb_factor_tracked")
    else:
        palm_axes = ("nb_iter_qmeans", "nb_iter_palm", "nb_factor_tracked")
    objprinter.add("palm_objectives", palm_axes, palm_objectives)

    resprinter.add({"traintime": elapsed})

    return centroids, indicator_vector
def process_palm_on_top_of_kmeans(kmeans_centroids):
    """
    Factorize already computed kmeans centroids with palm4msa.

    Depending on ``paraman["--hierarchical"]``, either
    ``hierarchical_palm4msa`` or plain ``palm4msa`` (with the "finetune"
    constraints of the last constraint set) is used. In both cases the target
    is the identity matrix times the centroids and the initial lambda is
    multiplied by ``sqrt(number of centroids)``; the returned lambda is
    divided by the same quantity before being folded into the factors.

    :param kmeans_centroids: The centroid matrix to factorize.

    :return: A ``SparseFactors`` object built from the resulting factors with
        the first one left out; the second factor is scaled by lambda.
    """
    nb_centroids = kmeans_centroids.shape[0]
    total_nb_factors = paraman["--nb-factors"] + 1

    # The textual description of the constraints is not needed here.
    constraint_sets, _ = build_constraint_set_smart(
        left_dim=nb_centroids,
        right_dim=kmeans_centroids.shape[1],
        nb_factors=total_nb_factors,
        sparsity_factor=paraman["--sparsity-factor"],
        residual_on_right=paraman["--residual-on-right"],
        fast_unstable_proj=True)

    init_factors = init_lst_factors(*kmeans_centroids.shape, total_nb_factors)

    identity_norm = np.sqrt(nb_centroids)

    if not paraman["--hierarchical"]:
        # Plain palm4msa: only the scaling factor and the factors are used.
        lambda_scaled, op_factors, _, _, _ = palm4msa(
            arr_X_target=np.eye(nb_centroids) @ kmeans_centroids,
            lst_S_init=init_factors,
            nb_factors=len(init_factors),
            lst_projection_functions=constraint_sets[-1]["finetune"],
            f_lambda_init=1. * identity_norm,
            nb_iter=paraman["--nb-iteration-palm"],
            update_right_to_left=True,
            delta_objective_error_threshold=paraman["--delta-threshold"],
            track_objective=False)
    else:
        lambda_scaled, op_factors, _, _, _ = hierarchical_palm4msa(
            arr_X_target=np.eye(nb_centroids) @ kmeans_centroids,
            lst_S_init=init_factors,
            lst_dct_projection_function=constraint_sets,
            f_lambda_init=1. * identity_norm,
            nb_iter=paraman["--nb-iteration-palm"],
            update_right_to_left=True,
            residual_on_right=paraman["--residual-on-right"],
            delta_objective_error_threshold_palm=paraman["--delta-threshold"],
            track_objective_palm=False)

    log_memory_usage(
        "Memory after palm on top of kmeans in process_palm_on_top_of_kmeans")

    lambda_final = lambda_scaled / identity_norm
    factors = op_factors.get_list_of_factors()

    # Skip factors[0] and carry lambda in the first factor that is kept.
    scaled_head = factors[1] * lambda_final
    return SparseFactors([scaled_head] + factors[2:])
Beispiel #8
0
    def test_build_constraint_set(self):
        """
        Smoke test for ``build_constraint_set_smart``.

        Builds the constraints of a 10 x 32 problem with 4 factors (first
        factor constant, non hierarchical) and prints their description.
        Nothing is asserted: the test only checks that the call succeeds.
        """
        nb_rows, nb_cols = 10, 32
        factor_count = 4

        _, constraints_description = build_constraint_set_smart(
            nb_rows,
            nb_cols,
            factor_count,
            sparsity_factor=2,
            residual_on_right=False,
            constant_first=True,
            hierarchical=False)

        print(constraints_description)
# Parameters for palm: number of iterations, number of factors and sparsity.
nb_iter = 300
# NOTE(review): the number of factors is derived from the literal 32 while
# the factors below are d x d -- presumably d == 32 here; confirm.
nb_factors = int(np.log2(32))
sparsity_factor = 2

# Create the initial factors: identity matrices, except the last one which is
# set to zeros.
# NOTE(review): the original comment said the first sparse matrix remains
# constant, but constant_first=False is passed below -- confirm the intent.
lst_factors = [np.eye(d) for _ in range(nb_factors)]
lst_factors[-1] = np.zeros((d, d))
_lambda = 1.  # init the scaling factor at 1

# Create the projection operators for each factor, along with their
# description (only used for logging below).
lst_proj_op_by_fac_step, lst_proj_op_by_fac_step_desc = build_constraint_set_smart(
    left_dim=d,
    right_dim=d,
    nb_factors=nb_factors,
    sparsity_factor=sparsity_factor,
    residual_on_right=True,
    fast_unstable_proj=False,
    constant_first=False)

# Log the pretty-printed description of the projection operators.
logger.info(
    "Description of projection operators for each iteration of hierarchical_palm: \n{}"
    .format(pprint.pformat(lst_proj_op_by_fac_step_desc)))
# Print the numpy version in use.
print(np.__version__)

# Call the algorithm
final_lambda, final_factors, final_X, _, _ = hierarchical_palm4msa(
    arr_X_target=H,
    lst_S_init=lst_factors,
    lst_dct_projection_function=lst_proj_op_by_fac_step,
    f_lambda_init=_lambda,
Beispiel #10
0
        nb_factors = int(np.log2(min(nb_clusters, n_features)))
    X, _ = datasets.make_blobs(n_samples=n_samples,
                               n_features=n_features,
                               centers=n_centers)

    U_centroids_hat = X[np.random.permutation(X.shape[0])[:nb_clusters]]
    # kmeans++ initialization is not feasible because complexity is O(ndk)...
    residual_on_right = True

    sparsity_factor = 2
    nb_iter_palm = 30
    delta_objective_error_threshold_in_palm = 1e-6
    track_objective_in_palm = True

    lst_constraints, lst_constraints_vals = build_constraint_set_smart(
        U_centroids_hat.shape[0], U_centroids_hat.shape[1], nb_factors,
        sparsity_factor=sparsity_factor, residual_on_right=residual_on_right)
    logger.info("constraints: {}".format(pformat(lst_constraints_vals)))


    hierarchical_palm_init = {
        "init_lambda": 1.,
        "nb_iter": nb_iter_palm,
        "lst_constraint_sets": lst_constraints,
        "residual_on_right": residual_on_right,
        "delta_objective_error_threshold": delta_objective_error_threshold_in_palm,
        "track_objective": track_objective_in_palm
    }

    logger.info('Running QuicK-means with H-Palm')
    objective_function_with_hier_palm, op_centroids_hier, indicator_hier, lst_objective_function_hier_palm = \
Beispiel #11
0
        X.shape[0]
    )[:
      nb_clusters]]  # kmeans++ initialization is not feasible because complexity is O(ndk)...

    nb_factors = 5
    sparsity_factor = 2
    nb_iter_palm = 300

    residual_on_right = False

    # lst_constraints, lst_constraints_vals = build_constraint_sets(U_centroids_hat.shape[0], U_centroids_hat.shape[1], nb_factors, sparsity_factor=sparsity_factor)
    K = U_centroids_hat.shape[0]
    d = U_centroids_hat.shape[1]
    lst_constraints, lst_constraints_vals = build_constraint_set_smart(
        K,
        d,
        nb_factors,
        sparsity_factor=sparsity_factor,
        residual_on_right=residual_on_right)
    logger.info("constraints: {}".format(pformat(lst_constraints_vals)))

    hierarchical_palm_init = {
        "init_lambda": 1.,
        "nb_iter": nb_iter_palm,
        "lst_constraint_sets": lst_constraints,
        "residual_on_right": residual_on_right
    }

    # try:
    objective_values_q_hier, centroids_finaux_q_hier, indicator_hier = qmeans(
        X,
        nb_clusters,
Beispiel #12
0
def main(small_dim):
    """
    Run QuicK-means (with Palm) and then K-means on synthetic blobs.

    The data is generated with ``datasets.make_blobs`` and both algorithms
    start from the same centroids, drawn at random among the samples. The
    numpy random generator is seeded with 0 beforehand.

    :param small_dim: If true, use the small configuration (10 clusters,
        1000 samples, 20 features, 50 centers, 5 factors); otherwise use the
        large one (256 clusters, 10000 samples, 2048 features, 4096 centers,
        and a number of factors derived from the problem size).
    """
    np.random.seed(0)

    nb_iter_kmeans = 10
    if small_dim:
        nb_clusters, n_samples, n_features, n_centers = 10, 1000, 20, 50
        nb_factors = 5
    else:
        nb_clusters, n_samples, n_features, n_centers = 256, 10000, 2048, 4096
        nb_factors = int(np.log2(min(nb_clusters, n_features)))

    X, _ = datasets.make_blobs(n_samples=n_samples,
                               n_features=n_features,
                               centers=n_centers)

    # kmeans++ initialization is not feasible because complexity is O(ndk)...
    # so the initial centroids are nb_clusters samples picked at random.
    shuffled_indices = np.random.permutation(X.shape[0])
    U_centroids_hat = X[shuffled_indices[:nb_clusters]]

    residual_on_right = nb_clusters < n_features
    qmeans_delta_threshold = 1e-6

    constraint_sets, constraints_description = build_constraint_set_smart(
        U_centroids_hat.shape[0],
        U_centroids_hat.shape[1],
        nb_factors,
        sparsity_factor=2,
        residual_on_right=residual_on_right,
        fast_unstable_proj=True)
    logger.info("constraints: {}".format(pformat(constraints_description)))

    palm_params = dict()
    palm_params["init_lambda"] = 1.
    palm_params["nb_iter"] = 300
    palm_params["lst_constraint_sets"] = constraint_sets
    palm_params["residual_on_right"] = residual_on_right
    palm_params["delta_objective_error_threshold"] = 1e-6
    palm_params["track_objective"] = False

    logger.info('Running QuicK-means with Palm')
    # The results are not used further; the unpacking only checks that
    # four values are returned.
    objective_palm, centroids_palm, indicator_palm, _ = qmeans(
        X_data=X,
        K_nb_cluster=nb_clusters,
        nb_iter=nb_iter_kmeans,
        nb_factors=nb_factors,
        params_palm4msa=palm_params,
        initialization=U_centroids_hat,
        delta_objective_error_threshold=qmeans_delta_threshold)

    try:
        logger.info('Running K-means')
        objective_kmeans, centroids_kmeans, indicator_kmeans = kmeans(
            X, nb_clusters, nb_iter_kmeans,
            initialization=U_centroids_hat)
    except SystemExit as exit_request:
        # A SystemExit raised during kmeans is logged instead of propagated.
        logger.info("There have been a problem in kmeans: {}".format(str(exit_request)))
dims = [(d, d), (d, d // 2), (d // 2, d)]

results = {}
for n_fac in n_facs:
    for dim in dims:
        pair = dict()
        lst_factors = init_lst_factors(dim[0],
                                       dim[1],
                                       n_fac,
                                       first_square=False)

        # construit les contraintes de projection dans une liste
        lst_constraints, lst_constraints_vals = build_constraint_set_smart(
            dim[0],
            dim[1],
            n_fac,
            sparsity_factor=sparsity_fac,
            residual_on_right=True,
            fast_unstable_proj=False,
            constant_first=False)
        lst_constraints_palm = lst_constraints[-1]["finetune"]

        # construit la matrice cible
        X_target = np.random.rand(dim[0], dim[1])

        # op_factor est en quelque sortes la liste des facteurs sparses
        _lambda, op_factors, _, _, _ = \
            palm4msa(
                arr_X_target=X_target,
                lst_S_init=lst_factors,
                nb_factors=len(lst_factors),
                lst_projection_functions=lst_constraints_palm,