def main_kmeans(X, U_init):
    """
    Run the k-means algorithm on X, starting from the centroids U_init.

    Depending on the `--minibatch` option, either the mini-batch or the
    full-batch implementation is used. Training time (CPU time) and the
    per-iteration objective values are recorded through the module-level
    result printers.

    :param X: The input data in which to find the clusters.
    :param U_init: The initial cluster centroids.

    :return: The final centroids, the indicator vector
    """
    # Time only the clustering itself; process_time ignores sleep/wall time.
    t_begin = time.process_time()
    shared_kwargs = {
        "X_data": X,
        "K_nb_cluster": paraman["--nb-cluster"],
        "nb_iter": paraman["--nb-iteration"],
        "initialization": U_init,
    }
    if paraman["--minibatch"]:
        objective_values_k, final_centroids, indicator_vector_final = \
            kmeans_minibatch(batch_size=paraman["--minibatch"], **shared_kwargs)
    else:
        objective_values_k, final_centroids, indicator_vector_final = \
            kmeans(**shared_kwargs)
    t_end = time.process_time()

    objprinter.add("kmeans_objective", ("after t", ), objective_values_k)
    resprinter.add({"traintime": t_end - t_begin})

    return final_centroids, indicator_vector_final
def main_kmeans(X, U_init):
    """
    Run full-batch k-means on X with U_init as the initial centroids.

    Wall-clock training time and the objective values are pushed to the
    module-level result printers.

    :param X: The input data in which to find the clusters.
    :param U_init: The initial cluster centroids.

    :return: The final centroids, the indicator vector
    """
    tic = time.time()
    objective_values_k, final_centroids, indicator_vector_final = kmeans(
        X_data=X,
        K_nb_cluster=paraman["--nb-cluster"],
        nb_iter=paraman["--nb-iteration"],
        initialization=U_init)
    toc = time.time()

    objprinter.add("kmeans_objective", ("after t", ), objective_values_k)
    resprinter.add({"traintime": toc - tic})

    return final_centroids, indicator_vector_final
Ejemplo n.º 3
0
def main_kmeans(X, U_init):
    """
    Run the k-means algorithm on X with U_init as the initialization.

    The number of clusters is implied by the number of rows of U_init.
    The mini-batch variant is used when the `--minibatch` option is set.

    :param X: The input data in which to find the clusters.
    :param U_init: The initial cluster centroids (one per row).

    :return: The final centroids, the indicator vector
    """
    shared_kwargs = {
        "X_data": X,
        "K_nb_cluster": U_init.shape[0],
        "nb_iter": paraman["--nb-iteration"],
        "initialization": U_init,
    }
    if paraman["--minibatch"]:
        _objective, final_centroids, indicator_vector_final = \
            kmeans_minibatch(batch_size=paraman["--minibatch"], **shared_kwargs)
    else:
        _objective, final_centroids, indicator_vector_final = \
            kmeans(**shared_kwargs)

    return final_centroids, indicator_vector_final
Ejemplo n.º 4
0
               nb_iter_kmeans,
               nb_factors,
               hierarchical_palm_init,
               initialization=U_centroids_hat,
               hierarchical_inside=True)

    logger.info('Running QuicK-means with Palm')
    objective_function_with_palm, op_centroids_palm, indicator_palm, lst_objective_function_palm = \
        qmeans(X, nb_clusters, nb_iter_kmeans, nb_factors,
               hierarchical_palm_init,
               initialization=U_centroids_hat)

    try:
        logger.info('Running K-means')
        objective_values_k, centroids_finaux, indicator_kmean = \
            kmeans(X, nb_clusters, nb_iter_kmeans,
                   initialization=U_centroids_hat)
    except SystemExit as e:
        logger.info("There have been a problem in kmeans: {}".format(str(e)))

    logger.info('Display')
    plt.figure()

    plt.plot(np.arange(len(objective_function_with_hier_palm)), objective_function_with_hier_palm, marker="x", label="hierarchical")
    plt.plot(np.arange(len(objective_function_with_palm)), objective_function_with_palm, marker="x", label="palm")
    plt.plot(np.arange(len(objective_values_k)), objective_values_k, marker="x", label="kmeans")

    handles, labels = plt.gca().get_legend_handles_labels()
    by_label = OrderedDict(zip(labels, handles))
    plt.legend(by_label.values(), by_label.keys())
    plt.show()
Ejemplo n.º 5
0
        graphical_display=False,
        hierarchical_inside=True)
    # objective_values_q_hier, centroids_finaux_q_hier, indicator = qmeans(X, nb_clusters, nb_iter_kmeans, nb_factors, hierarchical_palm_init, initialization=U_centroids_hat, graphical_display=True, hierarchical_inside=True)
    objective_values_q, centroids_finaux_q, indicator = qmeans(
        X,
        nb_clusters,
        nb_iter_kmeans,
        nb_factors,
        hierarchical_palm_init,
        initialization=U_centroids_hat,
        graphical_display=False)
    # except Exception as e:
    #     logger.info("There have been a problem in qmeans: {}".format(str(e)))
    try:

        objective_values_k, centroids_finaux, indicator = kmeans(
            X, nb_clusters, nb_iter_kmeans, initialization=U_centroids_hat)
    except SystemExit as e:
        logger.info("There have been a problem in kmeans: {}".format(str(e)))

    plt.figure()
    # plt.yscale("log")

    plt.scatter(np.arange(len(objective_values_q) - 1) + 0.5,
                objective_values_q[1:, 0],
                marker="x",
                label="qmeans after palm(0)",
                color="b")
    plt.scatter((2 * np.arange(len(objective_values_q)) + 1) / 2 - 0.5,
                objective_values_q[:, 1],
                marker="x",
                label="qmeans after t (1)",
Ejemplo n.º 6
0
def main_kmeans(X, U_init):
    """
    Run k-means (or mini-batch k-means) on X with U_init as initialization,
    with an optional l1 projection of the centroids.

    When `--l1-proj` is set but no explicit `--lambda-l1-proj` is given, a
    dataset-specific hard-coded l1 radius is used instead.

    :param X: The input data in which to find the clusters.
    :param U_init: The initial cluster centroids.

    :return: The final centroids, the indicator vector
    """
    def _dataset_lambda():
        # Hard-coded l1-projection radius per dataset; the first matching
        # dataset flag wins, mirroring the original elif chain order.
        if paraman["--blobs"] is not None:
            return None
        if paraman["--caltech256"] is not None:
            return 868
        if paraman["--census"]:
            return None
        if paraman["--kddcup04"]:
            return None
        if paraman["--kddcup99"]:
            return 4.9
        if paraman["--plants"]:
            return None
        if paraman["--breast-cancer"]:
            return 600
        if paraman["--covtype"]:
            return 964
        if paraman["--mnist"]:
            return 2550
        if paraman["--fashion-mnist"]:
            return 1485
        if paraman["--light-blobs"]:
            return None
        if paraman["--lfw"]:
            return None
        if paraman["--million-blobs"] is not None:
            return None
        if paraman["--coil20"] is not None:
            return 5.6
        raise NotImplementedError("Unknown dataset.")

    if paraman["--l1-proj"] is not None and paraman["--lambda-l1-proj"] is None:
        param_lambda = _dataset_lambda()
    else:
        param_lambda = paraman["--lambda-l1-proj"]

    # Both variants share every argument except batch_size.
    shared_kwargs = {
        "X_data": X,
        "K_nb_cluster": paraman["--nb-cluster"],
        "nb_iter": paraman["--nb-iteration"],
        "initialization": U_init,
        "proj_l1": paraman["--l1-proj"],
        "_lambda": param_lambda,
        "epsilon": paraman["--epsilon-tol-proj"],
    }

    # Time only the clustering (CPU time, not wall-clock).
    t_begin = time.process_time()
    if paraman["--minibatch"]:
        objective_values_k, final_centroids, indicator_vector_final = \
            kmeans_minibatch(batch_size=paraman["--minibatch"], **shared_kwargs)
    else:
        objective_values_k, final_centroids, indicator_vector_final = \
            kmeans(**shared_kwargs)
    kmeans_traintime = time.process_time() - t_begin

    print(objective_values_k[-1])

    objprinter.add("kmeans_objective", ("after t", ), objective_values_k)
    resprinter.add({
        "traintime": kmeans_traintime,
        "actual_param_lambda": param_lambda,
    })

    return final_centroids, indicator_vector_final
Ejemplo n.º 7
0
def main(small_dim):
    # Main code
    np.random.seed(0)
    if small_dim:
        nb_clusters = 10
        nb_iter_kmeans = 10
        n_samples = 1000
        n_features = 20
        n_centers = 50
        nb_factors = 5
    else:
        nb_clusters = 256
        nb_iter_kmeans = 10
        n_samples = 10000
        n_features = 2048
        n_centers = 4096
        nb_factors = int(np.log2(min(nb_clusters, n_features)))
    X, _ = datasets.make_blobs(n_samples=n_samples,
                               n_features=n_features,
                               centers=n_centers)

    U_centroids_hat = X[np.random.permutation(X.shape[0])[:nb_clusters]]
    # kmeans++ initialization is not feasible because complexity is O(ndk)...
    residual_on_right = nb_clusters < n_features

    sparsity_factor = 2
    nb_iter_palm = 300
    delta_objective_error_threshold = 1e-6

    lst_constraints, lst_constraints_vals = build_constraint_set_smart(
        U_centroids_hat.shape[0],
        U_centroids_hat.shape[1],
        nb_factors,
        sparsity_factor=sparsity_factor,
        residual_on_right=residual_on_right,
        fast_unstable_proj=True)
    logger.info("constraints: {}".format(pformat(lst_constraints_vals)))

    hierarchical_palm_init = {
        "init_lambda": 1.,
        "nb_iter": nb_iter_palm,
        "lst_constraint_sets": lst_constraints,
        "residual_on_right": residual_on_right,
        "delta_objective_error_threshold": 1e-6,
        "track_objective": False,
    }

    # try:
    # logger.info('Running QuicK-means with H-Palm')
    # objective_function_with_hier_palm, op_centroids_hier, indicator_hier = \
    #     qmeans(X, nb_clusters, nb_iter_kmeans,
    #            nb_factors, hierarchical_palm_init,
    #            initialization=U_centroids_hat,
    #            graphical_display=graphical_display,
    #            hierarchical_inside=True)
    # # return_objective_function=True)

    logger.info('Running QuicK-means with Palm')
    objective_function_palm, op_centroids_palm, indicator_palm, _ = \
        qmeans(X_data=X,
               K_nb_cluster=nb_clusters,
               nb_iter=nb_iter_kmeans,
               nb_factors=nb_factors,
               params_palm4msa=hierarchical_palm_init,
               initialization=U_centroids_hat,
               delta_objective_error_threshold=delta_objective_error_threshold)
    # return_objective_function=True)
    # except Exception as e:
    #     logger.info("There have been a problem in qmeans: {}".format(str(e)))
    try:
        logger.info('Running K-means')
        objective_values_k, centroids_finaux, indicator_kmean = \
            kmeans(X, nb_clusters, nb_iter_kmeans,
                   initialization=U_centroids_hat)
    except SystemExit as e:
        logger.info("There have been a problem in kmeans: {}".format(str(e)))