def main_kmeans(X, U_init):
    """
    Run k-means on X starting from U_init and record how long it took.

    The mini-batch solver is used when the ``--minibatch`` option in
    ``paraman`` is truthy (its value is forwarded as the batch size);
    otherwise the regular solver is used. The objective values are handed
    to ``objprinter`` and the elapsed process time to ``resprinter``.

    :param X: The input data in which to find the clusters.
    :param U_init: The initialization of the clusters.

    :return: The final centroids, the indicator vector
    """
    tic = time.process_time()

    # Pick the solver first; only the mini-batch one takes a batch size.
    batch_size = paraman["--minibatch"]
    if batch_size:
        solver = kmeans_minibatch
        extra_kwargs = {"batch_size": batch_size}
    else:
        solver = kmeans
        extra_kwargs = {}

    objective_trace, centroids, indicators = solver(
        X_data=X,
        K_nb_cluster=paraman["--nb-cluster"],
        nb_iter=paraman["--nb-iteration"],
        initialization=U_init,
        **extra_kwargs)

    elapsed = time.process_time() - tic

    objprinter.add("kmeans_objective", ("after t", ), objective_trace)
    resprinter.add({"traintime": elapsed})

    return centroids, indicators
# Example no. 2
0
def main_kmeans(X, U_init):
    """
    Run k-means on X starting from U_init.

    The number of clusters is taken from the first dimension of U_init.
    The mini-batch solver is used when the ``--minibatch`` option in
    ``paraman`` is truthy (its value is forwarded as the batch size);
    otherwise the regular solver is used. The objective values returned by
    the solver are discarded.

    :param X: The input data in which to find the clusters.
    :param U_init: The initialization of the clusters.

    :return: The final centroids, the indicator vector
    """
    minibatch_size = paraman["--minibatch"]

    # Keyword arguments common to both solvers.
    solver_kwargs = {
        "X_data": X,
        "K_nb_cluster": U_init.shape[0],
        "nb_iter": paraman["--nb-iteration"],
        "initialization": U_init,
    }

    if minibatch_size:
        _, centroids, indicators = kmeans_minibatch(**solver_kwargs,
                                                    batch_size=minibatch_size)
    else:
        _, centroids, indicators = kmeans(**solver_kwargs)

    return centroids, indicators
# Example no. 3
0
def main_kmeans(X, U_init):
    """
    Run k-means on X starting from U_init, optionally with an l1 projection.

    When ``--l1-proj`` is set (not None) and no ``--lambda-l1-proj`` is
    given, a per-dataset default lambda is chosen from the first dataset
    option found active in ``paraman``. Otherwise the value of
    ``--lambda-l1-proj`` is used as is. The mini-batch solver is used when
    ``--minibatch`` is truthy, the regular solver otherwise. The last
    objective value is printed, the objective values are handed to
    ``objprinter``, and the elapsed process time plus the lambda actually
    used are handed to ``resprinter``.

    :param X: The input data in which to find the clusters.
    :param U_init: The initialization of the clusters.

    :return: The final centroids, the indicator vector
    :raises NotImplementedError: if a default lambda is needed but no known
        dataset option is active.
    """
    # Ordered table: (dataset option, selected-when-not-None?, default lambda).
    # Options flagged True are selected whenever their value is not None;
    # the others are selected when their value is truthy.
    dataset_lambdas = (
        ("--blobs", True, None),
        ("--caltech256", True, 868),
        ("--census", False, None),
        ("--kddcup04", False, None),
        ("--kddcup99", False, 4.9),
        ("--plants", False, None),
        ("--breast-cancer", False, 600),
        ("--covtype", False, 964),
        ("--mnist", False, 2550),
        ("--fashion-mnist", False, 1485),
        ("--light-blobs", False, None),
        ("--lfw", False, None),
        ("--million-blobs", True, None),
        ("--coil20", True, 5.6),
    )

    needs_default = (paraman["--l1-proj"] is not None
                     and paraman["--lambda-l1-proj"] is None)
    if not needs_default:
        param_lambda = paraman["--lambda-l1-proj"]
    else:
        # The first matching dataset wins, in the order of the table.
        for option, selected_when_not_none, default_lambda in dataset_lambdas:
            value = paraman[option]
            if selected_when_not_none:
                selected = value is not None
            else:
                selected = bool(value)
            if selected:
                param_lambda = default_lambda
                break
        else:
            raise NotImplementedError("Unknown dataset.")

    tic = time.process_time()

    minibatch_size = paraman["--minibatch"]
    fit_kwargs = {
        "X_data": X,
        "K_nb_cluster": paraman["--nb-cluster"],
        "nb_iter": paraman["--nb-iteration"],
        "initialization": U_init,
    }
    if minibatch_size:
        solver = kmeans_minibatch
        fit_kwargs["batch_size"] = minibatch_size
    else:
        solver = kmeans
    fit_kwargs.update(
        proj_l1=paraman["--l1-proj"],
        _lambda=param_lambda,
        epsilon=paraman["--epsilon-tol-proj"],
    )

    objective_trace, centroids, indicators = solver(**fit_kwargs)

    elapsed = time.process_time() - tic

    print(objective_trace[-1])

    objprinter.add("kmeans_objective", ("after t", ), objective_trace)
    resprinter.add({
        "traintime": elapsed,
        "actual_param_lambda": param_lambda,
    })

    return centroids, indicators