Python kmeans_clustering 예제들, clustering.kmeans_clustering Python 예제들

예제 #1

0

파일 보기

def domanda_6():
    """Report the distortion of hierarchical and k-means clustering on dataset_212.

    Both routines are asked for 9 clusters; k-means additionally receives 5 as
    its third argument (presumably the iteration count -- confirm against
    ``kmeans_clustering``).  Each routine returns a triple of which only the
    third element is printed here.
    """
    n_clusters = 9

    print("Computing: hierchical clustering on dataset_212...")
    _, _, hierarchical_distortion = hierarchical_clustering(
        dataset_212, n_clusters, weighted)
    print("Distortion for hierchical clustering:", hierarchical_distortion)

    print("Computing: kmeans clustering on dataset_212...")
    _, _, kmeans_distortion = kmeans_clustering(
        dataset_212, n_clusters, 5, weighted)
    print("Distortion for kmeans clustering:", kmeans_distortion)

예제 #2

0

파일 보기

파일: alg_project3_viz.py 프로젝트: archime/coursera-work

def visualize_data(cluster_input, data, method=None, display_centers=False):
    """
    Load a data table, cluster it with the selected method and plot the result.

    Parameters:
        cluster_input: argument forwarded to the clustering routine.  For
            ``method=None`` and ``'hierarchical_clustering'`` it is passed
            through unchanged.  For ``'kmeans_clustering'`` it must be
            indexable: ``cluster_input[0]`` and ``cluster_input[1]`` become
            the second and third arguments of ``clustering.kmeans_clustering``
            (presumably cluster count and iteration count -- confirm).
        data: forwarded to ``load_data_table``.
        method: ``None`` (sequential clustering), ``'hierarchical_clustering'``
            or ``'kmeans_clustering'``.
        display_centers: forwarded to
            ``alg_clusters_matplotlib.plot_clusters``.

    Returns:
        None.  If ``method`` is not recognized an error message is printed
        and the function returns without loading or plotting anything.
    """
    # Validate first: previously an unknown method printed the error and then
    # crashed on the plot call because cluster_list was never assigned.
    if method not in (None, 'hierarchical_clustering', 'kmeans_clustering'):
        print("ERROR: method entered into visualize_data not recognized")
        return

    data_table = load_data_table(data)

    # One single-element cluster per row of the table.
    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    if method is None:
        cluster_list = sequential_clustering(singleton_list, cluster_input)
        print("Displaying", len(cluster_list), "sequential clusters")
    elif method == 'hierarchical_clustering':
        cluster_list = clustering.hierarchical_clustering(singleton_list, cluster_input)
        print("Displaying", len(cluster_list), "hierarchical clusters")
    else:
        cluster_list = clustering.kmeans_clustering(singleton_list,
                                                    cluster_input[0],
                                                    cluster_input[1])
        print("Displaying", len(cluster_list), "k-means clusters")

    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, display_centers)

예제 #3

0

파일 보기

파일: alg_project3_viz.py 프로젝트: cshintov/Courses

def run_example():
    """
    Cluster the table loaded from DATA_111_URL with k-means and plot it.

    The table is turned into one singleton cluster per row, clustered with
    ``clustering.kmeans_clustering(singleton_list, 9, 5)`` and then drawn.

    Set DESKTOP = True/False to use either matplotlib or simplegui.  With
    matplotlib the clusters are plotted twice: without and with centers.
    """
    data_table = load_data_table(DATA_111_URL)

    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    # Alternative clusterings, left disabled:
    #cluster_list = sequential_clustering(singleton_list, 15)
    #print("Displaying %d sequential clusters" % len(cluster_list))

    #cluster_list = clustering.hierarchical_clustering(singleton_list, 15)
    #print("Displaying %d hierarchical clusters" % len(cluster_list))

    cluster_list = clustering.kmeans_clustering(singleton_list, 9, 5)
    # Single-argument print: same output under the Python 2 print statement
    # and the Python 3 print function.
    print("Displaying %d k-means clusters" % len(cluster_list))

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)  # add cluster centers
    else:
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)   # use toggle in GUI to add cluster centers

예제 #4

0

파일 보기

def domanda_5():
    """Run k-means on dataset_212 (arguments 9 and 5) and draw the clustering.

    Only the first element of the triple returned by ``kmeans_clustering`` is
    used; it is handed to ``draw_clustering`` together with a title that gets
    a " (v. pesata)" suffix when the module-level ``weighted`` flag is truthy.
    """
    print("Computing: kmeans clustering on dataset_212...")
    clusters, _, _ = kmeans_clustering(dataset_212, 9, 5, weighted)

    print("Drawing...")
    title = "Clustering k-means su 212 contee"
    if weighted:
        title = title + " (v. pesata)"
    draw_clustering(clusters, title)

예제 #5

0

파일 보기

def domanda_2():
    """Run k-means on dataset_full (arguments 15 and 5) and draw the clustering.

    Only the first element of the triple returned by ``kmeans_clustering`` is
    used; it is handed to ``draw_clustering`` together with a title that gets
    a " (v. pesata)" suffix when the module-level ``weighted`` flag is truthy.
    """
    print("Computing: kmeans clustering on dataset_full...")
    clusters, _, _ = kmeans_clustering(dataset_full, 15, 5, weighted)

    print("Drawing...")
    title = "Clustering k-means sull'intero dataset"
    if weighted:
        title = title + " (v. pesata)"
    draw_clustering(clusters, title)

예제 #6

0

파일 보기

파일: alg_project3_viz.py 프로젝트: lobzison/python-stuff

def kmeans_dist(data_url):
    """
    Compute the k-means distortion for every cluster count from 6 to 20.

    Returns a dict mapping each cluster count to the value returned by
    ``ca.compute_distortion`` for the k-means result (k-means is always
    called with 5 as its third argument).
    """
    data_table = load_data_table(data_url)
    distortions = {}

    for num_clust in range(6, 21):
        # Fresh singleton clusters for every run, one per table row.
        singletons = [
            alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
            for row in data_table
        ]
        clusters = alg_project3_solution.kmeans_clustering(
            singletons, num_clust, 5)
        distortions[num_clust] = ca.compute_distortion(clusters, data_table)

    return distortions

예제 #7

0

파일 보기

def domanda_9():
    """Draw hierarchical vs. k-means distortion curves for three datasets.

    For each of dataset_212, dataset_562 and dataset_1041 the k-means
    distortion (third element of the ``kmeans_clustering`` result) is computed
    for every cluster count in 6..20, the hierarchical distortion list is
    sliced to ``[6:21]``, and both are passed to ``draw_distortion``.
    """
    print("Computing...")
    named_datasets = (
        ("dataset con 212 contee", dataset_212),
        ("dataset con 562 contee", dataset_562),
        ("dataset con 1041 contee", dataset_1041),
    )
    min_c, max_c = 6, 21
    interval = range(min_c, max_c)

    for name, dataset in named_datasets:
        all_h_distortions = hierarchical_clustering_distortion_list(
            dataset, min_c, weighted)
        h_distortion_list = all_h_distortions[min_c:max_c]

        k_distortion_list = []
        for n_clusters in interval:
            result = kmeans_clustering(dataset, n_clusters, 5, weighted)
            k_distortion_list.append(result[2])

        title = name + (" (v. pesata)" if weighted else "")
        draw_distortion(list(interval), h_distortion_list, k_distortion_list,
                        title)

예제 #8

0

파일 보기

파일: testsuite.py 프로젝트: cshintov/Courses

def test_kmeans():
    """
    Test student.kmeans_clustering against known results on the 24 county set.

    Each case is ``[num_clusters, num_iterations, expected]`` where
    ``expected`` is a set of county-code tuples, compared with
    ``set_of_county_tuples`` applied to the student's result.

    kmeans_clustering should not mutate cluster_list, but a fresh list of
    singleton clusters is built for every case anyway so that a mutating
    implementation cannot affect later cases.
    """

    # load small data table
    # Single-argument prints: identical output under Python 2 and Python 3.
    print("")
    print("Testing kmeans_clustering on 24 county set")
    data_24_table = load_data_table(DATA_24_URL)

    kmeansdata_24 = [[15, 1, set([('34017', '36061'), ('06037',), ('06059',), ('36047',), ('36081',), ('06071', '08031'), ('36059',), ('36005',), ('55079',), ('34013', '34039'), ('06075',), ('01073',), ('06029',), ('41051', '41067'), ('11001', '24510', '51013', '51760', '51840', '54009')])], 
                     [15, 3, set([('34017', '36061'), ('06037', '06059'), ('06071',), ('36047',), ('36081',), ('08031',), ('36059',), ('36005',), ('55079',), ('34013', '34039'), ('06075',), ('01073',), ('06029',), ('41051', '41067'), ('11001', '24510', '51013', '51760', '51840', '54009')])],
                     [15, 5, set([('34017', '36061'), ('06037', '06059'), ('06071',), ('36047',), ('36081',), ('08031',), ('36059',), ('36005',), ('55079',), ('34013', '34039'), ('06075',), ('01073',), ('06029',), ('41051', '41067'), ('11001', '24510', '51013', '51760', '51840', '54009')])],
                     [10, 1, set([('34017', '36061'), ('06029', '06037', '06075'), ('11001', '24510', '34013', '34039', '51013', '51760', '51840', '54009'), ('06059',), ('36047',), ('36081',), ('06071', '08031', '41051', '41067'), ('36059',), ('36005',), ('01073', '55079')])],
                     [10, 3, set([('34013', '34017', '36061'), ('06029', '06037', '06075'), ('08031', '41051', '41067'), ('06059', '06071'), ('34039', '36047'), ('36081',), ('36059',), ('36005',), ('01073', '55079'), ('11001', '24510', '51013', '51760', '51840', '54009')])],
                     [10, 5, set([('34013', '34017', '36061'), ('06029', '06037', '06075'), ('08031', '41051', '41067'), ('06059', '06071'), ('34039', '36047'), ('36081',), ('36059',), ('36005',), ('01073', '55079'), ('11001', '24510', '51013', '51760', '51840', '54009')])],
                     [5, 1, set([('06029', '06037', '06075'), ('01073', '11001', '24510', '34013', '34017', '34039', '36047', '51013', '51760', '51840', '54009', '55079'), ('06059',), ('36005', '36059', '36061', '36081'), ('06071', '08031', '41051', '41067')])],
                     [5, 3, set([('06029', '06037', '06075'), ('11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081', '51013'), ('08031', '41051', '41067'), ('06059', '06071'), ('01073', '51760', '51840', '54009', '55079')])],
                     [5, 5, set([('06029', '06037', '06075'), ('08031', '41051', '41067'), ('06059', '06071'), ('01073', '55079'), ('11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081', '51013', '51760', '51840', '54009')])]]    

    suite = poc_simpletest.TestSuite()

    for num_clusters, num_iterations, expected_county_tuple in kmeansdata_24:

        # build a fresh list of singleton clusters for each case
        cluster_list = [
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
            for line in data_24_table
        ]

        # compute student answer
        student_clustering = student.kmeans_clustering(cluster_list, num_clusters, num_iterations)
        student_county_tuple = set_of_county_tuples(student_clustering)

        # Prepare test
        error_message = "Testing kmeans_custering on 24 county table, num_clusters = " + str(num_clusters)
        error_message += " num_iterations = " + str(num_iterations)
        error_message += "\nStudent county tuples: " + str(student_county_tuple)
        error_message += "\nExpected county tuples: " + str(expected_county_tuple)
        suite.run_test(student_county_tuple == expected_county_tuple, True, error_message)

    suite.report_results()

예제 #9

0

파일 보기

파일: text_EDA.py 프로젝트: GiovanniStephens/text-EDA

def get_cluster_labels(utterances):
    """
    Compute a cluster label for every utterance.

    Up to and including 100 documents: pretrained transformer embeddings,
    PCA dimension reduction (at most 10 components) and k-means with
    ``max_num_clusters=min(n_docs, 80)``.
    More than 100 documents: word2vec SIF embeddings, UMAP reduction and
    HDBSCAN.

    :utterances: list of strings
    :return: list of integers indicating what cluster they belong to.
    """
    n_docs = len(utterances)

    # Large collections: UMAP + HDBSCAN.
    if n_docs > 100:
        sif_embeddings = embeddings.word2vec_sif_embeddings(
            utterances, model_name=None)
        umap_reduced = clustering.reduce_dimensions_umap(sif_embeddings)
        return clustering.hdbscan_clustering(umap_reduced)

    # Small collections: PCA + k-means.
    transformer_embeddings = embeddings.pretrained_transformer_embeddings(
        utterances)
    optimal_components = clustering.get_optimal_n_components(
        transformer_embeddings)
    n_components = min(optimal_components, 10)
    pca_reduced = clustering.reduce_dimensions_pca(
        transformer_embeddings, dimensions=n_components)
    cluster_cap = min(n_docs, 80)
    return clustering.kmeans_clustering(pca_reduced,
                                        max_num_clusters=cluster_cap)

예제 #10

0

파일 보기

def compute_q5_q6():
    """Print k-means and hierarchical distortion for the DATA_111_URL table.

    Both algorithms are run for 9 clusters on the same list of singleton
    clusters; k-means additionally receives 5 as its third argument.
    """
    table111 = viz_tools.load_data_table(DATA_111_URL)

    # One singleton cluster per table row.
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in table111
    ]

    # Order matters: k-means runs first because
    # clustering.hierarchical_clustering mutates the list of clusters.
    k_distortion = compute_distortion(
        clustering.kmeans_clustering(singleton_list, 9, 5), table111)
    print("K-means Distortion: {}".format(k_distortion))

    h_distortion = compute_distortion(
        clustering.hierarchical_clustering(singleton_list, 9), table111)
    print("Hierarchical Distortion: {}".format(h_distortion))

예제 #11

0

파일 보기

def test_compute_distortion():
    """Print k-means and hierarchical distortion for the DATA_290_URL table.

    Both algorithms are run for 16 clusters on the same list of singleton
    clusters; k-means additionally receives 5 as its third argument.  The
    values are only printed, not asserted; the expected figures are noted
    next to each call.
    """
    table290 = viz_tools.load_data_table(DATA_290_URL)

    # One singleton cluster per table row.
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in table290
    ]

    # Order matters: k-means runs first because
    # clustering.hierarchical_clustering mutates the list of clusters.

    # Test 2: expect 2.323 x 10^11
    k_distortion = compute_distortion(
        clustering.kmeans_clustering(singleton_list, 16, 5), table290)
    print("K-means Distortion: {}".format(k_distortion))

    # Test 1: expect 2.575 x 10^11
    h_distortion = compute_distortion(
        clustering.hierarchical_clustering(singleton_list, 16), table290)
    print("Hierarchical Distortion: {}".format(h_distortion))

예제 #12

0

파일 보기

def q10(plot_key):
    """Plot k-means vs. hierarchical distortion for one of three data tables.

    Parameters:
        plot_key: 111, 290 or 896, selecting the table loaded from
            DATA_111_URL, DATA_290_URL or DATA_896_URL respectively.

    For the selected table, distortion is computed for every cluster count
    from 20 down to 6 with both ``clustering.kmeans_clustering`` (third
    argument 5) and ``clustering.hierarchical_clustering``, and both curves
    are drawn on one matplotlib figure.

    Only the selected table is loaded.  Any other ``plot_key`` loads nothing
    and shows a figure with title, axis labels and legend but no curves.

    Returns:
        None.
    """
    # (plot_key, data URL, k-means line format, hierarchical line format)
    datasets = (
        (111, DATA_111_URL, '-bo', '-co'),
        (290, DATA_290_URL, '-go', '-mo'),
        (896, DATA_896_URL, '-ro', '-yo'),
    )
    selected = next((entry for entry in datasets if entry[0] == plot_key),
                    None)

    cluster_counts = list(range(20, 5, -1))
    k_distortions, h_distortions = [], []

    if selected is not None:
        size, data_url, k_format, h_format = selected
        table = viz_tools.load_data_table(data_url)

        def singleton_clusters():
            # One singleton cluster per table row.
            return [
                alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                    line[4]) for line in table
            ]

        # Separate lists per algorithm, each reused across all cluster counts.
        # NOTE(review): the hierarchical list is deliberately not rebuilt per
        # iteration; this keeps any in-place merging by
        # hierarchical_clustering carrying over from one count to the next,
        # as before -- confirm that is intended.
        k_input = singleton_clusters()
        h_input = singleton_clusters()

        for num in cluster_counts:
            kmeans_result = clustering.kmeans_clustering(k_input, num, 5)
            hierarchical_result = clustering.hierarchical_clustering(
                h_input, num)
            k_distortions.append(compute_distortion(kmeans_result, table))
            h_distortions.append(
                compute_distortion(hierarchical_result, table))

    # Plot results
    plt.figure('Distortion for Different Clustering Methods')
    plt.title('Distortion for Different Clustering Methods: {} Points'.format(
        plot_key))
    plt.xlabel('Number of Clusters')
    plt.ylabel('Distortion')

    if selected is not None:
        plt.plot(cluster_counts, k_distortions, k_format, markersize=1,
                 label='K-means ({})'.format(size))
        plt.plot(cluster_counts, h_distortions, h_format, markersize=1,
                 label='Hierarchical ({})'.format(size))

    plt.legend(loc='best')

    plt.show()

예제 #13

0

파일 보기

파일: main.py 프로젝트: Manneq/Raiffeisenbank_Clients_Clustering

def initial_data_clustering(data_for_clustering, data_for_lda, customer_ids,
                            category_names):
    """
        Run a fixed sequence of clustering steps on the initial dataset,
        printing progress and the elapsed time (in minutes) of each step:
            0. DBSCAN (DMDBSCAN method)
            1. OPTICS
            2. k-means, cluster count from the Silhouette method
            3. k-means, cluster count from the Elbow (Knee) method
            4. LDA topic modeling
            5. k-means with the LDA results as auxiliary data
            6. Clustering by dominant LDA topic
            7. Gaussian mixture
        All work is delegated to the ``clustering`` module; the results of
        the individual steps are not returned.
        param:
            1. data_for_clustering - pandas DataFrame (10000, 82), where
                values are the mean spendings of customers for every
                category
            2. data_for_lda - pandas DataFrame (10000, 82), where values are
                the transactions number of customers for every category
            3. customer_ids - numpy array (10000, ) with all customer ids
            4. category_names - numpy array (82, ) with names of categories
        return:
            None
    """
    # NOTE(review): after every clustering call the 'cluster' (or
    # 'dominant_topic') column is dropped from data_for_clustering --
    # presumably the clustering helpers add that column to the frame in
    # place; verify in the clustering module.

    # Step 0: clustering using DBSCAN algorithm
    print("\t0. DBSCAN clustering with DMDBSCAN method")
    time_start = time.time()

    clustering.dbscan_clustering(data_for_clustering, "initial data")
    data_for_clustering = data_for_clustering.drop(columns='cluster')

    # time_end holds the elapsed wall-clock time of the step, in minutes
    time_end = (time.time() - time_start) / 60
    print("\t   Images folder: plots/clustering/initial data/"
          "DBSCAN with DMDBSCAN/")
    print("\t   Done. With time " + str(time_end) + " min")

    # Step 1: clustering using OPTICS algorithm
    print("\t1. OPTICS clustering")
    time_start = time.time()

    clustering.optics_clustering(data_for_clustering, "initial data")
    data_for_clustering = data_for_clustering.drop(columns='cluster')

    time_end = (time.time() - time_start) / 60
    print("\t   Images folder: plots/clustering/initial data/OPTICS/")
    print("\t   Done. With time " + str(time_end) + " min")

    # Step 2: clustering using k-means algorithm with
    # Silhouette method for clusters number choosing
    print("\t2. k-means clustering (Silhouette method)")
    time_start = time.time()

    # The cluster count is whatever clustering.silhouette_method returns
    # for the same data.
    clustering.kmeans_clustering(data_for_clustering,
                                 "initial data",
                                 "k-means with Silhouette",
                                 clusters_number=clustering.silhouette_method(
                                     data_for_clustering,
                                     "clustering/initial data/k-means "
                                     "with Silhouette"))
    data_for_clustering = data_for_clustering.drop(columns='cluster')

    time_end = (time.time() - time_start) / 60
    print("\t   Images folder: plots/clustering/initial data/"
          "k-means with Silhouette/")
    print("\t   Done. With time " + str(time_end) + " min")

    # Step 3: clustering using k-means algorithm with
    # Elbow (Knee) method for clusters number choosing
    print("\t3. k-means clustering (Elbow (Knee) method)")
    time_start = time.time()

    # The cluster count is whatever clustering.elbow_method returns
    # for the same data.
    clustering.kmeans_clustering(data_for_clustering,
                                 "initial data",
                                 "k-means with Elbow",
                                 clusters_number=clustering.elbow_method(
                                     data_for_clustering,
                                     "clustering/initial data/k-means "
                                     "with Elbow"))
    data_for_clustering = data_for_clustering.drop(columns='cluster')

    time_end = (time.time() - time_start) / 60
    print("\t   Images folder: plots/clustering/initial data/"
          "k-means with Elbow/")
    print("\t   Done. With time " + str(time_end) + " min")

    # Step 4: topic modeling using LDA; its two results feed steps 5 and 6
    print("\t4. Performing LDA")
    time_start = time.time()

    lda_results, topics_number = \
        clustering.lda_performing(data_for_lda, customer_ids, category_names)

    time_end = (time.time() - time_start) / 60
    print("\t   Data folder: data/output/")
    print("\t   Images folder: plots/LDA/")
    print("\t   Done. With time " + str(time_end) + " min")

    # Step 5: clustering using k-means algorithm with LDA topics.
    # The cluster count is the LDA topic count; the LDA results without
    # their 'dominant_topic' column are passed as auxiliary data.
    print("\t5. k-means clustering using LDA topics")
    time_start = time.time()

    clustering.kmeans_clustering(
        data_for_clustering,
        "initial data",
        "k-means with LDA",
        clusters_number=topics_number,
        auxiliary_data=lda_results.drop(columns=["dominant_topic"]))
    data_for_clustering = data_for_clustering.drop(columns='cluster')

    time_end = (time.time() - time_start) / 60
    print("\t   Images folder: plots/clustering/initial data/"
          "k-means with LDA/")
    print("\t   Done. With time " + str(time_end) + " min")

    # Step 6: clustering using dominant topics as cluster labels
    # (this step drops 'dominant_topic' afterwards instead of 'cluster')
    print("\t6. Clustering using dominant LDA topics")
    time_start = time.time()

    clustering.topic_clustering(data_for_clustering, lda_results,
                                topics_number)
    data_for_clustering = data_for_clustering.drop(columns='dominant_topic')

    time_end = (time.time() - time_start) / 60
    print("\t   Images folder: plots/clustering/initial data/"
          "clustering by LDA topics/")
    print("\t   Done. With time " + str(time_end) + " min")

    # Step 7: clustering using Gaussian mixture model with Bayes classifier
    print("\t7. Gaussian mixture clustering")
    time_start = time.time()

    clustering.gaussian_mixture(data_for_clustering, "initial data")
    data_for_clustering = data_for_clustering.drop(columns='cluster')

    time_end = (time.time() - time_start) / 60
    print("\t   Images folder: plots/clustering/initial data/"
          "gaussian mixture/")
    print("\t   Done. With time " + str(time_end) + " min")

    return

예제 #14

0

파일 보기

# Vocabulary of the language selected by --lang_to_cluster, restricted to
# words that have an entry in `embeddings`.
valid_vocab = vocab[args.lang_to_cluster]
valid_vocab = [w for w in valid_vocab if w in embeddings]

# cluster vocabulary embeddings from 1 language only
print("\nComputing word weights using", args.weight_type)
weight_type = args.weight_type
vocab_scores = get_vocab_scores(valid_docs, valid_vocab, score_type=weight_type)
# From here on the vocabulary is exactly the set of scored words, so the
# embedding matrix and the weight vector below are built in the same order.
valid_vocab = list(vocab_scores.keys())
emb_to_cluster = np.array([embeddings[w] for w in valid_vocab])
weights = np.array([vocab_scores[w] for w in valid_vocab])
print("Valid vocab:", len(valid_vocab))

print("\nClustering vocab words")
if args.cluster_method == 'kmeans':
    if args.weight_type != 'none':
        labels, centers, model = kmeans_clustering(vocab_embeddings=emb_to_cluster, vocab=valid_vocab, n_topics=args.n_clusters, weights=weights)
    else:
        labels, centers, model = kmeans_clustering(vocab_embeddings=emb_to_cluster, vocab=valid_vocab, n_topics=args.n_clusters)

elif args.cluster_method == 'affprop':
    if args.weight_type != 'none':
        labels, centers = affprop_clustering(vocab_embeddings=emb_to_cluster, vocab=valid_vocab, weights=weights)
    else:
        labels, centers = affprop_clustering(vocab_embeddings=emb_to_cluster, vocab=valid_vocab)

elif args.cluster_method == 'gmm':
    labels, means, cov = GMM_clustering(vocab_embeddings=emb_to_cluster, vocab=valid_vocab, cov_type=args.cov_type,
                                        n_topics=args.n_clusters)

else:
    labels, centers, model = kmeans_clustering(vocab_embeddings=emb_to_cluster, vocab=valid_vocab,

예제 #15

0

파일 보기

파일: kmeans_clustering_3k.py 프로젝트: PankovIlya/at

import sys

sys.path.append('../../3_closest_pairs_&_clustering_algorithms')

import data.load_clusters as lc
import clustering as clr
import data.cluster as cl
import alg_clusters_matplotlib as cp



# Load the table referenced by lc.DATA_3108_URL.
data_table = lc.load_data_table(lc.DATA_3108_URL)

# One singleton cluster per table row.
singleton_list = [
    cl.Cluster({line[0]}, line[1], line[2], line[3], line[4])
    for line in data_table
]

# k-means with arguments 7 and 7, then plot with cluster centers shown.
cluster_list = clr.kmeans_clustering(singleton_list, 7, 7)

cp.plot_clusters(data_table, cluster_list, True)

예제 #16

0

파일 보기

    w = np.array([vocab_scores[lang][w] for w in valid_vocab[lang]])
    combined_weights.append(w)
    combined_vocab.extend(valid_vocab[lang])

# Merge the per-language pieces collected above into single arrays.
combined_emb = np.vstack(combined_emb)
combined_weights = np.concatenate(combined_weights)
print("Embs to cluster:", combined_emb.shape)
print("Weights:", combined_weights.shape)
print("Combined vocab:", len(combined_vocab))

print("\nClustering vocab words")
if args.cluster_method == 'kmeans':
    # Same call in both cases; weights are only passed when a weighting
    # scheme was requested.
    kmeans_kwargs = {
        'vocab_embeddings': combined_emb,
        'vocab': combined_vocab,
        'topics': args.n_clusters,
    }
    if args.weight_type != 'none':
        kmeans_kwargs['weights'] = combined_weights
    labels, centers, model = kmeans_clustering(**kmeans_kwargs)

    # Output file names encode embedding type, method, weighting and the
    # number of clusters.
    save_path = "/proj/zosa/results/cldr/"
    dump_file = "denews_" + args.emb_type + "_" + args.cluster_method + "_" + args.weight_type + "_" + str(
        args.n_clusters) + "clusters" + "_bilingual.pkl"
    dump_file_model = dump_file[:-4] + "_model.pkl"
    results = {'labels': labels, 'centers': centers, 'vocab': vocab_scores}
    # The with-block closes the file on exit; no explicit close() is needed.
    with open(save_path + dump_file, 'wb') as f:
        pickle.dump(results, f)

예제 #17

0

파일 보기

파일: main.py 프로젝트: Manneq/Raiffeisenbank_Clients_Clustering

def embedded_data_clustering(data_for_clustering, customer_ids, name):
    """
        Embed the dataset with the method called ``name`` and run a fixed
        sequence of clustering steps, passing the embedding to each one as
        ``auxiliary_data``.  Progress and the elapsed time (in minutes) of
        each step are printed:
            0. Data embedding
            1. DBSCAN (DMDBSCAN method)
            2. OPTICS
            3. k-means, cluster count from the Silhouette method
            4. k-means, cluster count from the Elbow (Knee) method
            5. Gaussian mixture
        All work is delegated to the ``dimensionality_reduction`` and
        ``clustering`` modules; the results of the individual steps are not
        returned.
        param:
            1. data_for_clustering - pandas DataFrame (10000, 82), where
                values are the mean spendings of customers for every
                category
            2. customer_ids - numpy array (10000, ) with all customer ids
            3. name - string that represents name of embedding algorithm;
                also used in the printed plot folder paths
        return:
            None
    """
    # NOTE(review): after every clustering call the 'cluster' column is
    # dropped from data_for_clustering -- presumably the clustering helpers
    # add that column to the frame in place; verify in the clustering module.

    # Step 0: data embedding
    print("\t0. Transforming data using " + name + " method")
    time_start = time.time()

    data_embedded = \
        dimensionality_reduction.data_embedding(data_for_clustering,
                                                customer_ids, name)

    # time_end holds the elapsed wall-clock time of the step, in minutes
    time_end = (time.time() - time_start) / 60
    print("\t   Data folder: data/output/")
    print("\t   Done. With time " + str(time_end) + " min")

    # Step 1: clustering using DBSCAN algorithm
    print("\t1. DBSCAN clustering with DMDBSCAN method")
    time_start = time.time()

    clustering.dbscan_clustering(data_for_clustering,
                                 name,
                                 auxiliary_data=data_embedded)
    data_for_clustering = data_for_clustering.drop(columns='cluster')

    time_end = (time.time() - time_start) / 60
    print("\t   Images folder: plots/clustering/" + name +
          "/DBSCAN with DMDBSCAN/")
    print("\t   Done. With time " + str(time_end) + " min")

    # Step 2: clustering using OPTICS algorithm
    print("\t2. OPTICS clustering")
    time_start = time.time()

    clustering.optics_clustering(data_for_clustering,
                                 name,
                                 auxiliary_data=data_embedded)
    data_for_clustering = data_for_clustering.drop(columns='cluster')

    time_end = (time.time() - time_start) / 60
    print("\t   Images folder: plots/clustering/" + name + "/OPTICS/")
    print("\t   Done. With time " + str(time_end) + " min")

    # Step 3: clustering using k-means algorithm with
    # Silhouette method for clusters number choosing
    print("\t3. k-means clustering (Silhouette method)")
    time_start = time.time()

    # The cluster count is chosen by silhouette_method on the embedded data,
    # while kmeans_clustering itself receives the original frame plus the
    # embedding as auxiliary data.
    clustering.kmeans_clustering(data_for_clustering,
                                 name,
                                 "k-means with Silhouette",
                                 clusters_number=clustering.silhouette_method(
                                     data_embedded, "clustering/" + name +
                                     "/k-means with Silhouette"),
                                 auxiliary_data=data_embedded)
    data_for_clustering = data_for_clustering.drop(columns='cluster')

    time_end = (time.time() - time_start) / 60
    print("\t   Images folder: plots/clustering/" + name +
          "/k-means with Silhouette/")
    print("\t   Done. With time " + str(time_end) + " min")

    # Step 4: clustering using k-means algorithm with
    # Elbow (Knee) method for clusters number choosing
    print("\t4. k-means clustering (Elbow (Knee) method)")
    time_start = time.time()

    # As in step 3, the cluster count comes from the embedded data.
    clustering.kmeans_clustering(data_for_clustering,
                                 name,
                                 "k-means with Elbow",
                                 clusters_number=clustering.elbow_method(
                                     data_embedded, "clustering/" + name +
                                     "/k-means with Elbow"),
                                 auxiliary_data=data_embedded)
    data_for_clustering = data_for_clustering.drop(columns='cluster')

    time_end = (time.time() - time_start) / 60
    print("\t   Images folder: plots/clustering/" + name +
          "/k-means with Elbow/")
    print("\t   Done. With time " + str(time_end) + " min")

    # Step 5: clustering using Gaussian mixture model with Bayes classifier
    print("\t5. Gaussian mixture clustering")
    time_start = time.time()

    clustering.gaussian_mixture(data_for_clustering,
                                name,
                                auxiliary_data=data_embedded)
    data_for_clustering = data_for_clustering.drop(columns='cluster')

    time_end = (time.time() - time_start) / 60
    print("\t   Images folder: plots/clustering/" + name +
          "/gaussian mixture/")
    print("\t   Done. With time " + str(time_end) + " min")

    return