コード例 #1
0
def elbow_analysis(sample_file_path, kmin, kmax, **kwargs):
    """Run the Elbow method on a sample file, then cluster and plot the result.

    The sample is read from ``sample_file_path`` and analysed by ``elbow``
    with the bounds ``kmin`` and ``kmax``.  The amount of clusters it reports
    is then used for a K-Means run seeded by ``kmeans_plusplus_initializer``.
    Finally the WCE curve (with the elbow marked in red) and the resulting
    clusters are displayed.

    Keyword Args:
        initializer: center initializer handed to ``elbow``; defaults to
            ``kmeans_plusplus_initializer``.  Note that the final K-Means run
            always uses ``kmeans_plusplus_initializer``.
    """
    elbow_initializer = kwargs.get('initializer', kmeans_plusplus_initializer)
    data = read_sample(sample_file_path)

    searcher = elbow(data, kmin, kmax, initializer=elbow_initializer)
    searcher.process()
    amount_clusters = searcher.get_amount()
    wce = searcher.get_wce()

    # Cluster the data with the amount of clusters found by the Elbow method.
    solver = kmeans(data, kmeans_plusplus_initializer(data, amount_clusters).initialize())
    solver.process()
    clusters, centers = solver.get_clusters(), solver.get_centers()

    print("Sample '%s': Obtained amount of clusters: '%d'." % (sample_file_path, amount_clusters))

    # Plot WCE against K, then highlight and label the detected elbow.
    axes = plt.figure(1).add_subplot(111)
    axes.plot(range(kmin, kmax), wce, color='b', marker='.')
    elbow_wce = wce[amount_clusters - kmin]
    axes.plot(amount_clusters, elbow_wce, color='r', marker='.', markersize=10)
    axes.annotate("Elbow", (amount_clusters + 0.1, elbow_wce + 5))
    axes.grid(True)
    plt.ylabel("WCE")
    plt.xlabel("K")
    plt.show()

    kmeans_visualizer.show_clusters(data, clusters, centers)
コード例 #2
0
def elbow_analysis(sample_file_path, kmin, kmax, **kwargs):
    """Determine the amount of clusters with the Elbow method and show the result.

    Steps: read the sample from ``sample_file_path``, run ``elbow`` with the
    bounds ``kmin``/``kmax``, cluster the sample with K-Means using the
    reported amount of clusters, print that amount, plot the WCE curve with
    the elbow highlighted, and display the clusters.

    Keyword Args:
        initializer: center initializer passed to ``elbow`` (default:
            ``kmeans_plusplus_initializer``).  The K-Means run itself is
            always seeded with ``kmeans_plusplus_initializer``.
    """
    chosen_initializer = kwargs.get('initializer', kmeans_plusplus_initializer)
    points = read_sample(sample_file_path)

    analyser = elbow(points, kmin, kmax, initializer=chosen_initializer)
    analyser.process()
    amount_clusters, wce = analyser.get_amount(), analyser.get_wce()

    seeds = kmeans_plusplus_initializer(points, amount_clusters).initialize()
    clusterer = kmeans(points, seeds)
    clusterer.process()
    clusters = clusterer.get_clusters()
    centers = clusterer.get_centers()

    print("Sample '%s': Obtained amount of clusters: '%d'." %
          (sample_file_path, amount_clusters))

    canvas = plt.figure(1).add_subplot(111)
    canvas.plot(range(kmin, kmax), wce, color='b', marker='.')

    # Position of the elbow inside the WCE list (the list starts at kmin).
    elbow_index = amount_clusters - kmin
    canvas.plot(amount_clusters, wce[elbow_index],
                color='r', marker='.', markersize=10)
    canvas.annotate("Elbow", (amount_clusters + 0.1, wce[elbow_index] + 5))
    canvas.grid(True)
    plt.ylabel("WCE")
    plt.xlabel("K")
    plt.show()

    kmeans_visualizer.show_clusters(points, clusters, centers)
コード例 #3
0
def template_clustering(start_centers, path, tolerance=0.25, ccore=False):
    """Cluster a sample file with K-Means (Manhattan metric) and visualize it.

    Args:
        start_centers: initial centers passed to ``kmeans``.
        path: path of the sample file read with ``read_sample``.
        tolerance: third positional argument of ``kmeans`` (default 0.25).
        ccore: fourth positional argument of ``kmeans`` (default False).

    The execution time of ``kmeans.process`` is measured with ``timedcall``
    and printed.  Clusters are always shown with
    ``cluster_visualizer_multidim``; for data of at most three dimensions the
    K-Means specific visualizer and the allocation animation are shown too.
    """
    sample = read_sample(path)
    dimension = len(sample[0])

    metric = distance_metric(type_metric.MANHATTAN)

    # The observer records the clustering process for the animation below.
    observer = kmeans_observer()
    kmeans_instance = kmeans(sample,
                             start_centers,
                             tolerance,
                             ccore,
                             observer=observer,
                             metric=metric)
    (ticks, _) = timedcall(kmeans_instance.process)

    clusters = kmeans_instance.get_clusters()
    centers = kmeans_instance.get_centers()

    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")

    visualizer = cluster_visualizer_multidim()
    visualizer.append_clusters(clusters, sample)
    visualizer.show()

    # kmeans_visualizer can only draw 1-, 2- and 3-dimensional data; the
    # original guard (`dimension > 3`) was inverted, so the branch was either
    # skipped or failed inside the visualizer.
    if dimension <= 3:
        kmeans_visualizer.show_clusters(sample, clusters, centers,
                                        start_centers)
        kmeans_visualizer.animate_cluster_allocation(sample, observer)
コード例 #4
0
    def templateShowClusteringResultNoFailure(filename, initial_centers, ccore_flag):
        """Cluster the sample from ``filename`` with K-Means and display the result.

        The sample is read with ``read_sample``, processed by ``kmeans``
        (third positional argument 0.025, fourth ``ccore_flag``) and the
        clusters, final centers and ``initial_centers`` are handed to
        ``kmeans_visualizer.show_clusters``.
        """
        points = read_sample(filename)

        algorithm = kmeans(points, initial_centers, 0.025, ccore_flag)
        algorithm.process()

        kmeans_visualizer.show_clusters(
            points,
            algorithm.get_clusters(),
            algorithm.get_centers(),
            initial_centers)
コード例 #5
0
    def templateShowClusteringResultNoFailure(filename, initial_centers, ccore_flag):
        """Run K-Means on the sample stored in ``filename`` and show the clusters.

        ``kmeans`` is created with the positional arguments
        ``(sample, initial_centers, 0.025, ccore_flag)``; after processing,
        the clusters, the final centers and the initial centers are passed to
        ``kmeans_visualizer.show_clusters``.
        """
        data = read_sample(filename)

        solver = kmeans(data, initial_centers, 0.025, ccore_flag)
        solver.process()

        found_clusters = solver.get_clusters()
        found_centers = solver.get_centers()
        kmeans_visualizer.show_clusters(data, found_clusters, found_centers, initial_centers)
コード例 #6
0
def template_clustering(start_centers, path, tolerance=0.25, ccore=True):
    """Cluster a sample file with K-Means, then show and animate the result.

    Args:
        start_centers: initial centers passed to ``kmeans``.
        path: path of the sample file read with ``read_sample``.
        tolerance: third positional argument of ``kmeans`` (default 0.25).
        ccore: fourth positional argument of ``kmeans`` (default True).

    The time taken by ``kmeans.process`` is measured with ``timedcall`` and
    printed together with ``path``.
    """
    points = read_sample(path)

    # The observer is handed to the animation once processing is finished.
    tracker = kmeans_observer()
    algorithm = kmeans(points, start_centers, tolerance, ccore, observer=tracker)
    elapsed, _unused = timedcall(algorithm.process)

    clusters, centers = algorithm.get_clusters(), algorithm.get_centers()

    print("Sample: ", path, "\t\tExecution time: ", elapsed, "\n")

    kmeans_visualizer.show_clusters(points, clusters, centers, start_centers)
    kmeans_visualizer.animate_cluster_allocation(points, tracker)
コード例 #7
0
def run_elbow(data):
    """Pick an amount of clusters with the Elbow method and cluster ``data``.

    The Elbow method is run with the bounds 1 and 10.  The amount of clusters
    it reports seeds a K-Means++ initialization (using the
    ``FARTHEST_CENTER_CANDIDATE`` candidate setting), K-Means is processed and
    the resulting clusters are displayed.
    """
    # Bounds of K handed to the Elbow method.
    k_lower, k_upper = 1, 10

    searcher = elbow(data, k_lower, k_upper)
    searcher.process()
    amount_clusters = searcher.get_amount()

    # Seed K-Means with K-Means++ centers for the detected amount of clusters.
    seeder = kmeans_plusplus_initializer(
        data,
        amount_clusters,
        amount_candidates=kmeans_plusplus_initializer.FARTHEST_CENTER_CANDIDATE)
    solver = kmeans(data, seeder.initialize())
    solver.process()

    kmeans_visualizer.show_clusters(data, solver.get_clusters(), solver.get_centers())
コード例 #8
0
def get_cluster_custom(sample, visualize=False, class_num=3):
    """Cluster ``sample`` with K-Means using the user-defined distance metric.

    Args:
        sample: data to cluster.  It is indexed with each cluster's index
            list, so it presumably has to be a numpy array (or similar) --
            confirm against callers.
        visualize: when true, show the clusters with ``kmeans_visualizer``.
        class_num: amount of clusters requested from the K-Means++ initializer.

    Returns:
        Tuple ``(cls_encoded, final_centers, loss)``: the members of each
        cluster taken from ``sample``, the rounded final centers, and the
        value of ``kmeans.get_total_wce()``.
    """
    custom_metric = distance_metric(type_metric.USER_DEFINED, func=user_function)

    # Build K-Means with the custom metric, seeded by K-Means++.
    seeds = kmeans_plusplus_initializer(sample, class_num).initialize()
    solver = kmeans(sample, seeds, metric=custom_metric)
    solver.process()

    clusters = solver.get_clusters()
    cls_encoded = [sample[member_indices] for member_indices in clusters]

    # Round every center in place, keeping the same container object.
    final_centers = solver.get_centers()
    for position, center in enumerate(final_centers):
        final_centers[position] = np.round(np.array(center))

    loss = solver.get_total_wce()

    if visualize:
        kmeans_visualizer.show_clusters(sample, clusters, final_centers)
    return cls_encoded, final_centers, loss
コード例 #9
0
def template_clustering(start_centers, path, tolerance=0.25, ccore=False):
    """Cluster a sample file with K-Means (Manhattan metric) and visualize it.

    Args:
        start_centers: initial centers passed to ``kmeans``.
        path: path of the sample file read with ``read_sample``.
        tolerance: third positional argument of ``kmeans`` (default 0.25).
        ccore: fourth positional argument of ``kmeans`` (default False).

    The execution time of ``kmeans.process`` is measured with ``timedcall``
    and printed.  Clusters are always shown with
    ``cluster_visualizer_multidim``; for data of at most three dimensions the
    K-Means specific visualizer and the allocation animation are shown too.
    """
    sample = read_sample(path)
    dimension = len(sample[0])

    metric = distance_metric(type_metric.MANHATTAN)

    # The observer records the clustering process for the animation below.
    observer = kmeans_observer()
    kmeans_instance = kmeans(sample, start_centers, tolerance, ccore, observer=observer, metric=metric)
    (ticks, _) = timedcall(kmeans_instance.process)

    clusters = kmeans_instance.get_clusters()
    centers = kmeans_instance.get_centers()

    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")

    visualizer = cluster_visualizer_multidim()
    visualizer.append_clusters(clusters, sample)
    visualizer.show()

    # kmeans_visualizer can only draw 1-, 2- and 3-dimensional data; the
    # original guard (`dimension > 3`) was inverted, so the branch was either
    # skipped or failed inside the visualizer.
    if dimension <= 3:
        kmeans_visualizer.show_clusters(sample, clusters, centers, start_centers)
        kmeans_visualizer.animate_cluster_allocation(sample, observer)
コード例 #10
0
def elbow_kmeans_optimizer(X, k=None, kmin=1, kmax=5, visualize=True):
    """k-means clustering with or without automatically determined cluster numbers.
    Reference: https://pyclustering.github.io/docs/0.8.2/html/d3/d70/classpyclustering_1_1cluster_1_1elbow_1_1elbow.html

    # Arguments:
        X (numpy array-like): Input data matrix.
        k: Number of clusters to use. When None (default), the number is
            determined with the Elbow method from kmin and kmax.
        kmin: Minimum number of clusters to consider. Defaults to 1.
            Only used when k is None.
        kmax: Maximum number of clusters to consider. Defaults to 5.
            Only used when k is None.
        visualize: Whether to perform k-means visualization or not.

    # Returns:
        Clusters, as returned by `kmeans.get_clusters()`.
        Cluster centers, as returned by `kmeans.get_centers()`.
    """
    # Imports are kept local to the function, as in the original; only the
    # names that are actually used are imported.
    from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
    from pyclustering.cluster.elbow import elbow
    from pyclustering.cluster.kmeans import kmeans, kmeans_visualizer

    if k is not None:
        amount_clusters = k
    else:
        elbow_instance = elbow(X, kmin, kmax)
        elbow_instance.process()
        amount_clusters = elbow_instance.get_amount()

    centers = kmeans_plusplus_initializer(X, amount_clusters).initialize()
    kmeans_instance = kmeans(X, centers)
    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()
    centers = kmeans_instance.get_centers()

    # Honor the documented flag: the original always displayed the plot.
    if visualize:
        kmeans_visualizer.show_clusters(X, clusters, centers)
    return clusters, centers
コード例 #11
0
ファイル: k-means_ex1.py プロジェクト: lgodi/Dm
# K-Means example: split the FCPS "Two Diamonds" sample into two clusters
# and display them.

# Load list of points for cluster analysis.
sample = read_sample(FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS)

# Prepare initial centers using K-Means++ method (2 centers requested).
initial_centers = kmeans_plusplus_initializer(sample, 2).initialize()

# Create instance of K-Means algorithm with prepared centers.
kmeans_instance = kmeans(sample, initial_centers)

# Run cluster analysis and obtain results.
kmeans_instance.process()
clusters = kmeans_instance.get_clusters()
final_centers = kmeans_instance.get_centers()

# Visualize obtained results: the sample, its clusters and the final centers.
kmeans_visualizer.show_clusters(sample, clusters, final_centers)
from pyclustering.cluster.kmedoids import kmedoids
from pyclustering.cluster import cluster_visualizer
from pyclustering.utils import read_sample
from pyclustering.samples.definitions import FCPS_SAMPLES

# K-Medoids example on the same FCPS "Two Diamonds" sample.

# Load list of points for cluster analysis.
sample = read_sample(FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS)

# Set initial medoids. NOTE(review): these two values are fixed, not random;
# presumably they are indices of points in `sample` -- confirm against the
# kmedoids API.
initial_medoids = [1, 500]

# Create instance of K-Medoids algorithm.
kmedoids_instance = kmedoids(sample, initial_medoids)

# Run cluster analysis and obtain results.
# NOTE(review): the snippet ends here; the processing calls are not shown.
コード例 #12
0
# In[53]:

# Cluster `data_1` into 10 clusters with K-Means seeded by K-Means++.
data_1 = list(data_1)  # materialize data_1 as a list before clustering
initial_centers = kmeans_plusplus_initializer(data_1, 10).initialize()

# Create instance of K-Means algorithm with prepared centers.
kmeans_instance = kmeans(data_1, initial_centers)

# Run cluster analysis and obtain results.
kmeans_instance.process()
clusters = kmeans_instance.get_clusters()
final_centers = kmeans_instance.get_centers()

# Visualize obtained results with the K-Means visualizer; the commented-out
# lines are an alternative based on cluster_visualizer_multidim.
# visualizer = cluster_visualizer_multidim()
kmeans_visualizer.show_clusters(data_1, clusters, final_centers)
# visualizer.append_clusters(clusters, data_1)
# visualizer.show()

# In[58]:

# Repeat the clustering 5 times, each with a fresh K-Means++ initialization.
for m in range(5):
    initial_centers = kmeans_plusplus_initializer(data_1, 10).initialize()

    # Create instance of K-Means algorithm with prepared centers.
    kmeans_instance = kmeans(data_1, initial_centers)

    # Run cluster analysis and obtain results.
    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()
コード例 #13
0
# Cluster (width, height) pairs into 9 groups with K-Means using a
# user-defined distance (iou_distance_wh), then print the centers sorted by
# area and save the cluster plot.
samples = np.array(list(zip(ws, hs)))
for _ in range(1):
    # Work on a random subset of 20000 pairs, drawn without replacement.
    sample = samples[
        np.random.choice(samples.shape[0], 20000, replace=False), :]
    metric = distance_metric(type_metric.USER_DEFINED, func=iou_distance_wh)
    initial_centers = kmeans_plusplus_initializer(sample, 9).initialize()
    # Create instance of K-Means algorithm with prepared centers.
    kmeans_instance = kmeans(sample, initial_centers, metric=metric)
    # Run cluster analysis and obtain results.
    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()
    final_centers = np.array(kmeans_instance.get_centers())
    # Draw the obtained results without opening a window (display=False), so
    # the figure can be saved by plt.savefig below.
    kmeans_visualizer.show_clusters(sample,
                                    clusters,
                                    final_centers,
                                    display=False)
    # Sort the centers by width * height and round them to integers.
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the equivalent.
    sccs = np.round(final_centers[np.argsort(np.prod(final_centers,
                                                     axis=1))]).astype(int)
    print(sccs)
# Example output of a previous run:
# [[  9  13]
#  [ 25  17]
#  [ 16  31]
#  [ 47  29]
#  [ 32  51]
#  [ 83  48]
#  [ 61  91]
#  [131  99]
#  [210 189]]
plt.savefig('results/visdrone_anchors.png')
コード例 #14
0
ファイル: q3.py プロジェクト: nsk06/SMAI
    plt.scatter(reduced_data[i, 0],
                reduced_data[i, 1],
                c=col[int(model_train_label[i])],
                marker='.')

# Build one 2-D center per group (10 groups): each coordinate is
# arr_x[i] / arr_cnt[i] and arr_y[i] / arr_cnt[i].
# NOTE(review): arr_x / arr_y / arr_cnt are defined outside this excerpt;
# presumably per-group coordinate sums and member counts -- confirm.
gt_centers = []

for i in range(10):
    tmp = []
    tmp.append(arr_x[i] / arr_cnt[i])
    tmp.append(arr_y[i] / arr_cnt[i])
    gt_centers.append(tmp)
# Title and display the scatter plot drawn by the preceding loop.
plt.title('sample data')
plt.show()

# Show the ground-truth clusters together with the centers computed above.
kmeans_visualizer.show_clusters(inp, gt_clusters, gt_centers)

# Visualize clustering results
visualizer = cluster_visualizer_multidim()
visualizer.append_clusters(gt_clusters, inp, marker='o')
# visualizer.append_cluster(noise, inp, marker='x')
# visualizer.set_canvas_title('original clustering : the ground truth')
visualizer.show()

# Prepare initial centers using K-Means++ method (10 centers requested).
initial_centers = kmeans_plusplus_initializer(inp, 10).initialize()

# Create instance of K-Means algorithm with prepared centers.
kmeans_instance = kmeans(inp, initial_centers)

# Run cluster analysis and obtain results.
# NOTE(review): the snippet ends here; the processing calls are not shown.