コード例 #1
0
    def random_state(kmin, kmax, algorithm, random_state, ccore_flag):
        """Check that two searches with the same random state agree.

        Runs ``silhouette_ksearch`` twice on SAMPLE_SIMPLE4 with identical
        arguments and asserts that the chosen amount, the best score and every
        per-K score match between the two runs.

        Args:
            kmin: Lower bound of the K range passed to the search.
            kmax: Upper bound of the K range passed to the search.
            algorithm: Clustering algorithm selector passed to the search.
            random_state: Seed forwarded to both searches.
            ccore_flag: Value forwarded as the ``ccore`` argument.
        """
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE4)

        def run_search():
            # Both runs must be built from exactly the same arguments.
            searcher = silhouette_ksearch(sample, kmin, kmax,
                                          algorithm=algorithm,
                                          random_state=random_state,
                                          ccore=ccore_flag)
            return searcher.process()

        first_run = run_search()
        second_run = run_search()

        assertion.eq(first_run.get_amount(), second_run.get_amount())
        assertion.eq(first_run.get_score(), second_run.get_score())

        first_scores = first_run.get_scores()
        second_scores = second_run.get_scores()
        assertion.eq(len(first_scores), len(second_scores))

        for raw_key in first_scores:
            k = int(raw_key)
            # NaN never compares equal to itself, so a pair of NaN scores is
            # treated as a match and skipped.
            both_nan = (math.isnan(first_scores[k])
                        and math.isnan(second_scores[k]))
            if not both_nan:
                assertion.eq(first_scores[k], second_scores[k])
コード例 #2
0
    def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm,
                        ccore_flag):
        """Check that the search finds a cluster amount close to the answer.

        The search is repeated up to 15 times; the check passes as soon as one
        run reports an amount within one of the number of clusters read from
        the answer file (the lower bound never drops below 1). Every run must
        also produce a best score in [-1, 1] and ``kmax - kmin`` scores.

        Args:
            sample_path: Path of the sample passed to ``read_sample``.
            answer_path: Path of the answer file passed to ``answer_reader``.
            kmin: Lower bound of the K range passed to the search.
            kmax: Upper bound of the K range passed to the search.
            algorithm: Clustering algorithm selector passed to the search.
            ccore_flag: Value forwarded as the ``ccore`` argument.
        """
        max_attempts = 15

        sample = read_sample(sample_path)
        expected_clusters = answer_reader(answer_path).get_clusters()

        # The accepted window does not depend on the individual run.
        highest_allowed = len(expected_clusters) + 1
        lowest_allowed = max(1, len(expected_clusters) - 1)

        found_acceptable = False
        for _ in range(max_attempts):
            searcher = silhouette_ksearch(sample, kmin, kmax,
                                          algorithm=algorithm,
                                          ccore=ccore_flag)
            outcome = searcher.process()

            found_amount = outcome.get_amount()
            best_score = outcome.get_score()
            all_scores = outcome.get_scores()

            assertion.le(-1.0, best_score)
            assertion.ge(1.0, best_score)
            assertion.eq(kmax - kmin, len(all_scores))

            outside_window = (found_amount > highest_allowed
                              or found_amount < lowest_allowed)
            if not outside_window:
                found_acceptable = True
                break

        assertion.true(found_acceptable)
コード例 #3
0
    def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag):
        """Verify that a silhouette K-search lands near the reference answer.

        Up to 10 searches are performed. Each one must yield a best score in
        [-1, 1] and exactly ``kmax - kmin`` scores; the verification succeeds
        once a run reports an amount that differs from the number of reference
        clusters by at most one (with a lower bound of 1).

        Args:
            sample_path: Path of the sample passed to ``read_sample``.
            answer_path: Path of the answer file passed to ``answer_reader``.
            kmin: Lower bound of the K range passed to the search.
            kmax: Upper bound of the K range passed to the search.
            algorithm: Clustering algorithm selector passed to the search.
            ccore_flag: Value forwarded as the ``ccore`` argument.
        """
        sample = read_sample(sample_path)
        reference = answer_reader(answer_path).get_clusters()

        # Tolerance of +/- 1 around the reference count, clamped at 1.
        ceiling = len(reference) + 1
        floor = max(1, len(reference) - 1)

        remaining = 10
        succeeded = False
        while remaining > 0 and not succeeded:
            remaining -= 1

            result = silhouette_ksearch(sample, kmin, kmax, algorithm=algorithm, ccore=ccore_flag).process()
            k_found = result.get_amount()
            top_score = result.get_score()
            score_table = result.get_scores()

            assertion.le(-1.0, top_score)
            assertion.ge(1.0, top_score)
            assertion.eq(kmax - kmin, len(score_table))

            succeeded = not ((k_found > ceiling) or (k_found < floor))

        assertion.true(succeeded)
コード例 #4
0
ファイル: clusterization.py プロジェクト: WM-SEMERU/ds4se
def find_best_k(samples):
    """Return the cluster amount chosen by a K-Medoids silhouette search.

    The search is run with the bounds 2 and 10, and the score stored for the
    chosen amount is printed before the amount is returned.

    Args:
        samples: Data passed unchanged to ``silhouette_ksearch``.

    Returns:
        The value reported by ``get_amount()`` on the processed search.
    """
    ksearch = silhouette_ksearch(
        samples,
        2,
        10,
        algorithm=silhouette_ksearch_type.KMEDOIDS,
    )
    outcome = ksearch.process()
    amount, scores = outcome.get_amount(), outcome.get_scores()

    print(f"Best Silhouette Score for k = {amount}: {scores[amount]}")

    return amount
コード例 #5
0
def find_best_k(samples):
    """Return the cluster amount chosen by a K-Medoids silhouette search.

    Logs the start of the search, runs it with the bounds 2 and 10, then logs
    the score stored for the chosen amount.

    Args:
        samples: Data passed unchanged to ``silhouette_ksearch``.

    Returns:
        The value reported by ``get_amount()`` on the processed search.
    """
    logging.info("Searching best k for clustering.")

    ksearch = silhouette_ksearch(
        samples,
        2,
        10,
        algorithm=silhouette_ksearch_type.KMEDOIDS,
    )
    outcome = ksearch.process()
    amount, scores = outcome.get_amount(), outcome.get_scores()

    logging.info(f"Best Silhouette Score for k = {amount}: {scores[amount]}")

    return amount
コード例 #6
0
def use_experiment_with_pyclustering_kmedoids(file):
    """Run a K-Medoids silhouette search on a matrix file and log its scores.

    The matrix is loaded with ``read_matrix``, converted to a NumPy array and
    searched with the bounds 2 and ``len(x)``. One line is logged for each
    score the search reports, followed by the amount the search selected. The
    whole run is bracketed by ``TimingLogger.start`` / ``TimingLogger.stop``.

    Args:
        file: Source passed unchanged to ``read_matrix``.
    """
    TimingLogger.start('pyclustering.kmedoids', 'kmedoids')
    x = read_matrix(file)
    X = np.array(x)
    kmin = 2
    kmax = len(x)
    search_instance = silhouette_ksearch(
        X, kmin, kmax, algorithm=silhouette_ksearch_type.KMEDOIDS).process()
    scores = search_instance.get_scores()

    # The previous loop read scores[i - 2] for i in range(2, len(scores)).
    # For a mapping keyed by cluster count that fails on the missing key 0,
    # and for a plain sequence it silently skips the last two scores.
    # NOTE(review): pyclustering is expected to return a dict keyed by K; the
    # sequence branch is a fallback in case a release returns a bare list.
    if isinstance(scores, dict):
        scored = sorted(scores.items())
    else:
        scored = enumerate(scores, start=kmin)

    for n_clusters, score in scored:
        logging.info(
            'For n_clusters = %s The average silhouette_score is : %s',
            n_clusters, score)
    logging.info('Better choice is %s clusters',
                 search_instance.get_amount())
    TimingLogger.stop('pyclustering.kmedoids')
コード例 #7
0
def find_optimal_amout_clusters(sample_path, kmin, kmax, algorithm):
    """Search for a cluster amount, then cluster and display the sample.

    A silhouette K-search is run on the sample and its scores are printed.
    The amount it reports is used to initialise and run K-Means, and the
    resulting clusters are shown with ``cluster_visualizer``.

    Args:
        sample_path: Path of the sample passed to ``read_sample``.
        kmin: Lower bound of the K range passed to the search.
        kmax: Upper bound of the K range passed to the search.
        algorithm: Clustering algorithm selector passed to the search.
    """
    points = read_sample(sample_path)

    searcher = silhouette_ksearch(points, kmin, kmax, algorithm=algorithm)
    outcome = searcher.process()
    best_amount = outcome.get_amount()
    scores = outcome.get_scores()

    print("Sample: '%s', Scores: '%s'" % (sample_path, str(scores)))

    # Cluster the sample again with the amount picked by the search.
    seeds = kmeans_plusplus_initializer(points, best_amount).initialize()
    fitted = kmeans(points, seeds).process()
    found_clusters = fitted.get_clusters()

    plot = cluster_visualizer()
    plot.append_clusters(found_clusters, points)
    plot.show()
コード例 #8
0
def find_optimal_amout_clusters(sample_path, kmin, kmax, algorithm):
    """Pick a cluster amount by silhouette search and visualise the result.

    Steps: read the sample, run the silhouette K-search and print its scores,
    run K-Means seeded by ``kmeans_plusplus_initializer`` with the reported
    amount, and show the clusters through ``cluster_visualizer``.

    Args:
        sample_path: Path of the sample passed to ``read_sample``.
        kmin: Lower bound of the K range passed to the search.
        kmax: Upper bound of the K range passed to the search.
        algorithm: Clustering algorithm selector passed to the search.
    """
    data_points = read_sample(sample_path)

    # Step 1: silhouette search over the requested K range.
    ksearch_result = silhouette_ksearch(
        data_points, kmin, kmax, algorithm=algorithm
    ).process()
    chosen_k = ksearch_result.get_amount()
    scores = ksearch_result.get_scores()
    print("Sample: '%s', Scores: '%s'" % (sample_path, str(scores)))

    # Step 2: K-Means with the amount reported by the search.
    start_centers = kmeans_plusplus_initializer(data_points, chosen_k).initialize()
    kmeans_result = kmeans(data_points, start_centers).process()
    grouped = kmeans_result.get_clusters()

    # Step 3: display.
    canvas = cluster_visualizer()
    canvas.append_clusters(grouped, data_points)
    canvas.show()
コード例 #9
0
    def template_correct_ksearch(self, sample_path, answer_path, kmin, kmax,
                                 algorithm):
        """Check that the search reproduces the reference cluster amount.

        The search is repeated up to 5 times. Every run must give a best score
        in [-1, 1] and ``kmax - kmin`` scores; the check passes once a run
        reports exactly as many clusters as the answer file contains.

        Args:
            sample_path: Path of the sample passed to ``read_sample``.
            answer_path: Path of the answer file passed to ``answer_reader``.
            kmin: Lower bound of the K range passed to the search.
            kmax: Upper bound of the K range passed to the search.
            algorithm: Clustering algorithm selector passed to the search.
        """
        max_attempts = 5

        sample = read_sample(sample_path)
        expected_clusters = answer_reader(answer_path).get_clusters()

        matched = False
        for _ in range(max_attempts):
            searcher = silhouette_ksearch(sample, kmin, kmax,
                                          algorithm=algorithm)
            outcome = searcher.process()

            found_amount = outcome.get_amount()
            best_score = outcome.get_score()
            all_scores = outcome.get_scores()

            assertion.le(-1.0, best_score)
            assertion.ge(1.0, best_score)
            assertion.eq(kmax - kmin, len(all_scores))

            if found_amount == len(expected_clusters):
                matched = True
                break

        assertion.true(matched)
コード例 #10
0
from pyclustering.cluster.kmeans import kmeans
from pyclustering.cluster.silhouette import silhouette
from pyclustering.cluster.silhouette import silhouette_ksearch_type, silhouette_ksearch
from pyclustering.utils.metric import distance_metric, type_metric

# Prepare initial centers (4 of them) for K-Means.
# NOTE(review): `data` and `kmeans_plusplus_initializer` are not defined or
# imported in the visible part of this script - confirm they come from an
# earlier cell.
centers = kmeans_plusplus_initializer(data, 4).initialize()
# Cluster `data` with K-Means using the Manhattan distance metric.
manhattan_metric = distance_metric(type_metric.MANHATTAN)
kmeans_instance = kmeans(data, centers, metric = manhattan_metric)
kmeans_instance.process()
clusters = kmeans_instance.get_clusters()
# Disabled: silhouette score of the clustering obtained above.
# # Calculate Silhouette score
# score = silhouette(data, clusters).process().get_score()

# %%
# Silhouette K-search on `data` with the bounds 2 and 10, using K-Means.
search_instance = silhouette_ksearch(data, 2, 10, algorithm=silhouette_ksearch_type.KMEANS).process()
# Amount selected by the search, and the scores it collected.
amount = search_instance.get_amount()
scores = search_instance.get_scores()
print("Scores: '%s'" % str(scores))


#%%
 
scores


# %%
import numpy as np 
import matplotlib.pyplot as plt

x = np.arange(2, 10, 1)