Beispiel #1
0
    def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm,
                        ccore_flag):
        """Check that silhouette K-search lands near the reference cluster count.

        The search is repeated up to 15 times. The check passes as soon as
        one run reports an amount that differs from the number of reference
        clusters by at most one (the lower bound never drops below 1).
        Every executed run must additionally satisfy the score checks
        against -1.0 and 1.0 and produce exactly ``kmax - kmin`` scores.

        Args:
            sample_path: Argument handed to ``read_sample``.
            answer_path: Argument handed to ``answer_reader``; the clusters
                it yields define the reference cluster count.
            kmin: Passed to ``silhouette_ksearch`` as the second argument.
            kmax: Passed to ``silhouette_ksearch`` as the third argument.
            algorithm: Forwarded as the ``algorithm`` keyword.
            ccore_flag: Forwarded as the ``ccore`` keyword.
        """
        max_attempts = 15

        sample = read_sample(sample_path)
        reference_clusters = answer_reader(answer_path).get_clusters()

        # Accept any amount within one of the reference count, floor of 1.
        reference_amount = len(reference_clusters)
        lowest_allowed = max(1, reference_amount - 1)
        highest_allowed = reference_amount + 1

        succeeded = False
        for _ in range(max_attempts):
            searcher = silhouette_ksearch(sample,
                                          kmin,
                                          kmax,
                                          algorithm=algorithm,
                                          ccore=ccore_flag)
            outcome = searcher.process()

            found_amount = outcome.get_amount()
            best_score = outcome.get_score()
            all_scores = outcome.get_scores()

            assertion.le(-1.0, best_score)
            assertion.ge(1.0, best_score)
            assertion.eq(kmax - kmin, len(all_scores))

            if lowest_allowed <= found_amount <= highest_allowed:
                succeeded = True
                break

        assertion.true(succeeded)
Beispiel #2
0
    def assert_distribution(self, data, sizes, centers, widths):
        """Check that consecutive groups of points average near their centers.

        ``data`` is consumed in order: the first ``sizes[0]`` points are
        summed into group 0, the next ``sizes[1]`` into group 1, and so on.
        For every group and dimension the mean is then compared with
        ``centers[group][dimension]`` via ``assertion.ge`` against
        ``mean - widths[group]`` and ``assertion.le`` against
        ``mean + widths[group]``.

        Args:
            data: Sequence of points; the dimension count is taken from the
                first point.
            sizes: Number of points in each consecutive group.
            centers: Expected center of each group, indexed by dimension.
            widths: Allowed deviation for each group.
        """
        dimension_count = len(data[0])
        coordinate_sums = [[0.0] * dimension_count for _ in sizes]

        group = 0
        filled = 0
        for point in data:
            for axis in range(dimension_count):
                coordinate_sums[group][axis] += point[axis]

            # Move on to the next group once the current one is complete.
            filled += 1
            if filled == sizes[group]:
                group += 1
                filled = 0

        for group, totals in enumerate(coordinate_sums):
            for axis in range(dimension_count):
                mean = totals[axis] / sizes[group]
                expected = centers[group][axis]
                assertion.ge(expected, mean - widths[group])
                assertion.le(expected, mean + widths[group])
    def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag):
        """Run silhouette K-search up to 10 times until the amount is plausible.

        A run is accepted when its reported amount is within one of the
        number of reference clusters, with the lower bound clamped to 1.
        Each executed run is also checked with ``assertion.le(-1.0, score)``,
        ``assertion.ge(1.0, score)`` and for a score list of length
        ``kmax - kmin``. The final assertion fails if no run was accepted.

        Args:
            sample_path: Argument handed to ``read_sample``.
            answer_path: Argument handed to ``answer_reader``.
            kmin: Second positional argument of ``silhouette_ksearch``.
            kmax: Third positional argument of ``silhouette_ksearch``.
            algorithm: Forwarded as the ``algorithm`` keyword.
            ccore_flag: Forwarded as the ``ccore`` keyword.
        """
        sample = read_sample(sample_path)
        expected_count = len(answer_reader(answer_path).get_clusters())

        min_count = max(1, expected_count - 1)
        max_count = expected_count + 1

        testing_result = False
        for _attempt in range(10):
            result = silhouette_ksearch(
                sample, kmin, kmax, algorithm=algorithm, ccore=ccore_flag
            ).process()

            amount = result.get_amount()
            score = result.get_score()
            scores = result.get_scores()

            assertion.le(-1.0, score)
            assertion.ge(1.0, score)
            assertion.eq(kmax - kmin, len(scores))

            # Stop retrying at the first run whose amount is in range.
            if min_count <= amount <= max_count:
                testing_result = True
                break

        assertion.true(testing_result)
Beispiel #4
0
    def templateLengthProcessData(input_sample, start_centers, expected_cluster_length, type_splitting, kmax, ccore, **kwargs):
        """Run X-Means on a sample and verify cluster sizes and the total WCE.

        Checks performed:
          * every point is allocated (cluster sizes sum to ``len(sample)``);
          * there is one center per cluster and at most ``kmax`` centers;
          * the total WCE reported by the algorithm equals the sum of
            ``EUCLIDEAN_SQUARE`` distances from each point to its center;
          * when ``expected_cluster_length`` is given, the sorted obtained
            sizes equal the sorted expected sizes.

        Args:
            input_sample: Either a string, which is loaded through
                ``read_sample``, or the sample itself.
            start_centers: Initial centers passed to ``xmeans``.
            expected_cluster_length: Expected cluster sizes in any order, or
                ``None`` to skip the size comparison. The argument is not
                modified.
            type_splitting: Splitting criterion passed to ``xmeans``.
            kmax: Upper bound on the number of centers; passed to ``xmeans``.
            ccore: Passed to ``xmeans`` as its sixth positional argument.
            **kwargs: Forwarded to ``xmeans`` unchanged.
        """
        if isinstance(input_sample, str):
            sample = read_sample(input_sample)
        else:
            sample = input_sample

        xmeans_instance = xmeans(sample, start_centers, kmax, 0.025, type_splitting, ccore, **kwargs)
        xmeans_instance.process()

        clusters = xmeans_instance.get_clusters()
        centers = xmeans_instance.get_centers()
        wce = xmeans_instance.get_total_wce()

        obtained_cluster_sizes = [len(cluster) for cluster in clusters]

        assertion.eq(len(sample), sum(obtained_cluster_sizes))
        assertion.eq(len(clusters), len(centers))
        assertion.le(len(centers), kmax)

        # Recompute the WCE independently, accumulating in cluster order.
        expected_wce = 0.0
        metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)
        for index_cluster in range(len(clusters)):
            for index_point in clusters[index_cluster]:
                expected_wce += metric(sample[index_point], centers[index_cluster])

        assertion.eq(expected_wce, wce)

        if expected_cluster_length is not None:
            assertion.eq(len(centers), len(expected_cluster_length))

            # Compare sorted copies: sorting in place would reorder the
            # caller's list and would fail for tuples.
            assertion.eq(sorted(obtained_cluster_sizes), sorted(expected_cluster_length))
Beispiel #5
0
    def template_correct_scores(self, sample_path, answer_path):
        """Check silhouette scores computed for a reference clustering.

        The number of scores must equal the number of sample points, and
        every score is checked with ``assertion.le(-1.0, score)`` and
        ``assertion.ge(1.0, score)``.

        Args:
            sample_path: Argument handed to ``read_sample``.
            answer_path: Argument handed to ``answer_reader``; its clusters
                are the clustering being scored.
        """
        points = read_sample(sample_path)
        reference_clusters = answer_reader(answer_path).get_clusters()

        evaluator = silhouette(points, reference_clusters)
        point_scores = evaluator.process().get_score()

        assertion.eq(len(points), len(point_scores))
        for value in point_scores:
            assertion.le(-1.0, value)
            assertion.ge(1.0, value)
    def correct_scores(sample_path, answer_path, ccore_flag):
        """Check silhouette scores for a reference clustering.

        One score is expected per sample point, and each score is checked
        with ``assertion.le(-1.0, score)`` and ``assertion.ge(1.0, score)``.

        Args:
            sample_path: Argument handed to ``read_sample``.
            answer_path: Argument handed to ``answer_reader``.
            ccore_flag: Forwarded to ``silhouette`` as the ``ccore`` keyword.
        """
        points = read_sample(sample_path)
        reference_clusters = answer_reader(answer_path).get_clusters()

        evaluator = silhouette(points, reference_clusters, ccore=ccore_flag)
        point_scores = evaluator.process().get_score()

        assertion.eq(len(points), len(point_scores))
        for value in point_scores:
            assertion.le(-1.0, value)
            assertion.ge(1.0, value)
    def templateKmeasPlusPlusCenterInitializerIndexReturn(self, data, amount):
        """Initialize centers with ``return_index=True`` and validate them.

        Exactly ``amount`` values must be returned, and each one is checked
        to be less than ``len(data)`` and not less than 0.

        Args:
            data: Passed to ``kmeans_plusplus_initializer``.
            amount: Requested number of centers.

        Returns:
            The values produced by ``initialize(return_index=True)``.
        """
        initializer = kmeans_plusplus_initializer(data, amount)
        chosen_indexes = initializer.initialize(return_index=True)

        assertion.eq(amount, len(chosen_indexes))

        for position in chosen_indexes:
            assertion.gt(len(data), position)
            assertion.le(0, position)

        return chosen_indexes
    def templateKmeasPlusPlusCenterInitializerIndexReturn(self, data, amount):
        """Initialize centers with ``return_index=True`` and validate them.

        Exactly ``amount`` values must be returned. Each one is checked to
        be less than ``len(data)``, not less than 0, and to occur exactly
        once in the returned collection.

        Args:
            data: Passed to ``kmeans_plusplus_initializer``.
            amount: Requested number of centers.

        Returns:
            The values produced by ``initialize(return_index=True)``.
        """
        initializer = kmeans_plusplus_initializer(data, amount)
        chosen_indexes = initializer.initialize(return_index=True)

        assertion.eq(amount, len(chosen_indexes))

        for position in chosen_indexes:
            assertion.gt(len(data), position)
            assertion.le(0, position)
            # No index may be selected twice.
            assertion.eq(1, chosen_indexes.count(position))

        return chosen_indexes
Beispiel #9
0
    def templateClusterAllocationOneDimensionData(ccore_flag):
        """Cluster four groups of identical 1-D points with X-Means.

        The input holds ten points each at 0.0, 5.0, 10.0 and 15.0. The
        result must contain four clusters of ten points each, with one
        center per cluster.

        Args:
            ccore_flag: Passed to ``xmeans`` as its sixth positional argument.
        """
        group_values = (0.0, 5.0, 10.0, 15.0)
        input_data = [[value] for value in group_values for _ in range(10)]
        initial_centers = [[0.5], [5.5], [10.5], [15.5]]

        xmeans_instance = xmeans(
            input_data,
            initial_centers,
            20,
            0.025,
            splitting_type.BAYESIAN_INFORMATION_CRITERION,
            ccore_flag,
        )
        xmeans_instance.process()

        allocated = xmeans_instance.get_clusters()
        found_centers = xmeans_instance.get_centers()

        assertion.eq(len(allocated), 4)
        assertion.eq(len(found_centers), len(allocated))

        assertion.le(len(allocated), 20)
        for members in allocated:
            assertion.eq(len(members), 10)
Beispiel #10
0
    def correct_scores(sample_path, answer_path, ccore_flag, **kwargs):
        """Compute and check silhouette scores for a reference clustering.

        When the ``data_type`` keyword equals ``'distance_matrix'`` the
        loaded sample is first converted with ``calculate_distance_matrix``
        using the ``EUCLIDEAN_SQUARE`` metric. One score is expected per
        sample entry, and each score is checked with
        ``assertion.le(-1.0, score)`` and ``assertion.ge(1.0, score)``.

        Args:
            sample_path: Argument handed to ``read_sample``.
            answer_path: Argument handed to ``answer_reader``.
            ccore_flag: Forwarded to ``silhouette`` as the ``ccore`` keyword.
            **kwargs: Only ``data_type`` is read; it defaults to ``'points'``.

        Returns:
            The scores returned by ``get_score()``.
        """
        data_type = kwargs.get('data_type', 'points')

        sample = read_sample(sample_path)
        if data_type == 'distance_matrix':
            metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)
            sample = calculate_distance_matrix(sample, metric)

        reference_clusters = answer_reader(answer_path).get_clusters()

        evaluator = silhouette(sample, reference_clusters, ccore=ccore_flag, data_type=data_type)
        point_scores = evaluator.process().get_score()

        assertion.eq(len(sample), len(point_scores))
        for value in point_scores:
            assertion.le(-1.0, value)
            assertion.ge(1.0, value)

        return point_scores
Beispiel #11
0
    def template_correct_ksearch(self, sample_path, answer_path, kmin, kmax,
                                 algorithm):
        """Check that silhouette K-search recovers the reference cluster count.

        The search is repeated up to 5 times and the check passes as soon
        as one run reports an amount equal to the number of reference
        clusters. Each executed run is also checked with
        ``assertion.le(-1.0, score)``, ``assertion.ge(1.0, score)`` and for
        a score list of length ``kmax - kmin``.

        Args:
            sample_path: Argument handed to ``read_sample``.
            answer_path: Argument handed to ``answer_reader``.
            kmin: Second positional argument of ``silhouette_ksearch``.
            kmax: Third positional argument of ``silhouette_ksearch``.
            algorithm: Forwarded as the ``algorithm`` keyword.
        """
        max_attempts = 5

        sample = read_sample(sample_path)
        reference_clusters = answer_reader(answer_path).get_clusters()

        matched = False
        for _ in range(max_attempts):
            searcher = silhouette_ksearch(sample, kmin, kmax,
                                          algorithm=algorithm)
            outcome = searcher.process()

            found_amount = outcome.get_amount()
            best_score = outcome.get_score()
            all_scores = outcome.get_scores()

            assertion.le(-1.0, best_score)
            assertion.ge(1.0, best_score)
            assertion.eq(kmax - kmin, len(all_scores))

            # Only an exact match with the reference count ends the retries.
            if found_amount == len(reference_clusters):
                matched = True
                break

        assertion.true(matched)
 def templateCollectEvolution(filename, initial_centers, number_clusters, ccore_flag):
     """Run K-Means with an observer and check what the observer recorded.

     The observer must hold at least one entry. Each entry must contain at
     least one center, every center must have as many coordinates as the
     first sample point, and each entry must contain at least one cluster.

     Args:
         filename: Argument handed to ``read_sample``.
         initial_centers: Initial centers passed to ``kmeans``.
         number_clusters: Not used by this template.
         ccore_flag: Passed to ``kmeans`` as its fourth positional argument.
     """
     sample = read_sample(filename)
     dimension = len(sample[0])

     observer = kmeans_observer()
     kmeans(sample, initial_centers, 0.025, ccore_flag, observer=observer).process()

     recorded_steps = len(observer)
     assertion.le(1, recorded_steps)

     for step in range(recorded_steps):
         step_centers = observer.get_centers(step)
         assertion.le(1, len(step_centers))
         for center in step_centers:
             assertion.eq(dimension, len(center))

         assertion.le(1, len(observer.get_clusters(step)))
Beispiel #13
0
 def templateCollectEvolution(filename, initial_centers, number_clusters, ccore_flag):
     """Run K-Means with an attached observer and validate its records.

     At least one entry must have been recorded. For every entry there
     must be at least one center and at least one cluster, and every
     center must have the same number of coordinates as the first sample
     point.

     Args:
         filename: Argument handed to ``read_sample``.
         initial_centers: Initial centers passed to ``kmeans``.
         number_clusters: Not used by this template.
         ccore_flag: Passed to ``kmeans`` as its fourth positional argument.
     """
     sample = read_sample(filename)

     observer = kmeans_observer()
     kmeans_instance = kmeans(sample, initial_centers, 0.025, ccore_flag, observer=observer)
     kmeans_instance.process()

     entry_count = len(observer)
     assertion.le(1, entry_count)

     point_dimension = len(sample[0])
     for entry in range(entry_count):
         centers_at_entry = observer.get_centers(entry)
         assertion.le(1, len(centers_at_entry))
         for center in centers_at_entry:
             assertion.eq(point_dimension, len(center))

         assertion.le(1, len(observer.get_clusters(entry)))