Ejemplo n.º 1
0
    def clustering(path, threshold1, threshold2, expected, ccore, **kwargs):
        metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN))

        sample = read_sample(path)

        ttsas_instance = ttsas(sample,
                               threshold1,
                               threshold2,
                               ccore=ccore,
                               metric=metric)
        ttsas_instance.process()

        clusters = ttsas_instance.get_clusters()
        representatives = ttsas_instance.get_representatives()

        obtained_length = 0
        obtained_cluster_length = []
        for cluster in clusters:
            obtained_length += len(cluster)
            obtained_cluster_length.append(len(cluster))

        assertion.eq(len(sample), obtained_length)
        assertion.eq(len(expected), len(clusters))
        assertion.eq(len(expected), len(representatives))
        assertion.ge(len(sample), len(clusters))

        dimension = len(sample[0])
        for rep in representatives:
            assertion.eq(dimension, len(rep))

        expected.sort()
        obtained_cluster_length.sort()

        assertion.eq(expected, obtained_cluster_length)
Ejemplo n.º 2
0
    def assert_distribution(self, data, sizes, centers, widths):
        index_cluster = 0
        index_cluster_point = 0

        actual_means = [[0.0 for _ in range(len(data[0]))]
                        for _ in range(len(sizes))]

        for index_point in range(len(data)):
            for index_dimension in range(len(data[0])):
                actual_means[index_cluster][index_dimension] += data[
                    index_point][index_dimension]

            index_cluster_point += 1
            if index_cluster_point == sizes[index_cluster]:
                index_cluster_point = 0
                index_cluster += 1

        for index_cluster in range(len(actual_means)):
            for index_dimension in range(len(data[0])):
                actual_means[index_cluster][index_dimension] /= sizes[
                    index_cluster]
                assertion.ge(
                    centers[index_cluster][index_dimension],
                    actual_means[index_cluster][index_dimension] -
                    widths[index_cluster])
                assertion.le(
                    centers[index_cluster][index_dimension],
                    actual_means[index_cluster][index_dimension] +
                    widths[index_cluster])
Ejemplo n.º 3
0
    def templateMaxAllocatedClusters(ccore_flag, amount_clusters, size_cluster,
                                     offset, kinitial, kmax):
        input_data = []
        for index in range(amount_clusters):
            for _ in range(size_cluster):
                input_data.append([
                    random.random() * index * offset,
                    random.random() * index * offset
                ])

        initial_centers = random_center_initializer(input_data,
                                                    kinitial).initialize()
        xmeans_instance = xmeans(input_data, initial_centers, kmax, 0.025,
                                 splitting_type.BAYESIAN_INFORMATION_CRITERION,
                                 ccore_flag)
        xmeans_instance.process()

        clusters = xmeans_instance.get_clusters()
        centers = xmeans_instance.get_centers()

        if len(clusters) != len(centers):
            print(input_data)
            print(initial_centers)

        assertion.ge(kmax, len(clusters))
        assertion.ge(kmax, len(centers))
        assertion.eq(len(clusters), len(centers))
Ejemplo n.º 4
0
    def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm,
                        ccore_flag):
        attempts = 15
        testing_result = False

        sample = read_sample(sample_path)
        clusters = answer_reader(answer_path).get_clusters()

        for _ in range(attempts):
            ksearch_instance = silhouette_ksearch(sample,
                                                  kmin,
                                                  kmax,
                                                  algorithm=algorithm,
                                                  ccore=ccore_flag).process()
            amount = ksearch_instance.get_amount()
            score = ksearch_instance.get_score()
            scores = ksearch_instance.get_scores()

            assertion.le(-1.0, score)
            assertion.ge(1.0, score)
            assertion.eq(kmax - kmin, len(scores))

            upper_limit = len(clusters) + 1
            lower_limit = len(clusters) - 1
            if lower_limit < 1:
                lower_limit = 1

            if (amount > upper_limit) or (amount < lower_limit):
                continue

            testing_result = True
            break

        assertion.true(testing_result)
Ejemplo n.º 5
0
    def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag):
        attempts = 10
        testing_result = False

        sample = read_sample(sample_path)
        clusters = answer_reader(answer_path).get_clusters()

        for _ in range(attempts):
            ksearch_instance = silhouette_ksearch(sample, kmin, kmax, algorithm=algorithm, ccore=ccore_flag).process()
            amount = ksearch_instance.get_amount()
            score = ksearch_instance.get_score()
            scores = ksearch_instance.get_scores()

            assertion.le(-1.0, score)
            assertion.ge(1.0, score)
            assertion.eq(kmax - kmin, len(scores))

            upper_limit = len(clusters) + 1
            lower_limit = len(clusters) - 1
            if lower_limit < 1:
                lower_limit = 1

            if (amount > upper_limit) or (amount < lower_limit):
                continue

            testing_result = True
            break

        assertion.true(testing_result)
Ejemplo n.º 6
0
    def clustering(path, amount, threshold, expected, ccore, **kwargs):
        metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN));

        sample = read_sample(path);

        mbsas_instance = mbsas(sample, amount, threshold, ccore=ccore, metric=metric);
        mbsas_instance.process();

        clusters = mbsas_instance.get_clusters();
        representatives = mbsas_instance.get_representatives();

        obtained_length = 0;
        obtained_cluster_length = [];
        for cluster in clusters:
            obtained_length += len(cluster);
            obtained_cluster_length.append(len(cluster));

        assertion.eq(len(sample), obtained_length);
        assertion.eq(len(expected), len(clusters));
        assertion.eq(len(expected), len(representatives));
        assertion.ge(amount, len(clusters));

        dimension = len(sample[0]);
        for rep in representatives:
            assertion.eq(dimension, len(rep));

        expected.sort();
        obtained_cluster_length.sort();

        assertion.eq(expected, obtained_cluster_length);
Ejemplo n.º 7
0
    def clustering(path, amount, threshold, expected, ccore, **kwargs):
        metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN));

        sample = read_sample(path);

        bsas_instance = bsas(sample, amount, threshold, ccore=ccore, metric=metric);
        bsas_instance.process();

        clusters = bsas_instance.get_clusters();
        representatives = bsas_instance.get_representatives();

        obtained_length = 0;
        obtained_cluster_length = [];
        for cluster in clusters:
            obtained_length += len(cluster);
            obtained_cluster_length.append(len(cluster));

        assertion.eq(len(sample), obtained_length);
        assertion.eq(len(expected), len(clusters));
        assertion.eq(len(expected), len(representatives));
        assertion.ge(amount, len(clusters));

        dimension = len(sample[0]);
        for rep in representatives:
            assertion.eq(dimension, len(rep));

        expected.sort();
        obtained_cluster_length.sort();

        assertion.eq(expected, obtained_cluster_length);
Ejemplo n.º 8
0
    def template_correct_scores(self, sample_path, answer_path):
        sample = read_sample(sample_path)
        clusters = answer_reader(answer_path).get_clusters()

        scores = silhouette(sample, clusters).process().get_score()

        assertion.eq(len(sample), len(scores))
        for score in scores:
            assertion.le(-1.0, score)
            assertion.ge(1.0, score)
Ejemplo n.º 9
0
    def correct_scores(sample_path, answer_path, ccore_flag):
        sample = read_sample(sample_path)
        clusters = answer_reader(answer_path).get_clusters()

        scores = silhouette(sample, clusters, ccore=ccore_flag).process().get_score()

        assertion.eq(len(sample), len(scores))
        for score in scores:
            assertion.le(-1.0, score)
            assertion.ge(1.0, score)
Ejemplo n.º 10
0
    def templateClusteringResultsSpecificData(data_type, path, radius,
                                              neighbors, amount_clusters,
                                              expected_length_clusters, ccore):
        sample = read_sample(path)

        if data_type == 'distance_matrix':
            input_data = calculate_distance_matrix(sample)
        else:
            input_data = sample

        optics_instance = optics(input_data,
                                 radius,
                                 neighbors,
                                 amount_clusters,
                                 ccore,
                                 data_type=data_type)
        optics_instance.process()

        clusters = optics_instance.get_clusters()
        noise = optics_instance.get_noise()
        optics_objects = optics_instance.get_optics_objects()

        object_indexes = set([obj.index_object for obj in optics_objects])
        assertion.eq(len(optics_objects), len(object_indexes))
        for obj in optics_objects:
            if obj.core_distance is not None:
                assertion.ge(obj.core_distance, 0)

            if obj.reachability_distance is not None:
                assertion.ge(obj.reachability_distance, 0)

        assert sum([len(cluster)
                    for cluster in clusters]) + len(noise) == len(sample)
        assert len(clusters) == len(expected_length_clusters)
        assert sum([len(cluster)
                    for cluster in clusters]) == sum(expected_length_clusters)
        assert sorted([len(cluster) for cluster in clusters
                       ]) == sorted(expected_length_clusters)

        if amount_clusters is not None:
            analyser = ordering_analyser(optics_instance.get_ordering())
            assert len(analyser) > 0

            amount_clusters, borders = analyser.extract_cluster_amount(
                optics_instance.get_radius())
            assert amount_clusters == len(expected_length_clusters)
            assert len(borders) == amount_clusters - 1
Ejemplo n.º 11
0
    def correct_scores(sample_path, answer_path, ccore_flag, **kwargs):
        data_type = kwargs.get('data_type', 'points')

        sample = read_sample(sample_path)
        if data_type == 'distance_matrix':
            sample = calculate_distance_matrix(sample, distance_metric(type_metric.EUCLIDEAN_SQUARE))

        clusters = answer_reader(answer_path).get_clusters()

        scores = silhouette(sample, clusters, ccore=ccore_flag, data_type=data_type).process().get_score()

        assertion.eq(len(sample), len(scores))
        for score in scores:
            assertion.le(-1.0, score)
            assertion.ge(1.0, score)

        return scores
Ejemplo n.º 12
0
    def templateMaxAllocatedClusters(ccore_flag, amount_clusters, size_cluster, offset, kinitial, kmax):
        input_data = []
        for index in range(amount_clusters):
            for _ in range(size_cluster):
                input_data.append([random.random() * index * offset, random.random() * index * offset])
        
        initial_centers = random_center_initializer(input_data, kinitial).initialize()
        xmeans_instance = xmeans(input_data, initial_centers, kmax, 0.025, splitting_type.BAYESIAN_INFORMATION_CRITERION, ccore_flag)
        xmeans_instance.process()
        
        clusters = xmeans_instance.get_clusters()
        centers = xmeans_instance.get_centers()

        if len(clusters) != len(centers):
            print(input_data)
            print(initial_centers)

        assertion.ge(kmax, len(clusters))
        assertion.ge(kmax, len(centers))
        assertion.eq(len(clusters), len(centers))
Ejemplo n.º 13
0
    def template_correct_ksearch(self, sample_path, answer_path, kmin, kmax,
                                 algorithm):
        attempts = 5
        testing_result = False

        sample = read_sample(sample_path)
        clusters = answer_reader(answer_path).get_clusters()

        for _ in range(attempts):
            ksearch_instance = silhouette_ksearch(
                sample, kmin, kmax, algorithm=algorithm).process()
            amount = ksearch_instance.get_amount()
            score = ksearch_instance.get_score()
            scores = ksearch_instance.get_scores()

            assertion.le(-1.0, score)
            assertion.ge(1.0, score)
            assertion.eq(kmax - kmin, len(scores))

            if amount != len(clusters): continue
            testing_result = True
            break

        assertion.true(testing_result)
Ejemplo n.º 14
0
    def templateClusteringResultsSpecificData(data_type, path, radius, neighbors, amount_clusters, expected_length_clusters, ccore):
        sample = read_sample(path)

        if data_type == 'distance_matrix':
            input_data = calculate_distance_matrix(sample)
        else:
            input_data = sample

        optics_instance = optics(input_data, radius, neighbors, amount_clusters, ccore, data_type=data_type)
        optics_instance.process()

        clusters = optics_instance.get_clusters()
        noise = optics_instance.get_noise()
        optics_objects = optics_instance.get_optics_objects()

        object_indexes = set( [ obj.index_object for obj in optics_objects ] )
        assertion.eq(len(optics_objects), len(object_indexes))
        for obj in optics_objects:
            if obj.core_distance is not None:
                assertion.ge(obj.core_distance, 0)

            if obj.reachability_distance is not None:
                assertion.ge(obj.reachability_distance, 0)

        assert sum([len(cluster) for cluster in clusters]) + len(noise) == len(sample)
        assert len(clusters) == len(expected_length_clusters)
        assert sum([len(cluster) for cluster in clusters]) == sum(expected_length_clusters)
        assert sorted([len(cluster) for cluster in clusters]) == sorted(expected_length_clusters)

        if amount_clusters is not None:
            analyser = ordering_analyser(optics_instance.get_ordering())
            assert len(analyser) > 0

            amount_clusters, borders = analyser.extract_cluster_amount(optics_instance.get_radius())
            assert amount_clusters == len(expected_length_clusters)
            assert len(borders) == amount_clusters - 1