def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag):
    """Run silhouette K-search repeatedly and verify the detected amount of
    clusters lands within one of the expected amount from the answer file.

    The search is stochastic, so up to ``attempts`` runs are made and the
    first acceptable result ends the test successfully.
    """
    attempts = 15
    sample = read_sample(sample_path)
    expected_clusters = answer_reader(answer_path).get_clusters()

    success = False
    for _ in range(attempts):
        search = silhouette_ksearch(sample, kmin, kmax,
                                    algorithm=algorithm,
                                    ccore=ccore_flag).process()

        amount = search.get_amount()
        score = search.get_score()
        scores = search.get_scores()

        # silhouette score is always within [-1, 1]
        assertion.le(-1.0, score)
        assertion.ge(1.0, score)
        # one score per examined K value
        assertion.eq(kmax - kmin, len(scores))

        lower_limit = max(1, len(expected_clusters) - 1)
        upper_limit = len(expected_clusters) + 1
        if lower_limit <= amount <= upper_limit:
            success = True
            break

    assertion.true(success)
def assert_distribution(self, data, sizes, centers, widths):
    """Check that per-cluster means of ``data`` fall within ``widths`` of ``centers``.

    ``data`` is assumed to be ordered by cluster: cluster ``i`` occupies
    ``sizes[i]`` consecutive points.
    """
    dimension = len(data[0])

    # accumulate per-cluster coordinate sums by walking the points in order
    actual_means = [[0.0] * dimension for _ in sizes]
    cluster = 0
    points_in_cluster = 0
    for point in data:
        for d in range(dimension):
            actual_means[cluster][d] += point[d]
        points_in_cluster += 1
        if points_in_cluster == sizes[cluster]:
            points_in_cluster = 0
            cluster += 1

    for cluster in range(len(actual_means)):
        for d in range(dimension):
            actual_means[cluster][d] /= sizes[cluster]
            # the declared center must lie inside [mean - width, mean + width]
            assertion.ge(centers[cluster][d],
                         actual_means[cluster][d] - widths[cluster])
            assertion.le(centers[cluster][d],
                         actual_means[cluster][d] + widths[cluster])
def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag):
    """Verify silhouette K-search finds an amount of clusters close to the
    answer file's expectation (within +/- 1, never below 1).

    Retries a few times because the underlying clustering is stochastic.
    """
    attempts = 10
    sample = read_sample(sample_path)
    reference = answer_reader(answer_path).get_clusters()

    passed = False
    for _ in range(attempts):
        instance = silhouette_ksearch(sample, kmin, kmax,
                                      algorithm=algorithm,
                                      ccore=ccore_flag).process()

        amount = instance.get_amount()
        score = instance.get_score()

        # best score must stay inside the silhouette range [-1, 1]
        assertion.le(-1.0, score)
        assertion.ge(1.0, score)
        # one score collected for every candidate K
        assertion.eq(kmax - kmin, len(instance.get_scores()))

        upper = len(reference) + 1
        lower = len(reference) - 1
        if lower < 1:
            lower = 1

        if not (amount > upper or amount < lower):
            passed = True
            break

    assertion.true(passed)
def templateLengthProcessData(input_sample, start_centers, expected_cluster_length,
                              type_splitting, kmax, ccore, **kwargs):
    """Run X-Means and validate full point coverage, center count and total WCE.

    ``input_sample`` may be a path (loaded via ``read_sample``) or the data itself.
    ``expected_cluster_length`` of None skips the cluster-size comparison.
    """
    sample = read_sample(input_sample) if isinstance(input_sample, str) else input_sample

    instance = xmeans(sample, start_centers, kmax, 0.025, type_splitting, ccore, **kwargs)
    instance.process()

    clusters = instance.get_clusters()
    centers = instance.get_centers()
    wce = instance.get_total_wce()

    sizes = [len(cluster) for cluster in clusters]
    # every point must be allocated exactly once
    assertion.eq(len(sample), sum(sizes))
    assertion.eq(len(clusters), len(centers))
    assertion.le(len(centers), kmax)

    # recompute the total within-cluster error independently
    metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)
    expected_wce = 0.0
    for cluster_index, cluster in enumerate(clusters):
        for point_index in cluster:
            expected_wce += metric(sample[point_index], centers[cluster_index])
    assertion.eq(expected_wce, wce)

    if expected_cluster_length is not None:
        assertion.eq(len(centers), len(expected_cluster_length))
        sizes.sort()
        expected_cluster_length.sort()
        assertion.eq(sizes, expected_cluster_length)
def template_correct_scores(self, sample_path, answer_path):
    """Silhouette must produce one score per point, each within [-1, 1]."""
    sample = read_sample(sample_path)
    clusters = answer_reader(answer_path).get_clusters()

    scores = silhouette(sample, clusters).process().get_score()

    assertion.eq(len(sample), len(scores))
    for value in scores:
        assertion.le(-1.0, value)
        assertion.ge(1.0, value)
def correct_scores(sample_path, answer_path, ccore_flag):
    """Silhouette (optionally via ccore) must yield a valid score per point."""
    sample = read_sample(sample_path)
    clusters = answer_reader(answer_path).get_clusters()

    analyser = silhouette(sample, clusters, ccore=ccore_flag).process()
    scores = analyser.get_score()

    # one silhouette value per sample point, each within [-1, 1]
    assertion.eq(len(sample), len(scores))
    for value in scores:
        assertion.le(-1.0, value)
        assertion.ge(1.0, value)
def templateKmeasPlusPlusCenterInitializerIndexReturn(self, data, amount):
    """K-means++ with ``return_index=True`` must yield ``amount`` valid indices.

    Returns the produced index list for further checks by the caller.
    """
    indexes = kmeans_plusplus_initializer(data, amount).initialize(return_index=True)

    assertion.eq(amount, len(indexes))
    for index in indexes:
        # every index addresses a real point: 0 <= index < len(data)
        assertion.gt(len(data), index)
        assertion.le(0, index)

    return indexes
def templateKmeasPlusPlusCenterInitializerIndexReturn(self, data, amount):
    """K-means++ with ``return_index=True`` must yield ``amount`` distinct,
    valid point indices; the index list is returned for further inspection."""
    indexes = kmeans_plusplus_initializer(data, amount).initialize(return_index=True)

    assertion.eq(amount, len(indexes))
    for index in indexes:
        # index must address an existing point: 0 <= index < len(data)
        assertion.gt(len(data), index)
        assertion.le(0, index)
        # and no point may be chosen twice
        assertion.eq(1, indexes.count(index))

    return indexes
def templateClusterAllocationOneDimensionData(ccore_flag):
    """X-Means on four well-separated 1-D groups must find exactly 4 clusters
    of 10 points each."""
    # 10 points at each of 0.0, 5.0, 10.0 and 15.0
    input_data = [[value] for value in (0.0, 5.0, 10.0, 15.0) for _ in range(10)]

    instance = xmeans(input_data, [[0.5], [5.5], [10.5], [15.5]], 20, 0.025,
                      splitting_type.BAYESIAN_INFORMATION_CRITERION, ccore_flag)
    instance.process()

    clusters = instance.get_clusters()
    centers = instance.get_centers()

    assertion.eq(len(clusters), 4)
    assertion.eq(len(centers), len(clusters))
    assertion.le(len(clusters), 20)
    for cluster in clusters:
        assertion.eq(len(cluster), 10)
def correct_scores(sample_path, answer_path, ccore_flag, **kwargs):
    """Silhouette scores for either raw points or a precomputed distance matrix.

    ``data_type`` (kwarg, default ``'points'``) selects the input form; when
    ``'distance_matrix'`` the sample is converted first. Returns the scores.
    """
    data_type = kwargs.get('data_type', 'points')

    sample = read_sample(sample_path)
    if data_type == 'distance_matrix':
        sample = calculate_distance_matrix(
            sample, distance_metric(type_metric.EUCLIDEAN_SQUARE))

    clusters = answer_reader(answer_path).get_clusters()
    scores = silhouette(sample, clusters, ccore=ccore_flag,
                        data_type=data_type).process().get_score()

    # one score per row of the input, each within the silhouette range
    assertion.eq(len(sample), len(scores))
    for value in scores:
        assertion.le(-1.0, value)
        assertion.ge(1.0, value)

    return scores
def template_correct_ksearch(self, sample_path, answer_path, kmin, kmax, algorithm):
    """K-search must recover exactly the expected amount of clusters within a
    few attempts (the underlying clustering is stochastic)."""
    attempts = 5
    sample = read_sample(sample_path)
    expected = answer_reader(answer_path).get_clusters()

    found = False
    for _ in range(attempts):
        search = silhouette_ksearch(sample, kmin, kmax,
                                    algorithm=algorithm).process()

        score = search.get_score()
        # silhouette score must stay within [-1, 1]
        assertion.le(-1.0, score)
        assertion.ge(1.0, score)
        # one score per candidate K
        assertion.eq(kmax - kmin, len(search.get_scores()))

        if search.get_amount() == len(expected):
            found = True
            break

    assertion.true(found)
def templateCollectEvolution(filename, initial_centers, number_clusters, ccore_flag):
    """K-Means observer must record non-empty centers and clusters for every
    iteration, with center dimensionality matching the sample."""
    sample = read_sample(filename)

    observer = kmeans_observer()
    instance = kmeans(sample, initial_centers, 0.025, ccore_flag, observer=observer)
    instance.process()

    # at least one evolution step was captured
    assertion.le(1, len(observer))
    for step in range(len(observer)):
        step_centers = observer.get_centers(step)
        assertion.le(1, len(step_centers))
        for center in step_centers:
            # each recorded center keeps the sample's dimensionality
            assertion.eq(len(sample[0]), len(center))
        assertion.le(1, len(observer.get_clusters(step)))
def templateCollectEvolution(filename, initial_centers, number_clusters, ccore_flag):
    """K-Means observer must record non-empty centers and clusters at every
    iteration, each center having the sample's dimensionality.

    Fix: the original fused its statements with semicolons on one line, which
    is invalid before compound statements (``; for ...``) and non-idiomatic;
    reformatted per PEP 8 with the redundant terminators removed.
    """
    sample = read_sample(filename)
    observer = kmeans_observer()

    kmeans_instance = kmeans(sample, initial_centers, 0.025, ccore_flag,
                             observer=observer)
    kmeans_instance.process()

    # at least one evolution step must have been recorded
    assertion.le(1, len(observer))
    for i in range(len(observer)):
        assertion.le(1, len(observer.get_centers(i)))
        for center in observer.get_centers(i):
            # recorded centers keep the sample's dimensionality
            assertion.eq(len(sample[0]), len(center))
        assertion.le(1, len(observer.get_clusters(i)))