def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
    """Check that the elbow method detects the amount of clusters from the answer file.

    The search is repeated up to ten times; the check passes as soon as one run
    reports the expected amount. Amounts reported by the rejected runs are put
    into the failure message.

    Args:
        path_to_data: Path handed to `read_sample` to load the sample.
        path_to_answer: Path handed to `answer_reader`; the number of clusters
            it provides is the expected elbow.
        kmin: Lower K bound passed to `elbow`.
        kmax: Upper K bound passed to `elbow`.
        ccore: Forwarded to `elbow` as the `ccore` keyword.
        **kwargs: May contain `initializer`; `kmeans_plusplus_initializer` is
            used when it is absent.
    """
    # Elbow method randomly chooses initial centers therefore we need to repeat test if it fails.
    max_runs = 10

    center_initializer = kwargs.get('initializer', kmeans_plusplus_initializer)
    data = read_sample(path_to_data)
    reference = answer_reader(path_to_answer)

    rejected_amounts = []
    succeeded = False

    for _ in range(max_runs):
        detector = elbow(data, kmin, kmax, ccore=ccore, initializer=center_initializer)
        detector.process()

        found_amount = detector.get_amount()
        wce = detector.get_wce()

        # Structural checks must hold on every run, matching or not.
        assertion.gt(found_amount, kmin)
        assertion.lt(found_amount, kmax)
        assertion.eq(len(wce), kmax - kmin)
        assertion.lt(wce[-1], wce[0] + 0.0000001)

        if found_amount == len(reference.get_clusters()):
            succeeded = True
            break

        rejected_amounts.append(found_amount)

    message = ": ".join((str(len(reference.get_clusters())), str(rejected_amounts)))
    assertion.true(succeeded, message=message)
def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag):
    """Check that silhouette K-search finds roughly the expected amount of clusters.

    Up to fifteen searches are performed. A search is accepted when the amount
    it reports differs from the amount of clusters in the answer file by at
    most one (the lower bound is never allowed to drop below 1).

    Args:
        sample_path: Path handed to `read_sample`.
        answer_path: Path handed to `answer_reader`.
        kmin: Lower K bound passed to `silhouette_ksearch`.
        kmax: Upper K bound passed to `silhouette_ksearch`.
        algorithm: Forwarded as the `algorithm` keyword.
        ccore_flag: Forwarded as the `ccore` keyword.
    """
    max_attempts = 15

    data = read_sample(sample_path)
    expected_clusters = answer_reader(answer_path).get_clusters()

    # Accepted window: expected amount +/- 1, clamped to at least 1.
    highest_allowed = len(expected_clusters) + 1
    lowest_allowed = max(len(expected_clusters) - 1, 1)

    found = False
    for _ in range(max_attempts):
        searcher = silhouette_ksearch(data, kmin, kmax, algorithm=algorithm, ccore=ccore_flag).process()

        found_amount = searcher.get_amount()
        best_score = searcher.get_score()
        all_scores = searcher.get_scores()

        assertion.le(-1.0, best_score)
        assertion.ge(1.0, best_score)
        assertion.eq(kmax - kmin, len(all_scores))

        if lowest_allowed <= found_amount <= highest_allowed:
            found = True
            break

    assertion.true(found)
def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
    """Check that the elbow method detects the amount of clusters from the answer file.

    The search is repeated up to five times and the check passes as soon as
    one run reports the expected amount.

    Args:
        path_to_data: Path handed to `read_sample` to load the sample.
        path_to_answer: Path handed to `answer_reader`; the number of clusters
            it provides is the expected elbow.
        kmin: Lower K bound passed to `elbow`.
        kmax: Upper K bound passed to `elbow`.
        ccore: Forwarded to `elbow` as the `ccore` keyword.
        **kwargs: May contain `initializer`; `kmeans_plusplus_initializer` is
            used when it is absent.
    """
    # Elbow method randomly chooses initial centers therefore we need to repeat test if it fails.
    max_runs = 5

    center_initializer = kwargs.get('initializer', kmeans_plusplus_initializer)
    data = read_sample(path_to_data)
    reference = answer_reader(path_to_answer)

    matched = False
    run = 0
    while run < max_runs and not matched:
        run += 1

        detector = elbow(data, kmin, kmax, ccore=ccore, initializer=center_initializer)
        detector.process()

        found_amount = detector.get_amount()
        wce = detector.get_wce()

        # Structural checks must hold on every run, matching or not.
        assertion.gt(found_amount, kmin)
        assertion.lt(found_amount, kmax)
        assertion.eq(len(wce), kmax - kmin)
        assertion.lt(wce[-1], wce[0])

        matched = found_amount == len(reference.get_clusters())

    assertion.true(matched)
def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag):
    """Check that silhouette K-search finds roughly the expected amount of clusters.

    Up to ten searches are performed. A search is accepted when the reported
    amount lies within one of the amount of clusters in the answer file, with
    the lower bound clamped to 1.

    Args:
        sample_path: Path handed to `read_sample`.
        answer_path: Path handed to `answer_reader`.
        kmin: Lower K bound passed to `silhouette_ksearch`.
        kmax: Upper K bound passed to `silhouette_ksearch`.
        algorithm: Forwarded as the `algorithm` keyword.
        ccore_flag: Forwarded as the `ccore` keyword.
    """
    max_attempts = 10

    data = read_sample(sample_path)
    reference_clusters = answer_reader(answer_path).get_clusters()

    def within_tolerance(candidate):
        """Tell whether `candidate` is within +/- 1 of the expected amount (never below 1)."""
        upper = len(reference_clusters) + 1
        lower = len(reference_clusters) - 1
        if lower < 1:
            lower = 1
        return not ((candidate > upper) or (candidate < lower))

    accepted = False
    for _ in range(max_attempts):
        searcher = silhouette_ksearch(data, kmin, kmax, algorithm=algorithm, ccore=ccore_flag).process()

        found_amount = searcher.get_amount()
        best_score = searcher.get_score()
        all_scores = searcher.get_scores()

        assertion.le(-1.0, best_score)
        assertion.ge(1.0, best_score)
        assertion.eq(kmax - kmin, len(all_scores))

        if within_tolerance(found_amount):
            accepted = True
            break

    assertion.true(accepted)
def templateSyncsegmSegmentation(image_source, radius_color, radius_object, noise_size, expected_color_segments, expected_object_segments, collect_dynamic, ccore_flag):
    """Check that syncsegm yields the expected amounts of color and object segments.

    The segmentation is retried up to ten times and stops at the first run
    whose segment counts both match. The counts of the last performed run are
    asserted afterwards, so a failure reports the actual numbers.

    Args:
        image_source: Passed to `syncsegm.process` as the image to segment.
        radius_color: First argument of the `syncsegm` constructor.
        radius_object: Second argument of the `syncsegm` constructor.
        noise_size: Third argument of the `syncsegm` constructor.
        expected_color_segments: Required length of `allocate_colors()`.
        expected_object_segments: Required length of `allocate_objects(0.2)`.
        collect_dynamic: Forwarded to `syncsegm.process`.
        ccore_flag: Forwarded to `syncsegm` as the `ccore` keyword.
    """
    max_attempts = 10

    matched = False
    color_segments, object_segments = [], []

    for _ in range(max_attempts):
        segmenter = syncsegm(radius_color, radius_object, noise_size, ccore=ccore_flag)
        analyser = segmenter.process(image_source, collect_dynamic, 0.9995, 0.9995)

        color_segments = analyser.allocate_colors()
        object_segments = analyser.allocate_objects(0.2)

        colors_ok = len(color_segments) == expected_color_segments
        if colors_ok and len(object_segments) == expected_object_segments:
            matched = True
            break

    assertion.eq(expected_color_segments, len(color_segments))
    assertion.eq(expected_object_segments, len(object_segments))
    assertion.true(matched)
def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
    """Verify that the elbow method reports the amount of clusters stored in the answer file.

    At most ten runs are made. The first run reporting the expected amount
    ends the loop; amounts from non-matching runs are collected and included
    in the message of the final assertion.

    Args:
        path_to_data: Path handed to `read_sample` to load the sample.
        path_to_answer: Path handed to `answer_reader`; the number of clusters
            it provides is the expected elbow.
        kmin: Lower K bound passed to `elbow`.
        kmax: Upper K bound passed to `elbow`.
        ccore: Forwarded to `elbow` as the `ccore` keyword.
        **kwargs: May contain `initializer`; `kmeans_plusplus_initializer` is
            used when it is absent.
    """
    # Elbow method randomly chooses initial centers therefore we need to repeat test if it fails.
    run_limit = 10

    chosen_initializer = kwargs.get('initializer', kmeans_plusplus_initializer)
    points = read_sample(path_to_data)
    expected = answer_reader(path_to_answer)

    wrong_elbows = []
    passed = False

    for _ in range(run_limit):
        instance = elbow(points, kmin, kmax, ccore=ccore, initializer=chosen_initializer)
        instance.process()

        elbow_amount = instance.get_amount()
        errors = instance.get_wce()

        # These invariants are asserted for every run, even a rejected one.
        assertion.gt(elbow_amount, kmin)
        assertion.lt(elbow_amount, kmax)
        assertion.eq(len(errors), kmax - kmin)
        assertion.lt(errors[-1], errors[0] + 0.0000001)

        if elbow_amount != len(expected.get_clusters()):
            wrong_elbows.append(elbow_amount)
        else:
            passed = True
            break

    expected_text = str(len(expected.get_clusters()))
    assertion.true(passed, message=expected_text + ": " + str(wrong_elbows))
def templateLengthProcessWithMetric(path_to_file, initial_medoids, expected_cluster_length, metric, ccore_flag, **kwargs):
    """Check that K-Medoids produces clusters with the expected sizes.

    When `initialize_medoids` is supplied the initial medoids are generated by
    `kmeans_plusplus_initializer`, so the clustering is retried up to ten
    times and the check passes on the first attempt that satisfies every
    condition. Otherwise a single attempt with `initial_medoids` is made.

    Args:
        path_to_file: Path handed to `read_sample`.
        initial_medoids: Initial medoids passed to `kmedoids`; replaced by
            generated ones when `initialize_medoids` is given.
        expected_cluster_length: Expected cluster sizes in any order, or None
            to skip the size comparison. The list is not modified.
        metric: Metric passed to `kmedoids`; None selects
            `distance_metric(type_metric.EUCLIDEAN_SQUARE)`.
        ccore_flag: Forwarded to `kmedoids`.
        **kwargs: Optional `data_type` (default 'points'), `input_type`
            (default 'list') and `initialize_medoids` (default None).
    """
    sample = read_sample(path_to_file)
    data_type = kwargs.get('data_type', 'points')
    input_type = kwargs.get('input_type', 'list')
    initialize_medoids = kwargs.get('initialize_medoids', None)

    if metric is None:
        metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)

    input_data = sample
    if data_type == 'distance_matrix':
        input_data = calculate_distance_matrix(sample)

        # The numpy conversion is applied to the distance matrix only.
        if input_type == 'numpy':
            input_data = numpy.array(input_data)

    # Compare against a sorted copy so the caller's list is left untouched.
    expected_sizes = None
    if expected_cluster_length is not None:
        expected_sizes = sorted(expected_cluster_length)

    # Several attempts are only needed when the medoid initializer is randomized.
    testing_attempts = 1 if initialize_medoids is None else 10

    testing_result = False
    for _ in range(testing_attempts):
        if initialize_medoids is not None:
            initial_medoids = kmeans_plusplus_initializer(sample, initialize_medoids).initialize(return_index=True)

        kmedoids_instance = kmedoids(input_data, initial_medoids, 0.025, ccore_flag, metric=metric, data_type=data_type)
        kmedoids_instance.process()

        clusters = kmedoids_instance.get_clusters()
        medoids = kmedoids_instance.get_medoids()

        if len(clusters) != len(medoids):
            continue

        # Every medoid must be unique.
        if len(set(medoids)) != len(medoids):
            continue

        # Every point of the sample must be allocated.
        obtained_cluster_sizes = sorted(len(cluster) for cluster in clusters)
        if len(sample) != sum(obtained_cluster_sizes):
            continue

        if expected_sizes is not None and obtained_cluster_sizes != expected_sizes:
            continue

        # Nothing after this point can change the verdict, so stop retrying.
        testing_result = True
        break

    assertion.true(testing_result)
def templateLengthProcessWithMetric(path_to_file, initial_medoids, expected_cluster_length, metric, ccore_flag, **kwargs):
    """Check that K-Medoids produces clusters with the expected sizes.

    When `initialize_medoids` is supplied the initial medoids are generated by
    `kmeans_plusplus_initializer`, so the clustering is retried up to ten
    times and the check passes on the first attempt that satisfies every
    condition. Otherwise a single attempt with `initial_medoids` is made.

    With `itermax == 0` the function instead asserts that no clusters were
    produced and that the medoids equal the initial ones, then returns.

    Args:
        path_to_file: Path handed to `read_sample`.
        initial_medoids: Initial medoids passed to `kmedoids`; replaced by
            generated ones when `initialize_medoids` is given.
        expected_cluster_length: Expected cluster sizes in any order, or None
            to skip the size comparison. The list is not modified.
        metric: Metric passed to `kmedoids`; None selects
            `distance_metric(type_metric.EUCLIDEAN_SQUARE)`.
        ccore_flag: Forwarded to `kmedoids`.
        **kwargs: Optional `data_type` (default 'points'), `input_type`
            (default 'list'), `initialize_medoids` (default None) and
            `itermax` (default 200).
    """
    sample = read_sample(path_to_file)
    data_type = kwargs.get('data_type', 'points')
    input_type = kwargs.get('input_type', 'list')
    initialize_medoids = kwargs.get('initialize_medoids', None)
    itermax = kwargs.get('itermax', 200)

    if metric is None:
        metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)

    input_data = sample
    if data_type == 'distance_matrix':
        input_data = calculate_distance_matrix(sample)

        # The numpy conversion is applied to the distance matrix only.
        if input_type == 'numpy':
            input_data = numpy.array(input_data)

    # Compare against a sorted copy so the caller's list is left untouched.
    expected_sizes = None
    if expected_cluster_length is not None:
        expected_sizes = sorted(expected_cluster_length)

    # Several attempts are only needed when the medoid initializer is randomized.
    testing_attempts = 1 if initialize_medoids is None else 10

    testing_result = False
    for _ in range(testing_attempts):
        if initialize_medoids is not None:
            initial_medoids = kmeans_plusplus_initializer(sample, initialize_medoids).initialize(return_index=True)

        kmedoids_instance = kmedoids(input_data, initial_medoids, 0.001, ccore_flag, metric=metric, data_type=data_type, itermax=itermax)
        kmedoids_instance.process()

        clusters = kmedoids_instance.get_clusters()
        medoids = kmedoids_instance.get_medoids()

        if itermax == 0:
            # Without iterations nothing is allocated and medoids stay as given.
            assertion.eq([], clusters)
            assertion.eq(medoids, initial_medoids)
            return

        if len(clusters) != len(medoids):
            continue

        # Every medoid must be unique.
        if len(set(medoids)) != len(medoids):
            continue

        # Every point of the sample must be allocated.
        obtained_cluster_sizes = sorted(len(cluster) for cluster in clusters)
        if len(sample) != sum(obtained_cluster_sizes):
            continue

        if expected_sizes is not None and obtained_cluster_sizes != expected_sizes:
            continue

        # Nothing after this point can change the verdict, so stop retrying.
        testing_result = True
        break

    assertion.true(testing_result)
def clustering_with_answer(data_file, answer_file, ccore, **kwargs):
    """Cluster a sample with K-Medoids and compare the result with an answer file.

    Initial medoids are produced by `kmeans_plusplus_initializer`; their
    amount equals the number of clusters in the answer. The checks cover the
    number of medoids, the total amount of allocated points, uniqueness of
    medoids and points, the sorted cluster sizes, and finally that every
    obtained cluster equals one of the expected clusters.

    Args:
        data_file: Path handed to `read_sample`.
        answer_file: Path handed to `answer_reader`.
        ccore: Forwarded positionally to `kmedoids`.
        **kwargs: Forwarded to both `kmeans_plusplus_initializer` and `kmedoids`.
    """
    data = read_sample(data_file)
    reader = answer_reader(answer_file)

    medoid_count = len(reader.get_clusters())
    initializer = kmeans_plusplus_initializer(data, medoid_count, **kwargs)
    start_medoids = initializer.initialize(return_index=True)

    algorithm = kmedoids(data, start_medoids, 0.001, ccore, **kwargs)
    algorithm.process()

    clusters = algorithm.get_clusters()
    medoids = algorithm.get_medoids()

    expected_sizes = sorted(reader.get_cluster_lengths())
    actual_sizes = [len(cluster) for cluster in clusters]

    assertion.eq(len(expected_sizes), len(medoids))
    assertion.eq(len(data), sum(actual_sizes))
    assertion.eq(sum(expected_sizes), sum(actual_sizes))

    # No medoid may be reported twice.
    seen_medoids = set()
    for medoid in medoids:
        assertion.false(
            medoid in seen_medoids,
            message="Medoids '%s' is not unique (actual medoids: '%s')" % (str(medoid), str(seen_medoids)))
        seen_medoids.add(medoid)

    # No point may belong to more than one cluster.
    seen_points = set()
    for point in (member for cluster in clusters for member in cluster):
        assertion.false(
            point in seen_points,
            message="Point '%s' is already assigned to one of the clusters." % str(point))
        seen_points.add(point)

    assertion.eq(expected_sizes, sorted(actual_sizes))

    expected_clusters = reader.get_clusters()
    for actual_cluster in clusters:
        is_expected = any(actual_cluster == candidate for candidate in expected_clusters)
        assertion.true(
            is_expected,
            message="Actual cluster '%s' is not found among expected." % str(actual_cluster))
def templatePredict(path_to_file, initial_medoids, points, expected_closest_clusters, ccore, **kwargs):
    """Check that K-Medoids `predict` returns the expected cluster for every point.

    Args:
        path_to_file: Path handed to `read_sample`.
        initial_medoids: Initial medoids passed to `kmedoids`.
        points: Points passed to `predict`.
        expected_closest_clusters: Expected result of `predict`, compared
            element-wise via `numpy.array_equal`.
        ccore: Forwarded positionally to `kmedoids`.
        **kwargs: Optional `metric` (default
            `distance_metric(type_metric.EUCLIDEAN_SQUARE)`) and `itermax`
            (default 200).
    """
    data = read_sample(path_to_file)

    default_metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)
    chosen_metric = kwargs.get('metric', default_metric)
    iteration_limit = kwargs.get('itermax', 200)

    model = kmedoids(data, initial_medoids, 0.001, ccore, metric=chosen_metric, itermax=iteration_limit)
    model.process()

    predicted = model.predict(points)

    assertion.eq(len(expected_closest_clusters), len(predicted))

    expected_array = numpy.array(expected_closest_clusters)
    assertion.true(numpy.array_equal(expected_array, predicted))
def templatePredict(path_to_file, initial_centers, points, expected_amount, expected_closest_clusters, ccore, **kwargs):
    """Check the amount of clusters found by X-Means and the output of its `predict`.

    Args:
        path_to_file: Path handed to `read_sample`.
        initial_centers: Initial centers passed to `xmeans`.
        points: Points passed to `predict`.
        expected_amount: Required number of clusters after processing.
        expected_closest_clusters: Expected result of `predict`, compared
            element-wise via `numpy.array_equal`.
        ccore: Forwarded positionally to `xmeans`.
        **kwargs: Optional `kmax` (default 20).
    """
    data = read_sample(path_to_file)
    cluster_limit = kwargs.get('kmax', 20)

    model = xmeans(data, initial_centers, cluster_limit, 0.025, splitting_type.BAYESIAN_INFORMATION_CRITERION, ccore)
    model.process()

    predicted = model.predict(points)

    assertion.eq(expected_amount, len(model.get_clusters()))
    assertion.eq(len(expected_closest_clusters), len(predicted))

    expected_array = numpy.array(expected_closest_clusters)
    assertion.true(numpy.array_equal(expected_array, predicted))
def test_logical_block_neighbors(self):
    """A block located at [1, 1] must report four neighbors for `get_location_neighbors(3)`."""
    block = clique_block()
    block.logical_location = [1, 1]

    neighbors = block.get_location_neighbors(3)

    assertion.eq(4, len(neighbors))
    for expected_location in ([0, 1], [2, 1], [1, 0], [1, 2]):
        assertion.true(expected_location in neighbors)
def test_logical_block_neighbors(self):
    """Verify the neighbor locations of a block at [1, 1] when queried with argument 3."""
    expected_locations = ([0, 1], [2, 1], [1, 0], [1, 2])

    central_block = clique_block()
    central_block.logical_location = [1, 1]
    found = central_block.get_location_neighbors(3)

    # Exactly four neighbors, one per listed location.
    assertion.eq(4, len(found))
    for location in expected_locations:
        assertion.true(location in found)
def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
    """Check the elbow method, optionally against an expected amount of clusters.

    The search is repeated up to fifteen times. When an expected amount is
    known, the check passes on the first run that reports it and the amounts
    of rejected runs are put into the failure message. When no expectation is
    given, the first run passes as long as the structural assertions hold.

    Args:
        path_to_data: Path handed to `read_sample`.
        path_to_answer: None for no expectation, an int used directly as the
            expected amount, or a path handed to `answer_reader` whose number
            of clusters is the expected amount.
        kmin: Lower K bound passed to `elbow`.
        kmax: Upper K bound passed to `elbow`.
        ccore: Forwarded to `elbow` as the `ccore` keyword.
        **kwargs: Forwarded to `elbow`; `kstep` (default 1) is also used to
            compute the expected length of `get_wce()`.
    """
    # Elbow method randomly chooses initial centers therefore we need to repeat test if it fails.
    max_runs = 15

    kstep = kwargs.get('kstep', 1)
    data = read_sample(path_to_data)

    if path_to_answer is None:
        expected_amount = None
    elif isinstance(path_to_answer, int):
        expected_amount = path_to_answer
    else:
        expected_amount = len(answer_reader(path_to_answer).get_clusters())

    rejected_amounts = []
    succeeded = False

    for _ in range(max_runs):
        detector = elbow(data, kmin, kmax, ccore=ccore, **kwargs)
        detector.process()

        found_amount = detector.get_amount()
        wce = detector.get_wce()

        assertion.gt(found_amount, kmin)
        assertion.lt(found_amount, kmax)
        assertion.eq(len(wce), math.floor((kmax - kmin) / kstep + 1))
        assertion.lt(wce[-1], wce[0] + 0.0000001)

        if expected_amount is None or found_amount == expected_amount:
            succeeded = True
            break

        rejected_amounts.append(found_amount)

    if expected_amount is None:
        message = None
    else:
        message = str(expected_amount) + ": " + str(rejected_amounts)

    assertion.true(succeeded, message=message)
def templateSyncsegmSegmentation(image_source, radius_color, radius_object, noise_size, expected_color_segments, expected_object_segments, collect_dynamic, ccore_flag):
    """Verify the amounts of color and object segments allocated by syncsegm.

    Up to ten segmentation runs are made; the loop ends at the first run where
    both counts match. The counts from the most recent run are asserted after
    the loop, followed by the overall success flag.

    Args:
        image_source: Passed to `syncsegm.process` as the image to segment.
        radius_color: First argument of the `syncsegm` constructor.
        radius_object: Second argument of the `syncsegm` constructor.
        noise_size: Third argument of the `syncsegm` constructor.
        expected_color_segments: Required length of `allocate_colors()`.
        expected_object_segments: Required length of `allocate_objects(0.2)`.
        collect_dynamic: Forwarded to `syncsegm.process`.
        ccore_flag: Forwarded to `syncsegm` as the `ccore` keyword.
    """
    color_segments, object_segments = [], []
    success = False

    attempt = 0
    while attempt < 10 and not success:
        attempt += 1

        analyser = syncsegm(radius_color, radius_object, noise_size, ccore=ccore_flag).process(
            image_source, collect_dynamic, 0.9995, 0.9995)

        color_segments = analyser.allocate_colors()
        object_segments = analyser.allocate_objects(0.2)

        success = (len(color_segments) == expected_color_segments
                   and len(object_segments) == expected_object_segments)

    assertion.eq(expected_color_segments, len(color_segments))
    assertion.eq(expected_object_segments, len(object_segments))
    assertion.true(success)
def test_logical_block_neighbors_on_edge(self):
    """Blocks at [1, 1] and at [0, 0] must both report [0, 1] and [1, 0] for `get_location_neighbors(2)`."""
    block = clique_block()

    for location in ([1, 1], [0, 0]):
        block.logical_location = location
        neighbors = block.get_location_neighbors(2)

        assertion.eq(2, len(neighbors))
        assertion.true([0, 1] in neighbors)
        assertion.true([1, 0] in neighbors)
def test_logical_block_neighbors_on_edge(self):
    """Verify the two neighbors reported for locations [1, 1] and [0, 0] with argument 2."""
    block = clique_block()

    def check_neighbors():
        """Assert that the block currently reports exactly [0, 1] and [1, 0]."""
        found = block.get_location_neighbors(2)
        assertion.eq(2, len(found))
        assertion.true([0, 1] in found)
        assertion.true([1, 0] in found)

    block.logical_location = [1, 1]
    check_neighbors()

    # The same block object is reused after being moved to the opposite corner.
    block.logical_location = [0, 0]
    check_neighbors()
def template_correct_ksearch(self, sample_path, answer_path, kmin, kmax, algorithm):
    """Check that silhouette K-search finds exactly the expected amount of clusters.

    Up to five searches are made; the check passes on the first one whose
    reported amount equals the number of clusters in the answer file.

    Args:
        sample_path: Path handed to `read_sample`.
        answer_path: Path handed to `answer_reader`.
        kmin: Lower K bound passed to `silhouette_ksearch`.
        kmax: Upper K bound passed to `silhouette_ksearch`.
        algorithm: Forwarded as the `algorithm` keyword.
    """
    max_attempts = 5

    data = read_sample(sample_path)
    expected_clusters = answer_reader(answer_path).get_clusters()

    found = False
    for _ in range(max_attempts):
        searcher = silhouette_ksearch(data, kmin, kmax, algorithm=algorithm).process()

        found_amount = searcher.get_amount()
        best_score = searcher.get_score()
        all_scores = searcher.get_scores()

        assertion.le(-1.0, best_score)
        assertion.ge(1.0, best_score)
        assertion.eq(kmax - kmin, len(all_scores))

        if found_amount == len(expected_clusters):
            found = True
            break

    assertion.true(found)
def templateLengthProcessWithMetric(path_to_file, initial_medoids, expected_cluster_length, metric, ccore_flag, **kwargs):
    """Check K-Medoids cluster sizes, total deviation and labels.

    When `initialize_medoids` is supplied the initial medoids are generated by
    `kmeans_plusplus_initializer` and ten attempts are made; otherwise a
    single attempt with `initial_medoids` is made. An attempt is skipped when
    its structure is wrong (medoid/cluster count mismatch, duplicate medoids,
    unallocated points, an empty cluster, or unexpected cluster sizes). Every
    attempt that is not skipped is validated with hard assertions on the
    iteration count, the total deviation and the labels.

    With `itermax == 0` the function instead asserts that nothing was
    computed (zero iterations, zero deviation, no clusters, medoids equal to
    the initial ones) and returns.

    Args:
        path_to_file: Path handed to `read_sample`.
        initial_medoids: Initial medoids passed to `kmedoids`; replaced by
            generated ones when `initialize_medoids` is given.
        expected_cluster_length: Expected cluster sizes in any order, or None
            to skip the size comparison. The list is not modified.
        metric: Metric passed to `kmedoids` and used to recompute the total
            deviation; None selects
            `distance_metric(type_metric.EUCLIDEAN_SQUARE)`.
        ccore_flag: Forwarded to `kmedoids` as the `ccore` keyword.
        **kwargs: Optional `data_type` (default 'points'), `input_type`
            (default 'list'), `initialize_medoids` (default None) and
            `itermax` (default 200).
    """
    sample = read_sample(path_to_file)
    data_type = kwargs.get('data_type', 'points')
    input_type = kwargs.get('input_type', 'list')
    initialize_medoids = kwargs.get('initialize_medoids', None)
    itermax = kwargs.get('itermax', 200)

    if metric is None:
        metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)

    input_data = sample
    if data_type == 'distance_matrix':
        input_data = calculate_distance_matrix(sample, metric)

        # The numpy conversion is applied to the distance matrix only.
        if input_type == 'numpy':
            input_data = numpy.array(input_data)

    # Compare against a sorted copy so the caller's list is left untouched.
    expected_sizes = None
    if expected_cluster_length is not None:
        expected_sizes = sorted(expected_cluster_length)

    # Several attempts are only needed when the medoid initializer is randomized.
    testing_attempts = 1 if initialize_medoids is None else 10

    testing_result = False
    for _ in range(testing_attempts):
        if initialize_medoids is not None:
            initial_medoids = kmeans_plusplus_initializer(sample, initialize_medoids).initialize(return_index=True)

        kmedoids_instance = kmedoids(input_data, initial_medoids, 0.001, ccore=ccore_flag, metric=metric, data_type=data_type, itermax=itermax)
        kmedoids_instance.process()

        clusters = kmedoids_instance.get_clusters()
        medoids = kmedoids_instance.get_medoids()

        if itermax == 0:
            # Without iterations nothing is allocated and medoids stay as given.
            assertion.eq(0, kmedoids_instance.get_iterations())
            assertion.eq(0.0, kmedoids_instance.get_total_deviation())
            assertion.eq([], clusters)
            assertion.eq(medoids, initial_medoids)
            return

        if len(clusters) != len(medoids):
            continue

        # Every medoid must be unique.
        if len(set(medoids)) != len(medoids):
            continue

        # Every point of the sample must be allocated.
        obtained_cluster_sizes = sorted(len(cluster) for cluster in clusters)
        if len(sample) != sum(obtained_cluster_sizes):
            continue

        # Reject the whole attempt when any cluster is empty. A `continue`
        # placed inside a loop over the clusters would only advance that
        # inner loop and never skip the attempt.
        if any(len(cluster) == 0 for cluster in clusters):
            continue

        if expected_sizes is not None and obtained_cluster_sizes != expected_sizes:
            continue

        assertion.gt(kmedoids_instance.get_iterations(), 0)

        # Recompute the total deviation: distance from every non-medoid point
        # to the medoid of its cluster. The count check above guarantees that
        # medoids and clusters pair up one-to-one.
        expected_total_deviation = 0.0
        for index_point_medoid, cluster in zip(medoids, clusters):
            for index_point in cluster:
                if index_point == index_point_medoid:
                    continue

                expected_total_deviation += metric(sample[index_point_medoid], sample[index_point])

        assertion.eq_float(expected_total_deviation, kmedoids_instance.get_total_deviation(), 0.000001)

        # Each label must point at the cluster that actually contains the point.
        labels = kmedoids_instance.get_labels()
        assertion.eq(len(sample), len(labels))
        for index_point, actual_index_cluster in enumerate(labels):
            assertion.true(index_point in clusters[actual_index_cluster])

        # No early exit: each remaining attempt is validated the same way.
        testing_result = True

    assertion.true(testing_result)