def template_clustering(sample_file_path, amount_clusters, initializer, show_animation=False):
    """Cluster a sample file with `ema`, print a short summary and visualize the result.

    Args:
        sample_file_path: Path handed to `read_sample`; it is also concatenated
            into the printed summary, so it must be a string.
        amount_clusters: Number of clusters passed to `ema_initializer` and `ema`.
        initializer: Initialization type forwarded to
            `ema_initializer(...).initialize`.
        show_animation: When True, an `ema_observer` is attached to the run so
            the first recorded state can be displayed and the cluster
            allocation can be animated afterwards.
    """
    sample = read_sample(sample_file_path)

    # An observer is only attached when the animated mode is requested.
    observer = ema_observer() if show_animation is True else None

    initial_means, initial_covariance = ema_initializer(sample, amount_clusters).initialize(initializer)

    ema_instance = ema(sample, amount_clusters, initial_means, initial_covariance, observer=observer)
    ema_instance.process()

    clusters = ema_instance.get_clusters()
    covariances = ema_instance.get_covariances()
    means = ema_instance.get_centers()

    cluster_length = [len(cluster) for cluster in clusters]
    print("Data '" + sample_file_path + "'")
    print("Clusters: " + str(len(clusters)) + ", Length:" + str(cluster_length))

    # `observer` is either None or an observer object, never the literal True,
    # so the check has to be `is not None` for this branch to be reachable.
    if observer is not None:
        # Show the first state recorded by the observer.
        ema_visualizer.show_clusters(
            observer.get_evolution_clusters()[0],
            sample,
            observer.get_evolution_covariances()[0],
            observer.get_evolution_means()[0],
        )

    # Final clustering result.
    ema_visualizer.show_clusters(clusters, sample, covariances, means)

    if observer is not None:
        ema_visualizer.animate_cluster_allocation(sample, observer)
def templateDataClustering(self, sample_path, amount_clusters, expected_clusters_sizes, init_type=ema_init_type.KMEANS_INITIALIZATION):
    """Run `ema` on a sample, retrying until the expected cluster sizes are obtained.

    Structural assertions (matching lengths of centers, covariances and
    probabilities; per-point probabilities summing to 1) are checked on every
    attempt and fail immediately. A mismatch in cluster sizes only triggers
    another attempt; the test fails if no attempt succeeds.

    Args:
        sample_path: Path passed to `read_sample`.
        amount_clusters: Number of clusters requested from `ema`.
        expected_clusters_sizes: Expected cluster sizes in any order, or None
            to skip the size comparison. The argument is not modified.
        init_type: Initialization type. For any value other than
            `KMEANS_INITIALIZATION` the initial means and variances are
            produced by `ema_initializer` and 10 attempts are made; otherwise
            `None` is passed for both and 5 attempts are made.
    """
    attempts = 10 if init_type != ema_init_type.KMEANS_INITIALIZATION else 5

    # Sort a copy once, instead of sorting the caller's list in place on
    # every attempt.
    expected_sizes = None
    if expected_clusters_sizes is not None:
        expected_sizes = sorted(expected_clusters_sizes)

    testing_result = False
    for _ in range(attempts):
        # The sample is re-read for every attempt, as before.
        sample = read_sample(sample_path)

        means, variances = None, None
        if init_type is not ema_init_type.KMEANS_INITIALIZATION:
            means, variances = ema_initializer(sample, amount_clusters).initialize(init_type)

        ema_instance = ema(sample, amount_clusters, means, variances)
        ema_instance.process()

        clusters = ema_instance.get_clusters()
        centers = ema_instance.get_centers()
        covariances = ema_instance.get_covariances()
        probabilities = ema_instance.get_probabilities()

        assert len(centers) == len(clusters)
        assert len(covariances) == len(clusters)
        assert len(probabilities) == len(clusters)

        for cluster_probability in probabilities:
            assert len(cluster_probability) == len(sample)

        # For each point, the probabilities over all clusters must sum to 1.
        for index_point in range(len(sample)):
            total_probability = 0.0
            for cluster_probability in probabilities:
                total_probability += cluster_probability[index_point]

            assert abs(total_probability - 1.0) <= 0.00001

        obtained_cluster_sizes = [len(cluster) for cluster in clusters]
        if len(sample) != sum(obtained_cluster_sizes):
            continue  # not every point was allocated exactly once - retry

        if expected_sizes is not None and sorted(obtained_cluster_sizes) != expected_sizes:
            continue  # unexpected allocation - retry

        testing_result = True
        break

    assert testing_result, "expected cluster sizes were not obtained in %d attempts" % attempts
def templateDataClustering(self, sample_path, amount_clusters, expected_clusters_sizes, init_type=ema_init_type.KMEANS_INITIALIZATION):
    """Cluster a sample with `ema` and verify the result, allowing up to 10 attempts.

    Each attempt asserts that centers, covariances and probabilities have one
    entry per cluster, that every probability row covers the whole sample, and
    that the per-point probabilities sum to 1 (tolerance 0.00001). An attempt
    succeeds when the cluster sizes add up to the sample size and, if expected
    sizes were given, match them regardless of order. The test fails if no
    attempt succeeds.

    Args:
        sample_path: Path passed to `read_sample`.
        amount_clusters: Number of clusters requested from `ema`.
        expected_clusters_sizes: Expected cluster sizes in any order, or None
            to skip the size comparison. The argument is left untouched.
        init_type: Initialization type. Unless it is `KMEANS_INITIALIZATION`,
            the initial means and variances come from `ema_initializer`;
            otherwise `None` is passed to `ema` for both.
    """
    attempts = 10

    # Non-mutating, loop-invariant copy: the caller's list must not be
    # reordered as a side effect of running the check.
    expected_sorted = None if expected_clusters_sizes is None else sorted(expected_clusters_sizes)

    testing_result = False
    for _ in range(attempts):
        sample = read_sample(sample_path)
        sample_size = len(sample)

        means, variances = None, None
        if init_type is not ema_init_type.KMEANS_INITIALIZATION:
            means, variances = ema_initializer(sample, amount_clusters).initialize(init_type)

        ema_instance = ema(sample, amount_clusters, means, variances)
        ema_instance.process()

        clusters = ema_instance.get_clusters()
        centers = ema_instance.get_centers()
        covariances = ema_instance.get_covariances()
        probabilities = ema_instance.get_probabilities()

        cluster_count = len(clusters)
        assert len(centers) == cluster_count
        assert len(covariances) == cluster_count
        assert len(probabilities) == cluster_count

        for cluster_probability in probabilities:
            assert len(cluster_probability) == sample_size

        # Probabilities of one point over all clusters must sum to 1.
        for index_point in range(sample_size):
            total_probability = 0.0
            for cluster_probability in probabilities:
                total_probability += cluster_probability[index_point]

            assert abs(total_probability - 1.0) <= 0.00001

        obtained_cluster_sizes = [len(cluster) for cluster in clusters]
        sizes_cover_sample = sum(obtained_cluster_sizes) == sample_size
        sizes_as_expected = expected_sorted is None or sorted(obtained_cluster_sizes) == expected_sorted

        if sizes_cover_sample and sizes_as_expected:
            testing_result = True
            break

    assert testing_result, "expected cluster sizes were not obtained in %d attempts" % attempts
def testVisualizerNoFailures(self):
    """Smoke test: run `ema` with an observer and call both visualizer entry points.

    There are no assertions; the test passes as long as none of the calls
    raises.
    """
    cluster_count = 4
    data = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

    initializer = ema_initializer(data, cluster_count)
    start_means, start_variances = initializer.initialize(ema_init_type.RANDOM_INITIALIZATION)

    observer = ema_observer()
    algorithm = ema(data, cluster_count, start_means, start_variances, observer)
    algorithm.process()

    # Collect the results in the same order as they are queried originally.
    found_clusters = algorithm.get_clusters()
    found_means = algorithm.get_centers()
    found_covariances = algorithm.get_covariances()

    ema_visualizer.show_clusters(found_clusters, data, found_covariances, found_means)
    ema_visualizer.animate_cluster_allocation(data, observer)
def testObserver(self):
    """Check that the observer's recorded histories all agree in length.

    After a run on SAMPLE_SIMPLE2 with 3 clusters and random initialization,
    `len(observer)` must be positive and equal to the length of the recorded
    clusters, covariances and means, as well as to `get_iterations()`.
    """
    cluster_count = 3
    data = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2)

    initializer = ema_initializer(data, cluster_count)
    start_means, start_variances = initializer.initialize(ema_init_type.RANDOM_INITIALIZATION)

    observer = ema_observer()
    algorithm = ema(data, cluster_count, start_means, start_variances, observer)
    algorithm.process()

    recorded_steps = len(observer)
    assert recorded_steps > 0

    # Every evolution history must contain one entry per recorded step.
    history_getters = (
        observer.get_evolution_clusters,
        observer.get_evolution_covariances,
        observer.get_evolution_means,
    )
    for get_history in history_getters:
        assert recorded_steps == len(get_history())

    assert recorded_steps == observer.get_iterations()