예제 #1
0
def template_clustering(sample_file_path, amount_clusters, initializer, show_animation=False):
    """Cluster a sample file with the ``ema`` algorithm, print a summary and plot it.

    Args:
        sample_file_path: Path handed to ``read_sample``. It is also
            concatenated into the printed summary, so it must be a ``str``.
        amount_clusters: Cluster count forwarded to ``ema_initializer`` and ``ema``.
        initializer: Initialization type forwarded to
            ``ema_initializer(...).initialize``.
        show_animation: When truthy, an ``ema_observer`` is attached to the
            run; the first recorded state is plotted before the final result
            and the cluster allocation is animated afterwards.
    """
    sample = read_sample(sample_file_path)

    # The observer only exists when an animation was requested.
    observer = ema_observer() if show_animation else None

    initial_means, initial_covariance = ema_initializer(sample, amount_clusters).initialize(initializer)
    ema_instance = ema(sample, amount_clusters, initial_means, initial_covariance, observer=observer)
    ema_instance.process()

    clusters = ema_instance.get_clusters()
    covariances = ema_instance.get_covariances()
    means = ema_instance.get_centers()

    cluster_length = [len(cluster) for cluster in clusters]

    print("Data '" + sample_file_path + "'")
    print("Clusters: " + str(len(clusters)) + ", Length:" + str(cluster_length))

    # The original tested `observer is True`, which can never hold for an
    # observer object, so the first recorded state was never displayed.
    if observer is not None:
        ema_visualizer.show_clusters(
            observer.get_evolution_clusters()[0],
            sample,
            observer.get_evolution_covariances()[0],
            observer.get_evolution_means()[0],
        )

    ema_visualizer.show_clusters(clusters, sample, covariances, means)

    if observer is not None:
        ema_visualizer.animate_cluster_allocation(sample, observer)
예제 #2
0
def template_clustering(sample_file_path, amount_clusters, initializer, show_animation=False):
    """Cluster a sample file with the ``ema`` algorithm, print a summary and plot it.

    Args:
        sample_file_path: Path handed to ``read_sample``. It is also
            concatenated into the printed summary, so it must be a ``str``.
        amount_clusters: Cluster count forwarded to ``ema_initializer`` and ``ema``.
        initializer: Initialization type forwarded to
            ``ema_initializer(...).initialize``.
        show_animation: When truthy, an ``ema_observer`` is attached to the
            run; the first recorded state is plotted before the final result
            and the cluster allocation is animated afterwards.
    """
    sample = read_sample(sample_file_path)

    # The observer only exists when an animation was requested.
    observer = ema_observer() if show_animation else None

    initial_means, initial_covariance = ema_initializer(sample, amount_clusters).initialize(initializer)
    ema_instance = ema(sample, amount_clusters, initial_means, initial_covariance, observer=observer)
    ema_instance.process()

    clusters = ema_instance.get_clusters()
    covariances = ema_instance.get_covariances()
    means = ema_instance.get_centers()

    cluster_length = [len(cluster) for cluster in clusters]

    print("Data '" + sample_file_path + "'")
    print("Clusters: " + str(len(clusters)) + ", Length:" + str(cluster_length))

    # The original tested `observer is True`, which can never hold for an
    # observer object, so the first recorded state was never displayed.
    if observer is not None:
        ema_visualizer.show_clusters(
            observer.get_evolution_clusters()[0],
            sample,
            observer.get_evolution_covariances()[0],
            observer.get_evolution_means()[0],
        )

    ema_visualizer.show_clusters(clusters, sample, covariances, means)

    if observer is not None:
        ema_visualizer.animate_cluster_allocation(sample, observer)
예제 #3
0
    def templateDataClustering(self,
                               sample_path,
                               amount_clusters,
                               expected_clusters_sizes,
                               init_type=ema_init_type.KMEANS_INITIALIZATION):
        """Run ``ema`` on a sample and check the result, retrying on a size mismatch.

        Each attempt re-reads the sample, runs the algorithm and asserts that
        the centers, covariances and probabilities line up with the clusters
        and that the per-point probabilities sum to 1. An attempt succeeds when
        every point is allocated and, if expected sizes are given, the sorted
        cluster sizes match them. The test fails if no attempt succeeds.

        Args:
            sample_path: Path handed to ``read_sample``.
            amount_clusters: Cluster count passed to ``ema``.
            expected_clusters_sizes: Iterable of expected cluster sizes in any
                order, or ``None`` to skip the size comparison. It is not
                modified.
            init_type: Initialization type. For anything other than
                ``KMEANS_INITIALIZATION`` the initial means and covariances
                come from ``ema_initializer`` and 10 attempts are made instead
                of 5.
        """
        if init_type != ema_init_type.KMEANS_INITIALIZATION:
            attempts = 10
        else:
            attempts = 5

        # Sort a copy once: the original called .sort() on the caller's list on
        # every attempt, mutating the argument and failing for tuples.
        expected_sorted = None
        if expected_clusters_sizes is not None:
            expected_sorted = sorted(expected_clusters_sizes)

        testing_result = False
        for _ in range(attempts):
            sample = read_sample(sample_path)

            means, variances = None, None
            if init_type is not ema_init_type.KMEANS_INITIALIZATION:
                means, variances = ema_initializer(
                    sample, amount_clusters).initialize(init_type)

            ema_instance = ema(sample, amount_clusters, means, variances)
            ema_instance.process()

            clusters = ema_instance.get_clusters()
            centers = ema_instance.get_centers()
            covariances = ema_instance.get_covariances()
            probabilities = ema_instance.get_probabilities()

            assert len(centers) == len(clusters)
            assert len(covariances) == len(clusters)
            assert len(probabilities) == len(clusters)

            for cluster_probability in probabilities:
                assert len(cluster_probability) == len(sample)

            # Across all clusters, the probabilities of one point must sum to 1.
            for index_point in range(len(sample)):
                total_probability = sum(
                    (cluster_probability[index_point]
                     for cluster_probability in probabilities),
                    0.0)
                assert abs(total_probability - 1.0) <= 0.00001

            obtained_cluster_sizes = sorted(len(cluster) for cluster in clusters)
            if len(sample) != sum(obtained_cluster_sizes):
                continue

            if expected_sorted is not None and obtained_cluster_sizes != expected_sorted:
                continue

            testing_result = True
            break

        assert testing_result
예제 #4
0
 def templateDataClustering(self, sample_path,
                            amount_clusters,
                            expected_clusters_sizes,
                            init_type=ema_init_type.KMEANS_INITIALIZATION):
     """Run ``ema`` on a sample and check the result, retrying on a size mismatch.

     Up to 10 attempts are made. Each attempt re-reads the sample, runs the
     algorithm and asserts that the centers, covariances and probabilities
     line up with the clusters and that the per-point probabilities sum to 1.
     An attempt succeeds when every point is allocated and, if expected sizes
     are given, the sorted cluster sizes match them. The test fails if no
     attempt succeeds.

     Args:
         sample_path: Path handed to ``read_sample``.
         amount_clusters: Cluster count passed to ``ema``.
         expected_clusters_sizes: Iterable of expected cluster sizes in any
             order, or ``None`` to skip the size comparison. It is not
             modified.
         init_type: Initialization type. For anything other than
             ``KMEANS_INITIALIZATION`` the initial means and covariances come
             from ``ema_initializer``.
     """
     attempts = 10

     # Sort a copy once: the original called .sort() on the caller's list on
     # every attempt, mutating the argument and failing for tuples.
     expected_sorted = None
     if expected_clusters_sizes is not None:
         expected_sorted = sorted(expected_clusters_sizes)

     testing_result = False
     for _ in range(attempts):
         sample = read_sample(sample_path)

         means, variances = None, None
         if init_type is not ema_init_type.KMEANS_INITIALIZATION:
             means, variances = ema_initializer(sample, amount_clusters).initialize(init_type)

         ema_instance = ema(sample, amount_clusters, means, variances)
         ema_instance.process()

         clusters = ema_instance.get_clusters()
         centers = ema_instance.get_centers()
         covariances = ema_instance.get_covariances()
         probabilities = ema_instance.get_probabilities()

         assert len(centers) == len(clusters)
         assert len(covariances) == len(clusters)
         assert len(probabilities) == len(clusters)

         for cluster_probability in probabilities:
             assert len(cluster_probability) == len(sample)

         # Across all clusters, the probabilities of one point must sum to 1.
         for index_point in range(len(sample)):
             total_probability = sum(
                 (cluster_probability[index_point]
                  for cluster_probability in probabilities),
                 0.0)
             assert abs(total_probability - 1.0) <= 0.00001

         obtained_cluster_sizes = sorted(len(cluster) for cluster in clusters)
         if len(sample) != sum(obtained_cluster_sizes):
             continue

         if expected_sorted is not None and obtained_cluster_sizes != expected_sorted:
             continue

         testing_result = True
         break

     assert testing_result
예제 #5
0
 def testVisualizerNoFailures(self):
     """Smoke test: the visualizer calls complete without raising.

     Runs ``ema`` with 4 clusters on SAMPLE_SIMPLE3 using random
     initialization and an attached observer, then passes the results to
     ``ema_visualizer.show_clusters`` and ``animate_cluster_allocation``.
     """
     cluster_count = 4
     data = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

     initializer = ema_initializer(data, cluster_count)
     start_means, start_covariances = initializer.initialize(ema_init_type.RANDOM_INITIALIZATION)

     observer = ema_observer()
     algorithm = ema(data, cluster_count, start_means, start_covariances, observer)
     algorithm.process()

     # Fetch the results in the same order as before, then hand them to the visualizer.
     found_clusters = algorithm.get_clusters()
     found_means = algorithm.get_centers()
     found_covariances = algorithm.get_covariances()

     ema_visualizer.show_clusters(found_clusters, data, found_covariances, found_means)
     ema_visualizer.animate_cluster_allocation(data, observer)
예제 #6
0
 def testObserver(self):
     """Check that the observer's recorded evolution is non-empty and consistent.

     Runs ``ema`` with 3 clusters on SAMPLE_SIMPLE2 using random
     initialization, then asserts that ``len(observer)`` is positive and
     equals the length of each recorded evolution list and the reported
     iteration count.
     """
     cluster_count = 3
     data = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2)

     initializer = ema_initializer(data, cluster_count)
     start_means, start_covariances = initializer.initialize(ema_init_type.RANDOM_INITIALIZATION)

     observer = ema_observer()
     algorithm = ema(data, cluster_count, start_means, start_covariances, observer)
     algorithm.process()

     recorded_steps = len(observer)
     assert recorded_steps > 0

     # Every recorded series must have one entry per observed step.
     clusters_history = observer.get_evolution_clusters()
     assert recorded_steps == len(clusters_history)
     covariances_history = observer.get_evolution_covariances()
     assert recorded_steps == len(covariances_history)
     means_history = observer.get_evolution_means()
     assert recorded_steps == len(means_history)

     iteration_count = observer.get_iterations()
     assert recorded_steps == iteration_count
예제 #7
0
 def testVisualizerNoFailures(self):
     """Smoke test: the visualizer calls complete without raising.

     Runs ``ema`` with 4 clusters on SAMPLE_SIMPLE3 using random
     initialization and an attached observer, then passes the results to
     ``ema_visualizer.show_clusters`` and ``animate_cluster_allocation``.
     """
     cluster_count = 4
     data = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

     initializer = ema_initializer(data, cluster_count)
     start_means, start_covariances = initializer.initialize(ema_init_type.RANDOM_INITIALIZATION)

     observer = ema_observer()
     algorithm = ema(data, cluster_count, start_means, start_covariances, observer)
     algorithm.process()

     # Fetch the results in the same order as before, then hand them to the visualizer.
     found_clusters = algorithm.get_clusters()
     found_means = algorithm.get_centers()
     found_covariances = algorithm.get_covariances()

     ema_visualizer.show_clusters(found_clusters, data, found_covariances, found_means)
     ema_visualizer.animate_cluster_allocation(data, observer)
예제 #8
0
 def testObserver(self):
     """Check that the observer's recorded evolution is non-empty and consistent.

     Runs ``ema`` with 3 clusters on SAMPLE_SIMPLE2 using random
     initialization, then asserts that ``len(observer)`` is positive and
     equals the length of each recorded evolution list and the reported
     iteration count.
     """
     cluster_count = 3
     data = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2)

     initializer = ema_initializer(data, cluster_count)
     start_means, start_covariances = initializer.initialize(ema_init_type.RANDOM_INITIALIZATION)

     observer = ema_observer()
     algorithm = ema(data, cluster_count, start_means, start_covariances, observer)
     algorithm.process()

     recorded_steps = len(observer)
     assert recorded_steps > 0

     # Every recorded series must have one entry per observed step.
     clusters_history = observer.get_evolution_clusters()
     assert recorded_steps == len(clusters_history)
     covariances_history = observer.get_evolution_covariances()
     assert recorded_steps == len(covariances_history)
     means_history = observer.get_evolution_means()
     assert recorded_steps == len(means_history)

     iteration_count = observer.get_iterations()
     assert recorded_steps == iteration_count