Exemple #1
0
 def testVisualize2DAnd3DClusters(self):
     """Draw two data sets, each as a single cluster, on separate canvases of one visualizer."""
     first_points = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)
     second_points = read_sample(FCPS_SAMPLES.SAMPLE_HEPTA)

     visualizer = cluster_visualizer(2, 2)
     # Each entry is (points, marker size); its position doubles as the canvas index.
     for canvas_index, (points, size) in enumerate(((first_points, 5), (second_points, 30))):
         visualizer.append_clusters([points], None, canvas_index, markersize=size)
     visualizer.show()
Exemple #2
0
 def testVisualize1DClustersTwoCanvases(self):
     """Draw SAMPLE_SIMPLE7 and SAMPLE_SIMPLE8 on two canvases, each data set as one cluster."""
     loaded = [
         read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE7),
         read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE8),
     ]

     # One canvas per data set.
     visualizer = cluster_visualizer(2)
     for canvas_index, points in enumerate(loaded):
         visualizer.append_clusters([points], None, canvas_index, markersize=30)
     visualizer.show()
Exemple #3
0
 def testVisualize3DClustersTwoCanvases(self):
     """Draw SAMPLE_TETRA and SAMPLE_HEPTA on two canvases, each data set as one cluster."""
     loaded = [
         read_sample(FCPS_SAMPLES.SAMPLE_TETRA),
         read_sample(FCPS_SAMPLES.SAMPLE_HEPTA),
     ]

     # One canvas per data set.
     visualizer = cluster_visualizer(2)
     for canvas_index, points in enumerate(loaded):
         visualizer.append_clusters([points], None, canvas_index, markersize=30)
     visualizer.show()
Exemple #4
0
 def testVisualizeRectangeRepresentation2x2(self):
     """Draw three simple data sets on canvases 0..2 of a visualizer built with arguments (3, 2)."""
     loaded = [
         read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1),
         read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2),
         read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3),
     ]

     visualizer = cluster_visualizer(3, 2)
     for canvas_index, points in enumerate(loaded):
         visualizer.append_clusters([points], None, canvas_index, markersize=5)
     visualizer.show()
Exemple #5
0
 def testAllocatedRequestedClustersSampleSimple03(self):
     """Run the requested-cluster-amount template on SAMPLE_SIMPLE3 for several amounts."""
     points = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)
     for requested_amount in (2, 5, 8, 10, 15):
         KmedoidsTestTemplates.templateAllocateRequestedClusterAmount(points, requested_amount, None, False)
 def templateClusterAllocation(self, path, cluster_sizes, number_clusters, iterations, maxneighbors):
     """Check that CLARANS yields clusters of the expected sizes in at least one of five attempts.

     An attempt succeeds when the clusters cover every point of the sample and
     their sorted sizes equal the sorted `cluster_sizes`. Note that
     `cluster_sizes` is sorted in place.
     """
     passed = False

     # The algorithm is randomized, so several attempts are allowed.
     for _ in range(5):
         points = read_sample(path)

         algorithm = clarans(points, number_clusters, iterations, maxneighbors)
         algorithm.process()

         actual_sizes = [len(members) for members in algorithm.get_clusters()]
         if sum(actual_sizes) == len(points):
             cluster_sizes.sort()
             actual_sizes.sort()
             if cluster_sizes == actual_sizes:
                 passed = True
                 break

     assert passed
    def templateLengthProcessData(path_to_file, start_centers, expected_cluster_length, ccore, **kwargs):
        """Run K-Means on a sample file and verify cluster sizes and center dimensions.

        Optional keyword arguments: `metric` (defaults to squared Euclidean) and
        `itermax` (defaults to 200). When `expected_cluster_length` is given it
        is sorted in place before the comparison.
        """
        points = read_sample(path_to_file)

        metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN_SQUARE))
        itermax = kwargs.get('itermax', 200)

        algorithm = kmeans(points, start_centers, 0.001, ccore, metric=metric, itermax=itermax)
        algorithm.process()

        clusters = algorithm.get_clusters()
        centers = algorithm.get_centers()
        wce = algorithm.get_total_wce()

        if itermax == 0:
            # With no iterations allowed, the initial centers must come back untouched.
            assertion.eq(start_centers, centers)
            assertion.eq([], clusters)
            assertion.eq(0.0, wce)
            return

        sizes = [len(members) for members in clusters]
        assertion.eq(len(points), sum(sizes))

        assertion.eq(len(clusters), len(centers))
        for center in centers:
            assertion.eq(len(points[0]), len(center))

        if expected_cluster_length is None:
            return

        sizes.sort()
        expected_cluster_length.sort()
        assertion.eq(sizes, expected_cluster_length)
    def templateClusteringWithAnswers(sample_path, answer_path, radius, neighbors, ccore, **kwargs):
        """Run DBSCAN and compare cluster sizes and noise with an answer file.

        Optional keyword arguments: `random_order` (shuffle the points before
        clustering, default False) and `repeat` (number of runs, default 1).
        """
        random_order = kwargs.get('random_order', False)
        repeat = kwargs.get('repeat', 1)

        for _ in range(repeat):
            points = read_sample(sample_path)

            # original_positions[k] is the index in `points` of the k-th point handed to DBSCAN.
            original_positions = list(range(len(points)))
            if random_order:
                shuffle(original_positions)

            shuffled_points = [points[position] for position in original_positions]

            algorithm = dbscan(shuffled_points, radius, neighbors, ccore)
            algorithm.process()

            clusters = algorithm.get_clusters()
            noise = algorithm.get_noise()

            # Translate indexes of the shuffled data back to indexes of the original sample.
            for members in clusters:
                for slot, shuffled_index in enumerate(members):
                    members[slot] = original_positions[shuffled_index]

            noise = sorted(original_positions[shuffled_index] for shuffled_index in noise)

            answers = answer_reader(answer_path)
            expected_noise = sorted(answers.get_noise())
            expected_sizes = answers.get_cluster_lengths()

            sizes = [len(members) for members in clusters]
            clustered_total = sum(sizes)

            assertion.eq(len(points), clustered_total + len(noise))
            assertion.eq(sum(expected_sizes), clustered_total)
            assertion.eq(expected_sizes, sorted(sizes))
            assertion.eq(expected_noise, noise)
Exemple #9
0
def template_clustering(path,
                        count_clusters,
                        chromosome_count,
                        population_count,
                        count_mutation_gens,
                        coeff_mutation_count=0.25,
                        select_coeff=1.0,
                        fps=15,
                        animation=False):
    """Cluster a sample with the genetic algorithm, print the run time and show the result.

    When `animation` is True an animation is additionally produced with
    `fps` frames per second and saved as "clustering_animation.mp4".
    """
    points = read_sample(path)

    settings = {
        'data': points,
        'count_clusters': count_clusters,
        'chromosome_count': chromosome_count,
        'population_count': population_count,
        'count_mutation_gens': count_mutation_gens,
        'coeff_mutation_count': coeff_mutation_count,
        'select_coeff': select_coeff,
        'observer': ga_observer(True, True, True),
    }
    algorithm = genetic_algorithm(**settings)

    # Only the processing itself is timed, not the construction above.
    started_at = time.time()
    algorithm.process()
    elapsed = time.time() - started_at

    print("Sample: ", path, "\t\tExecution time: ", elapsed, "\n")

    evolution = algorithm.get_observer()
    ga_visualizer.show_clusters(points, evolution)

    if animation is not True:
        return

    ga_visualizer.animate_cluster_allocation(points, evolution, movie_fps=fps, save_movie="clustering_animation.mp4")
 def templateClustering(self, file, radius, order, solver, initial, storage_flag, conn_weigh_flag, tolerance, connection, expected_cluster_length, ccore_flag):
     """Check that syncnet allocates clusters of the expected sizes in at least one of four attempts.

     `expected_cluster_length` is sorted in place whenever an attempt produces
     the expected number of clusters.
     """
     passed = False

     # The network has a random part, so phases may cross each other; retry in that case.
     for _ in range(4):
         points = read_sample(file)
         network = syncnet(points, radius, connection, initial, conn_weigh_flag, ccore_flag)
         analyser = network.process(order, solver, storage_flag)

         sizes = [len(members) for members in analyser.allocate_clusters(tolerance)]

         if len(sizes) == len(expected_cluster_length):
             sizes.sort()
             expected_cluster_length.sort()

             if sizes == expected_cluster_length:
                 # This attempt satisfies the test.
                 passed = True
                 break

     assert passed
def template_clustering(file, map_size, trust_order, sync_order = 0.999, show_dyn = False, show_layer1 = False, show_layer2 = False, show_clusters = True):
    """Process a sample with a syncsom network, print the run time and draw the requested views.

    `map_size` supplies the two size arguments of the network (indexes 0 and 1).
    The `show_*` flags select which visualizations are produced.
    """
    points = read_sample(file)

    network = syncsom(points, map_size[0], map_size[1])

    # timedcall returns the elapsed ticks together with the result of network.process.
    ticks, (dyn_time, dyn_phase) = timedcall(network.process, trust_order, show_dyn, sync_order)
    print("Sample: ", file, "\t\tExecution time: ", ticks, "\n")

    # Dynamic returned by the processing call.
    if show_dyn == True:
        draw_dynamics(dyn_time, dyn_phase, x_title="Time", y_title="Phase", y_lim=[0, 2 * 3.14])

    # Clusters over the SOM layer weights.
    if show_clusters == True:
        draw_clusters(network.som_layer.weights, network.get_som_clusters())

    # Network layers.
    if show_layer1 == True:
        network.show_som_layer()

    if show_layer2 == True:
        network.show_sync_layer()

    # Clusters over the input sample.
    if show_clusters == True:
        draw_clusters(points, network.get_clusters())
    def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag):
        """Check that silhouette K-search finds an amount within one of the answer's cluster count.

        Up to ten attempts are made. Every attempt must yield a score in
        [-1, 1] and `kmax - kmin` scores; at least one attempt must yield an
        acceptable amount of clusters.
        """
        points = read_sample(sample_path)
        expected_amount = len(answer_reader(answer_path).get_clusters())

        # Accept the expected amount +/- 1, but never fewer than one cluster.
        upper_limit = expected_amount + 1
        lower_limit = max(expected_amount - 1, 1)

        found = False
        for _ in range(10):
            search = silhouette_ksearch(points, kmin, kmax, algorithm=algorithm, ccore=ccore_flag).process()
            amount = search.get_amount()
            score = search.get_score()
            scores = search.get_scores()

            assertion.le(-1.0, score)
            assertion.ge(1.0, score)
            assertion.eq(kmax - kmin, len(scores))

            if lower_limit <= amount <= upper_limit:
                found = True
                break

        assertion.true(found)
def template_clustering(number_clusters, path, links):
    """Cluster a sample with each requested agglomerative link type and show the results side by side.

    Link types are always handled in the order centroid, single, complete,
    average; those absent from `links` are skipped. Each handled link gets its
    own titled canvas and a printed timing line.
    """
    points = read_sample(path)

    visualizer = cluster_visualizer(len(links))
    next_canvas = 0

    # (link type, canvas title / console label) in processing order.
    link_titles = (
        (type_link.CENTROID_LINK, 'Link: Centroid'),
        (type_link.SINGLE_LINK, 'Link: Single'),
        (type_link.COMPLETE_LINK, 'Link: Complete'),
        (type_link.AVERAGE_LINK, 'Link: Average'),
    )

    for link, title in link_titles:
        if link not in links:
            continue

        algorithm = agglomerative(points, number_clusters, link)

        # timedcall returns (elapsed ticks, call result); only the ticks are needed.
        ticks, _ = timedcall(algorithm.process)

        visualizer.append_clusters(algorithm.get_clusters(), points, next_canvas)
        visualizer.set_canvas_title(title, next_canvas)
        next_canvas += 1

        print("Sample: ", path, title, "\tExecution time: ", ticks, "\n")

    visualizer.show()
 def templateLengthProcessData(self, file, som_map_size, avg_num_conn, eps, expected_cluster_length):
     """Check that syncsom produces clusters of the expected sizes in at least one of three attempts.

     `expected_cluster_length` is sorted in place whenever an attempt covers
     the whole sample.
     """
     passed = False

     # The network has a random part, so phases may cross each other; retry in that case.
     for _ in range(3):
         points = read_sample(file)
         network = syncsom(points, som_map_size[0], som_map_size[1])
         network.process(avg_num_conn, collect_dynamic=False, order=eps)

         sizes = [len(members) for members in network.get_clusters()]
         if len(points) == sum(sizes):
             sizes.sort()
             expected_cluster_length.sort()

             if sizes == expected_cluster_length:
                 # This attempt satisfies the test.
                 passed = True
                 break

     assert passed
def elbow_analysis(sample_file_path, kmin, kmax, **kwargs):
    """Find the amount of clusters with the elbow method, plot the WCE curve and show a K-Means result.

    Optional keyword argument `initializer` is forwarded to the elbow search
    (default `kmeans_plusplus_initializer`). The final K-Means run always uses
    `kmeans_plusplus_initializer` for its initial centers.
    """
    initializer = kwargs.get('initializer', kmeans_plusplus_initializer)
    points = read_sample(sample_file_path)

    search = elbow(points, kmin, kmax, initializer=initializer)
    search.process()

    amount_clusters = search.get_amount()
    wce = search.get_wce()

    initial_centers = kmeans_plusplus_initializer(points, amount_clusters).initialize()
    clustering = kmeans(points, initial_centers)
    clustering.process()
    clusters = clustering.get_clusters()
    final_centers = clustering.get_centers()

    print("Sample '%s': Obtained amount of clusters: '%d'." % (sample_file_path, amount_clusters))

    # wce is indexed from kmin, so the elbow's value sits at offset (amount - kmin).
    elbow_wce = wce[amount_clusters - kmin]

    axes = plt.figure(1).add_subplot(111)
    axes.plot(range(kmin, kmax), wce, color='b', marker='.')
    axes.plot(amount_clusters, elbow_wce, color='r', marker='.', markersize=10)
    axes.annotate("Elbow", (amount_clusters + 0.1, elbow_wce + 5))
    axes.grid(True)
    plt.ylabel("WCE")
    plt.xlabel("K")
    plt.show()

    kmeans_visualizer.show_clusters(points, clusters, final_centers)
    def templateLengthProcessData(data, start_medians, expected_cluster_length, ccore, **kwargs):
        """Run K-Medians and verify that the clusters cover the data and have the expected sizes.

        `data` is either a path to a sample file (str) or the points themselves.
        Optional keyword arguments: `tolerance` (default 0.01), `metric`
        (default None) and `itermax` (default 200). When given,
        `expected_cluster_length` is sorted in place.
        """
        tolerance = kwargs.get('tolerance', 0.01)
        metric = kwargs.get('metric')
        itermax = kwargs.get('itermax', 200)

        points = read_sample(data) if isinstance(data, str) else data

        algorithm = kmedians(points, start_medians, tolerance, ccore, metric=metric, itermax=itermax)
        algorithm.process()

        clusters = algorithm.get_clusters()
        medians = algorithm.get_medians()

        if itermax == 0:
            # With no iterations allowed, nothing is clustered and the medians stay as given.
            assert clusters == []
            assert start_medians == medians
            return

        sizes = [len(members) for members in clusters]
        assert len(points) == sum(sizes)
        assert len(medians) == len(clusters)

        if expected_cluster_length is None:
            return

        sizes.sort()
        expected_cluster_length.sort()
        if sizes != expected_cluster_length:
            # Print the obtained sizes to make the failure below easier to diagnose.
            print(sizes)
        assert sizes == expected_cluster_length
Exemple #17
0
 def testVisualizeByDataOnly(self):
     """Show SAMPLE_SIMPLE1 as a single cluster, passing only the points to the visualizer."""
     visualizer = cluster_visualizer()

     points = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)
     visualizer.append_clusters([points])

     visualizer.show()
    def templateTestAwardNeurons(file, rows, cols, time, expected_result, autostop, ccore_flag, parameters = None, **kwargs):
        """Train a SOM for each connection type and verify winners, awards and captured objects.

        Optional keyword argument `store_load` (default False) round-trips each
        freshly created network through pickle before training. When
        `parameters` is None, a default `som_parameters()` instance is used.
        """
        store_load = kwargs.get('store_load', False)

        connection_types = (type_conn.func_neighbor, type_conn.grid_eight, type_conn.grid_four, type_conn.honeycomb)
        points = read_sample(file)

        if parameters is None:
            parameters = som_parameters()

        for connection in connection_types:
            network = som(rows, cols, connection, parameters, ccore=ccore_flag)
            if store_load:
                # Serialize and restore the network this test has just built itself.
                network = pickle.loads(pickle.dumps(network))

            network.train(points, time, autostop)

            assert network.get_winner_number() == len(expected_result)

            if sorted(network.awards) != expected_result:
                # Display the network before failing so the mismatch can be inspected.
                network.show_network(awards=True)
                assert sorted(network.awards) == expected_result

            captured_total = sum(len(captured) for captured in network.capture_objects)
            assert captured_total == sum(expected_result)

            del network
    def template_cluster_allocation(input_data, cluster_sizes, number_cluster, number_represent_points = 5, compression = 0.5, ccore_flag = False, **kwargs):
        """Run CURE and verify the amount and sizes of the allocated clusters.

        `input_data` is either a path to a sample file (str) or the points
        themselves. Optional keyword argument `numpy_usage` (default False)
        converts the points to a numpy array first. `cluster_sizes` is sorted
        in place.
        """
        points = read_sample(input_data) if isinstance(input_data, str) else input_data

        if kwargs.get('numpy_usage', False) is True:
            points = numpy.array(points)

        algorithm = cure(points, number_cluster, number_represent_points, compression, ccore=ccore_flag)
        algorithm.process()

        clusters = algorithm.get_clusters()
        representors = algorithm.get_representors()
        means = algorithm.get_means()

        # Clusters, representors and means must each have one entry per requested cluster.
        for collection in (clusters, representors, means):
            assertion.eq(len(collection), number_cluster)

        sizes = [len(members) for members in clusters]
        assertion.eq(sum(sizes), len(points))

        cluster_sizes.sort()
        sizes.sort()
        assertion.eq(cluster_sizes, sizes)
Exemple #20
0
 def templateDataClustering(self, sample_path,
                                  amount_clusters,
                                  chromosome_count,
                                  population_count,
                                  count_mutation_gens,
                                  coeff_mutation_count,
                                  expected_clusters_sizes):
     """Check that the genetic algorithm clusters the sample correctly in at least one of three runs.

     A run succeeds when its clusters cover every point of the sample and,
     if `expected_clusters_sizes` is not None, the sorted cluster sizes equal
     the sorted expected sizes. The caller's list is not modified.
     """
     # Sort a copy once so the caller's list is left untouched.
     expected_sizes = None if expected_clusters_sizes is None else sorted(expected_clusters_sizes)

     testing_result = False

     for _ in range(3):
         sample = read_sample(sample_path)

         # The keyword must be spelled `count_mutation_gens`; the former
         # spelling `count_mutations_gen` never reached that parameter.
         ga_instance = genetic_algorithm(sample, amount_clusters, chromosome_count, population_count,
                                         count_mutation_gens=count_mutation_gens,
                                         coeff_mutation_count=coeff_mutation_count)

         ga_instance.process()
         clusters = ga_instance.get_clusters()

         obtained_cluster_sizes = [len(cluster) for cluster in clusters]
         if len(sample) != sum(obtained_cluster_sizes):
             continue

         if expected_sizes is not None and sorted(obtained_cluster_sizes) != expected_sizes:
             continue

         testing_result = True
         break

     assert testing_result is True
    def clustering(path, amount, threshold, expected, ccore, **kwargs):
        """Run BSAS on a sample file and verify clusters and representatives.

        Optional keyword argument `metric` defaults to the Euclidean distance
        metric. `expected` holds the expected cluster sizes and is sorted in
        place.
        """
        metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN))

        points = read_sample(path)

        algorithm = bsas(points, amount, threshold, ccore=ccore, metric=metric)
        algorithm.process()

        clusters = algorithm.get_clusters()
        representatives = algorithm.get_representatives()

        sizes = [len(members) for members in clusters]

        assertion.eq(len(points), sum(sizes))
        assertion.eq(len(expected), len(clusters))
        assertion.eq(len(expected), len(representatives))
        # The amount of allocated clusters must not exceed `amount`.
        assertion.ge(amount, len(clusters))

        dimension = len(points[0])
        for representative in representatives:
            assertion.eq(dimension, len(representative))

        expected.sort()
        sizes.sort()

        assertion.eq(expected, sizes)
    def template_visualize(self, path_sample, path_answer, filter=None, **kwargs):
        """Show the clusters from an answer file over the sample with the multi-dimensional visualizer.

        `filter` and any extra keyword arguments are forwarded to the
        visualizer's `show` call.
        """
        points = read_sample(path_sample)
        reference_clusters = answer_reader(path_answer).get_clusters()

        figure = cluster_visualizer_multidim()
        figure.append_clusters(reference_clusters, points)
        figure.show(filter, **kwargs)
Exemple #23
0
def template_clustering(sample_file_path, amount_clusters, initializer, show_animation = False):
    """Cluster a sample with the EM algorithm, print a summary and show the result.

    When `show_animation` is True an observer is attached to the algorithm;
    in that case the first recorded state is shown before the final result,
    and the cluster allocation is animated afterwards.
    """
    sample = read_sample(sample_file_path)

    # An observer is attached only when the evolution has to be visualized.
    observer = ema_observer() if show_animation is True else None

    initial_means, initial_covariance = ema_initializer(sample, amount_clusters).initialize(initializer)
    ema_instance = ema(sample, amount_clusters, initial_means, initial_covariance, observer=observer)
    ema_instance.process()

    clusters = ema_instance.get_clusters()
    covariances = ema_instance.get_covariances()
    means = ema_instance.get_centers()

    cluster_length = [len(cluster) for cluster in clusters]

    print("Data '" + sample_file_path + "'")
    print("Clusters: " + str(len(clusters)) + ", Length:" + str(cluster_length))

    # `observer` is either None or an observer object, never the literal True,
    # so the previous `observer is True` check made this branch unreachable.
    if observer is not None:
        ema_visualizer.show_clusters(observer.get_evolution_clusters()[0], sample, observer.get_evolution_covariances()[0], observer.get_evolution_means()[0])

    ema_visualizer.show_clusters(clusters, sample, covariances, means)

    if observer is not None:
        ema_visualizer.animate_cluster_allocation(sample, observer)
    def templateLengthProcessData(input_sample, start_centers, expected_cluster_length, type_splitting, kmax, ccore):
        """Run X-Means and verify that the clusters cover the data and respect `kmax`.

        `input_sample` is either a path to a sample file (str) or the points
        themselves. When `expected_cluster_length` is given it is sorted in
        place and compared with the obtained cluster sizes.
        """
        points = read_sample(input_sample) if isinstance(input_sample, str) else input_sample

        algorithm = xmeans(points, start_centers, kmax, 0.025, type_splitting, ccore)
        algorithm.process()

        clusters = algorithm.get_clusters()
        centers = algorithm.get_centers()

        sizes = [len(members) for members in clusters]

        assert len(points) == sum(sizes)
        assert len(clusters) == len(centers)
        assert len(centers) <= kmax

        if expected_cluster_length is None:
            return

        assert len(centers) == len(expected_cluster_length)

        sizes.sort()
        expected_cluster_length.sort()

        assert sizes == expected_cluster_length
    def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
        """Check that the elbow method finds the answer's amount of clusters in at least one of ten runs.

        Optional keyword argument `initializer` defaults to
        `kmeans_plusplus_initializer`. On failure the assertion message lists
        the expected amount followed by the amounts obtained in each run.
        """
        initializer = kwargs.get('initializer', kmeans_plusplus_initializer)

        points = read_sample(path_to_data)
        answer = answer_reader(path_to_answer)

        rejected_amounts = []
        found = False

        # The elbow method chooses initial centers randomly, so a failed run is repeated.
        for _ in range(10):
            search = elbow(points, kmin, kmax, ccore=ccore, initializer=initializer)
            search.process()

            actual_elbow = search.get_amount()
            actual_wce = search.get_wce()

            assertion.gt(actual_elbow, kmin)
            assertion.lt(actual_elbow, kmax)
            assertion.eq(len(actual_wce), kmax - kmin)
            # The last WCE must not exceed the first one (with a small tolerance).
            assertion.lt(actual_wce[-1], actual_wce[0] + 0.0000001)

            if actual_elbow == len(answer.get_clusters()):
                found = True
                break

            rejected_amounts.append(actual_elbow)

        message = str(len(answer.get_clusters())) + ": " + str(rejected_amounts)
        assertion.true(found, message=message)
Exemple #26
0
 def testShowLayersProcessing(self):
     """Process SAMPLE_SIMPLE1 with a syncsom network and display both of its layers."""
     points = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)

     som_network = syncsom(points, 4, 4, 1.0)
     som_network.process(collect_dynamic=False, order=0.99)

     som_network.show_som_layer()
     som_network.show_sync_layer()
Exemple #27
0
 def testClusteringOrderVisualizer(self):
     """Run OPTICS on SAMPLE_SIMPLE4 and show the ordering diagram of the result."""
     points = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE4)

     algorithm = optics(points, 6.0, 3, 5)
     algorithm.process()

     ordering = algorithm.get_ordering()
     ordering_visualizer.show_ordering_diagram(ordering_analyser(ordering), 5)
    def templateAnimateClusteringResultNoFailure(filename, initial_centers, ccore_flag):
        """Run K-Means with an observer attached and animate the recorded cluster allocation."""
        points = read_sample(filename)
        recorder = kmeans_observer()

        algorithm = kmeans(points, initial_centers, 0.025, ccore_flag, observer=recorder)
        algorithm.process()

        kmeans_visualizer.animate_cluster_allocation(points, recorder)
Exemple #29
0
 def testVisualizeOnExistedFigure(self):
     """Show SAMPLE_SIMPLE1 as a single cluster on a figure created beforehand."""
     existing_figure = plt.figure()

     points = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)

     drawer = cluster_visualizer()
     drawer.append_clusters([points])
     drawer.show(existing_figure)
Exemple #30
0
 def testVisualizeRectangeRepresentation3x5(self):
     """Fill fifteen canvases of a visualizer built with arguments (15, 5) with SAMPLE_SIMPLE1."""
     canvas_amount = 15
     visualizer = cluster_visualizer(canvas_amount, 5)

     # The sample is read anew for every canvas, so each canvas gets its own list of points.
     for canvas_index in range(canvas_amount):
         points = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)
         visualizer.append_clusters([points], None, canvas_index, markersize=5)

     visualizer.show()
Exemple #31
0
    def templateClusteringResults(path, radius, neighbors,
                                  expected_length_clusters, ccore):
        """Run DBSCAN on a sample file and verify the cluster sizes.

        Clusters plus noise must cover the whole sample, and the sorted cluster
        sizes must equal `expected_length_clusters` as given (it is not sorted
        here).
        """
        points = read_sample(path)

        algorithm = dbscan(points, radius, neighbors, ccore)
        algorithm.process()

        clusters = algorithm.get_clusters()
        noise = algorithm.get_noise()

        sizes = [len(members) for members in clusters]
        clustered_total = sum(sizes)

        assert clustered_total + len(noise) == len(points)
        assert clustered_total == sum(expected_length_clusters)
        assert sorted(sizes) == expected_length_clusters
Exemple #32
0
def template_clustering(start_centers, path, tolerance = 0.25, ccore = True):
    """Cluster a sample with K-Means, print the run time and show clusters with both center sets."""
    points = read_sample(path)

    algorithm = kmeans(points, start_centers, tolerance, ccore)

    # timedcall returns (elapsed ticks, call result); only the ticks are needed.
    ticks, _ = timedcall(algorithm.process)

    clusters = algorithm.get_clusters()
    final_centers = algorithm.get_centers()

    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")

    visualizer = cluster_visualizer()
    visualizer.append_clusters(clusters, points)
    # Initial centers first, then the obtained ones, both drawn as large stars.
    for center_set in (start_centers, final_centers):
        visualizer.append_cluster(center_set, marker='*', markersize=20)
    visualizer.show()
Exemple #33
0
    def random_state(rows, cols, connections, random_state, ccore_flag):
        """Check that two SOM networks built with the same random state train identically.

        Both networks are created with the same parameters, whose
        `random_state` attribute is set to the given value, and trained on
        SAMPLE_SIMPLE1. Training steps, weights, captured objects and awards
        must all be equal.
        """
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)

        params = som_parameters()
        params.random_state = random_state

        # The parameters must actually be handed to the networks; previously
        # `params` was built but never used, so `random_state` had no effect.
        network_1 = som(rows, cols, connections, params, ccore=ccore_flag)
        steps_1 = network_1.train(sample, 100, True)

        network_2 = som(rows, cols, connections, params, ccore=ccore_flag)
        steps_2 = network_2.train(sample, 100, True)

        assert steps_1 == steps_2
        assert network_1.weights == network_2.weights
        assert network_1.capture_objects == network_2.capture_objects
        assert network_1.awards == network_2.awards
    def templateEncoderProcedures(sample, initial_centers, number_clusters,
                                  ccore_flag):
        """Run K-Means and switch the cluster encoder through three encodings.

        `sample` is the path of the sample file. The amount of clusters
        obtained from K-Means must equal `number_clusters`.
        """
        points = read_sample(sample)

        algorithm = kmeans(points, initial_centers, 0.025, ccore_flag)
        algorithm.process()

        clusters = algorithm.get_clusters()
        encoding = algorithm.get_cluster_encoding()

        encoder = cluster_encoder(encoding, clusters, points)
        target_encodings = (
            type_encoding.CLUSTER_INDEX_LABELING,
            type_encoding.CLUSTER_OBJECT_LIST_SEPARATION,
            type_encoding.CLUSTER_INDEX_LIST_SEPARATION,
        )
        for target in target_encodings:
            encoder.set_encoding(target)

        assert number_clusters == len(clusters)
    def templateLengthProcessData(path_to_file, radius, cluster_numbers,
                                  threshold, expected_cluster_length, ccore):
        """Run ROCK on a sample file and verify that the clusters have the expected sizes.

        `expected_cluster_length` is sorted in place.
        """
        points = read_sample(path_to_file)

        algorithm = rock(points, radius, cluster_numbers, threshold, ccore)
        algorithm.process()

        sizes = [len(members) for members in algorithm.get_clusters()]

        # Every point of the sample must belong to some cluster.
        assert len(points) == sum(sizes)

        sizes.sort()
        expected_cluster_length.sort()

        assert sizes == expected_cluster_length
Exemple #36
0
    def templateLengthProcessData(path_to_file, initial_medoids,
                                  expected_cluster_length, ccore_flag):
        """Run K-Medoids on a sample file and verify that the clusters have the expected sizes.

        `expected_cluster_length` is sorted in place.
        """
        points = read_sample(path_to_file)

        algorithm = kmedoids(points, initial_medoids, 0.025, ccore_flag)
        algorithm.process()

        sizes = [len(members) for members in algorithm.get_clusters()]

        # Every point of the sample must belong to some cluster.
        assert len(points) == sum(sizes)

        sizes.sort()
        expected_cluster_length.sort()
        assert sizes == expected_cluster_length
Exemple #37
0
    def templateClusteringResultsSpecificData(data_type, path, radius,
                                              neighbors, amount_clusters,
                                              expected_length_clusters, ccore):
        """Run OPTICS on points or on a distance matrix and verify clusters, noise and ordering.

        When `data_type` is 'distance_matrix' the sample is converted to a
        distance matrix before clustering. When `amount_clusters` is not None,
        the cluster amount extracted from the ordering is verified as well.
        """
        points = read_sample(path)

        if data_type == 'distance_matrix':
            input_data = calculate_distance_matrix(points)
        else:
            input_data = points

        algorithm = optics(input_data, radius, neighbors, amount_clusters, ccore, data_type=data_type)
        algorithm.process()

        clusters = algorithm.get_clusters()
        noise = algorithm.get_noise()
        optics_objects = algorithm.get_optics_objects()

        # Every optics object must refer to a distinct index.
        unique_indexes = {obj.index_object for obj in optics_objects}
        assertion.eq(len(optics_objects), len(unique_indexes))

        # Distances that are defined must be non-negative.
        for obj in optics_objects:
            for distance in (obj.core_distance, obj.reachability_distance):
                if distance is not None:
                    assertion.ge(distance, 0)

        sizes = [len(members) for members in clusters]
        clustered_total = sum(sizes)

        assert clustered_total + len(noise) == len(points)
        assert len(clusters) == len(expected_length_clusters)
        assert clustered_total == sum(expected_length_clusters)
        assert sorted(sizes) == sorted(expected_length_clusters)

        if amount_clusters is None:
            return

        analyser = ordering_analyser(algorithm.get_ordering())
        assert len(analyser) > 0

        extracted_amount, borders = analyser.extract_cluster_amount(algorithm.get_radius())
        assert extracted_amount == len(expected_length_clusters)
        assert len(borders) == extracted_amount - 1
Exemple #38
0
def template_clustering(file,
                        map_size,
                        radius,
                        sync_order=0.999,
                        show_dyn=False,
                        show_layer1=False,
                        show_layer2=False,
                        show_clusters=True):
    """Process a sample file with a `syncsom` network and optionally draw the results.

    :param file: path passed to `read_sample`.
    :param map_size: indexable; items 0 and 1 are passed to `syncsom` after the sample.
    :param radius: forwarded to `syncsom` as its last argument.
    :param sync_order: forwarded to `network.process` after `show_dyn`.
    :param show_dyn: forwarded to `network.process`; when equal to True the
        returned dynamic is drawn with `draw_dynamics`.
    :param show_layer1: when equal to True, `show_som_layer()` is called.
    :param show_layer2: when equal to True, `show_sync_layer()` is called.
    :param show_clusters: when equal to True, the SOM clusters (over the SOM
        layer weights) and the final clusters (over the sample) are displayed.
    """

    def display(groups, points):
        # One fresh visualizer per picture, exactly one append, then show.
        canvas = cluster_visualizer()
        canvas.append_clusters(groups, points)
        canvas.show()

    data = read_sample(file)
    net = syncsom(data, map_size[0], map_size[1], radius)

    # timedcall returns (elapsed time, result of net.process); the result is
    # itself a (time, phase) pair.
    elapsed, dynamic = timedcall(net.process, show_dyn, sync_order)
    time_axis, phase_axis = dynamic
    print("Sample: ", file, "\t\tExecution time: ", elapsed, "\n")

    if show_dyn == True:
        draw_dynamics(time_axis,
                      phase_axis,
                      x_title="Time",
                      y_title="Phase",
                      y_lim=[0, 3.14])

    if show_clusters == True:
        display(net.get_som_clusters(), net.som_layer.weights)

    if show_layer1 == True:
        net.show_som_layer()

    if show_layer2 == True:
        net.show_sync_layer()

    if show_clusters == True:
        display(net.get_clusters(), data)
Exemple #39
0
    def correct_scores(sample_path, answer_path, ccore_flag, **kwargs):
        """Compute silhouette scores for a sample and check each lies within [-1, 1].

        :param sample_path: path passed to `read_sample`.
        :param answer_path: path passed to `answer_reader`, whose clusters are scored.
        :param ccore_flag: forwarded to `silhouette` as `ccore`.
        :param kwargs: `data_type` ('points' by default); when it equals
            'distance_matrix' the sample is replaced by its distance matrix
            built with the EUCLIDEAN_SQUARE metric.
        :return: the scores returned by `silhouette(...).process().get_score()`.
        """
        kind = kwargs.get('data_type', 'points')

        data = read_sample(sample_path)
        if kind == 'distance_matrix':
            squared_euclidean = distance_metric(type_metric.EUCLIDEAN_SQUARE)
            data = calculate_distance_matrix(data, squared_euclidean)

        expected_clusters = answer_reader(answer_path).get_clusters()

        evaluator = silhouette(data, expected_clusters, ccore=ccore_flag, data_type=kind)
        scores = evaluator.process().get_score()

        # One score per input object, each bounded by -1 and 1.
        assertion.eq(len(data), len(scores))
        for value in scores:
            assertion.le(-1.0, value)
            assertion.ge(1.0, value)

        return scores
    def exception(type, input_data, number_cluster, number_represent_points, compression, ccore_flag):
        """Check that building and processing `cure` raises the expected exception.

        :param type: expected exception class (or tuple of classes). Note that
            this parameter shadows the builtin `type` inside the function.
        :param input_data: a path (str) read through `read_sample`, or the
            sample itself, which is used as is.
        :param number_cluster: forwarded to `cure`.
        :param number_represent_points: forwarded to `cure`.
        :param compression: forwarded to `cure`.
        :param ccore_flag: forwarded to `cure` as `ccore`.
        :raises AssertionError: when a different exception, or no exception
            at all, is raised.
        """
        try:
            if isinstance(input_data, str):
                sample = read_sample(input_data)
            else:
                sample = input_data

            cure_instance = cure(sample, number_cluster, number_represent_points, compression, ccore=ccore_flag)
            cure_instance.process()

        except type:
            return

        except Exception as ex:
            # `type` is the expected exception class here, not the builtin, so
            # `type(ex)` would build an exception instance (which has no
            # `__name__`); take the actual class from the caught object instead.
            raise AssertionError("Expected: '%s', Actual: '%s'" % (type, ex.__class__.__name__))

        # Wrapped in a 1-tuple so a tuple of expected classes formats correctly.
        raise AssertionError("Expected: '%s', Actual: 'None'" % (type,))
Exemple #41
0
    def templateClusterAllocation(self, path, cluster_sizes, number_clusters, branching_factor = 5, max_node_entries = 5, initial_diameter = 0.1, type_measurement = measurement_type.CENTROID_EUCLIDIAN_DISTANCE, entry_size_limit = 200, diameter_multiplier = 1.5):
        """Run `birch` on a sample and verify the sizes of the allocated clusters.

        :param path: path passed to `read_sample`.
        :param cluster_sizes: expected cluster sizes, or None to skip the
            size comparison. The list is sorted in place.
        :param number_clusters: forwarded to `birch`.
        :param branching_factor: forwarded to `birch`.
        :param max_node_entries: forwarded to `birch`.
        :param initial_diameter: forwarded to `birch`.
        :param type_measurement: forwarded to `birch`.
        :param entry_size_limit: forwarded to `birch`.
        :param diameter_multiplier: forwarded to `birch`.
        """
        sample = read_sample(path)

        birch_instance = birch(sample, number_clusters, branching_factor, max_node_entries, initial_diameter, type_measurement, entry_size_limit, diameter_multiplier)
        birch_instance.process()

        clusters = birch_instance.get_clusters()

        obtained_cluster_sizes = [len(cluster) for cluster in clusters]

        # Every object of the sample must land in exactly one cluster.
        total_length = sum(obtained_cluster_sizes)
        assert total_length == len(sample)

        # Identity check: None is a singleton and must not be compared with `!=`.
        if cluster_sizes is not None:
            cluster_sizes.sort()
            obtained_cluster_sizes.sort()
            assert cluster_sizes == obtained_cluster_sizes
Exemple #42
0
def find_optimal_amout_clusters(sample_path, kmin, kmax, algorithm):
    """Search the cluster amount with `silhouette_ksearch`, then cluster and display.

    :param sample_path: path passed to `read_sample`.
    :param kmin: forwarded to `silhouette_ksearch`.
    :param kmax: forwarded to `silhouette_ksearch`.
    :param algorithm: forwarded to `silhouette_ksearch` as `algorithm`.
    """
    data = read_sample(sample_path)

    search = silhouette_ksearch(data, kmin, kmax, algorithm=algorithm)
    outcome = search.process()

    found_amount = outcome.get_amount()
    found_scores = outcome.get_scores()

    print("Sample: '%s', Scores: '%s'" % (sample_path, str(found_scores)))

    # Cluster the sample with the amount found above, using k-means++ seeds.
    seeds = kmeans_plusplus_initializer(data, found_amount).initialize()
    groups = kmeans(data, seeds).process().get_clusters()

    canvas = cluster_visualizer()
    canvas.append_clusters(groups, data)
    canvas.show()
def template_clustering(path, amount, threshold, **kwargs):
    """Cluster a sample with `mbsas` and optionally show the result.

    :param path: path passed to `read_sample`.
    :param amount: forwarded to `mbsas`.
    :param threshold: forwarded to `mbsas`.
    :param kwargs: `metric` (default: `distance_metric` of EUCLIDEAN_SQUARE),
        `ccore` (default False) and `draw` (default True; clusters are shown
        only when it is exactly True).
    """
    # The default metric object is built eagerly, whether or not it is used.
    chosen_metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN_SQUARE))
    use_ccore = kwargs.get('ccore', False)
    need_drawing = kwargs.get('draw', True)

    data = read_sample(path)

    print("Sample: ", path)

    algorithm = mbsas(data, amount, threshold, ccore=use_ccore, metric=chosen_metric)
    algorithm.process()

    groups = algorithm.get_clusters()
    centers = algorithm.get_representatives()

    if need_drawing is True:
        bsas_visualizer.show_clusters(data, groups, centers)
    def templateLengthProcessData(path_to_file, amount_clusters, expected_cluster_length, ccore):
        """Run `somsc` on a sample and verify the sizes of the resulting clusters.

        :param path_to_file: path passed to `read_sample`.
        :param amount_clusters: forwarded to `somsc`.
        :param expected_cluster_length: expected cluster sizes, or None to
            skip the size comparison. The list is sorted in place.
        :param ccore: forwarded to `somsc` as its fourth argument.
        """
        sample = read_sample(path_to_file)

        somsc_instance = somsc(sample, amount_clusters, 100, ccore)
        somsc_instance.process()

        clusters = somsc_instance.get_clusters()

        # Every object of the sample must be allocated to some cluster.
        obtained_cluster_sizes = [len(cluster) for cluster in clusters]
        assert len(sample) == sum(obtained_cluster_sizes)

        if expected_cluster_length is not None:
            obtained_cluster_sizes.sort()
            expected_cluster_length.sort()
            if obtained_cluster_sizes != expected_cluster_length:
                # Show what was actually obtained before the assertion fails
                # (a bare `print` expression outputs nothing in Python 3).
                print(obtained_cluster_sizes)
            assert obtained_cluster_sizes == expected_cluster_length
Exemple #45
0
    def templatePredict(path_to_file, initial_centers, points, expected_amount,
                        expected_closest_clusters, ccore, **kwargs):
        """Run `xmeans` on a sample and verify its predictions for `points`.

        :param path_to_file: path passed to `read_sample`.
        :param initial_centers: forwarded to `xmeans`.
        :param points: passed to `predict` of the processed instance.
        :param expected_amount: expected number of allocated clusters.
        :param expected_closest_clusters: expected result of `predict(points)`.
        :param ccore: forwarded to `xmeans` as its last positional argument.
        :param kwargs: `kmax` (default 20), forwarded to `xmeans`.
        """
        data = read_sample(path_to_file)
        max_clusters = kwargs.get('kmax', 20)

        model = xmeans(data,
                       initial_centers,
                       max_clusters,
                       0.025,
                       splitting_type.BAYESIAN_INFORMATION_CRITERION,
                       ccore)
        model.process()

        predicted = model.predict(points)

        assertion.eq(expected_amount, len(model.get_clusters()))
        assertion.eq(len(expected_closest_clusters), len(predicted))

        expected = numpy.array(expected_closest_clusters)
        assertion.true(numpy.array_equal(expected, predicted))
Exemple #46
0
def template_clustering(path_sample, eps, minpts):
    """Cluster a sample with `optics`, draw the clusters and plot the cluster ordering.

    :param path_sample: path passed to `read_sample`.
    :param eps: forwarded to `optics` as its second argument.
    :param minpts: forwarded to `optics` as its third argument.
    """
    data = read_sample(path_sample)

    model = optics(data, eps, minpts)
    model.process()

    groups = model.get_clusters()
    # Noise is requested but not drawn: an empty list is passed below.
    model.get_noise()

    draw_clusters(data, groups, [], '.')

    # Bar chart of the cluster ordering, one bar per ordering entry.
    reachability = model.get_cluster_ordering()
    positions = list(range(len(reachability)))

    plt.bar(positions, reachability)
    plt.show()
 def testVisualizeClusterWithAttributes(self):
     """Show `cure` clusters of SAMPLE_SIMPLE1 with representors and means attached as attributes."""
     data = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)

     algorithm = cure(data, 2, 5, 0.5, False)
     algorithm.process()

     groups = algorithm.get_clusters()
     representative_points = algorithm.get_representors()
     mean_points = algorithm.get_means()

     canvas = cluster_visualizer()
     canvas.append_clusters(groups, data)

     # Attach representors ('*', size 10) and the mean ('o') to every cluster on canvas 0.
     for position, _ in enumerate(groups):
         canvas.append_cluster_attribute(0, position, representative_points[position], '*', 10)
         canvas.append_cluster_attribute(0, position, [mean_points[position]], 'o')

     canvas.show()
 def templateDynamicLength(path, number_clusters, expected_length, initial_neighbors, increase_persent, collect_dynamic_flag, ccore_flag):
     """Process a sample with `hsyncnet` and verify the length of the returned analyser.

     :param path: path passed to `read_sample`.
     :param number_clusters: forwarded to `hsyncnet`.
     :param expected_length: exact expected length when the dynamic is
         collected; None means "more than one".
     :param initial_neighbors: forwarded to `hsyncnet`.
     :param increase_persent: forwarded to `hsyncnet`.
     :param collect_dynamic_flag: forwarded to `process` as `collect_dynamic`;
         unless it is exactly True the length must be 1.
     :param ccore_flag: forwarded to `hsyncnet` as `ccore`.
     """
     data = read_sample(path)
     net = hsyncnet(data, number_clusters, initial_type.EQUIPARTITION, initial_neighbors, increase_persent, ccore=ccore_flag)

     dynamic = net.process(order=0.995, solution=solve_type.FAST, collect_dynamic=collect_dynamic_flag)

     assert len(dynamic) != 0

     # Without collecting, exactly one state is expected.
     if collect_dynamic_flag is not True:
         assert len(dynamic) == 1
         return

     assert len(dynamic) >= 1
     if expected_length is None:
         assert len(dynamic) > 1
     else:
         assert len(dynamic) == expected_length
def template_clustering(path,
                        radius,
                        cluster_numbers,
                        threshold,
                        draw=True,
                        ccore=True):
    """Cluster a sample with `rock`, print the execution time and optionally draw clusters.

    :param path: path passed to `read_sample`.
    :param radius: forwarded to `rock`.
    :param cluster_numbers: forwarded to `rock`.
    :param threshold: forwarded to `rock`.
    :param draw: clusters are drawn when this equals True.
    :param ccore: forwarded to `rock` as its last positional argument.
    """
    data = read_sample(path)

    algorithm = rock(data, radius, cluster_numbers, threshold, ccore)
    # timedcall returns a pair; only the elapsed time is used here.
    elapsed, _ = timedcall(algorithm.process)

    groups = algorithm.get_clusters()

    print("Sample: ", path, "\t\tExecution time: ", elapsed, "\n")

    if draw == True:
        draw_clusters(data, groups)
    def templateKmeansPlusPlusForKmedoidsClustering(self, path_sample, amount,
                                                    expected_clusters_length):
        """Check k-medoids clustering seeded with k-means++ medoid indexes.

        The initializer is randomized, so the check is attempted up to three
        times and passes as soon as one attempt succeeds.

        :param path_sample: path passed to `read_sample` and to
            `KmedoidsTestTemplates.templateLengthProcessData`.
        :param amount: number of initial medoids requested from
            `kmeans_plusplus_initializer`.
        :param expected_clusters_length: forwarded to
            `KmedoidsTestTemplates.templateLengthProcessData`.
        :raises AssertionError: when every attempt fails.
        """
        sample = read_sample(path_sample)

        # Starts as False and is set only by a successful attempt; previously
        # it was initialised to True and never cleared, so the final assertion
        # could not fail.
        result_success = False
        for _ in range(3):
            start_medoids = kmeans_plusplus_initializer(
                sample, amount).initialize(return_index=True)
            try:
                KmedoidsTestTemplates.templateLengthProcessData(
                    path_sample, start_medoids, expected_clusters_length,
                    False)
            except AssertionError:
                # Unlucky initial medoids: retry with a fresh initialization.
                continue

            result_success = True
            break

        assert result_success == True
 def templateLengthProcessData(self, path_to_file, start_centers, expected_cluster_length, ccore = False):
     """Run `kmedians` on a sample and verify the sizes of the resulting clusters.

     :param path_to_file: path passed to `read_sample`.
     :param start_centers: forwarded to `kmedians`.
     :param expected_cluster_length: expected cluster sizes, or None to skip
         the size comparison. The list is sorted in place.
     :param ccore: forwarded to `kmedians` as its fourth argument.
     """
     data = read_sample(path_to_file)

     algorithm = kmedians(data, start_centers, 0.025, ccore)
     algorithm.process()

     actual_sizes = [len(group) for group in algorithm.get_clusters()]

     # Every object of the sample must be allocated to some cluster.
     assert len(data) == sum(actual_sizes)

     if expected_cluster_length is None:
         return

     actual_sizes.sort()
     expected_cluster_length.sort()
     if actual_sizes != expected_cluster_length:
         # Show the obtained sizes before the assertion below fails.
         print(actual_sizes)
     assert actual_sizes == expected_cluster_length
Exemple #52
0
    def templateClusterAllocation(self, path, cluster_sizes, number_clusters, branching_factor = 5, max_node_entries = 5, initial_diameter = 0.1, type_measurement = measurement_type.CENTROID_EUCLIDEAN_DISTANCE, entry_size_limit = 200, diameter_multiplier = 1.5):
        """Run `birch` on a sample and verify the sizes of the allocated clusters.

        :param path: path passed to `read_sample`.
        :param cluster_sizes: expected cluster sizes, or None to skip the
            size comparison. The list is sorted in place.
        :param number_clusters: forwarded to `birch`.
        :param branching_factor: forwarded to `birch`.
        :param max_node_entries: forwarded to `birch`.
        :param initial_diameter: forwarded to `birch`.
        :param type_measurement: forwarded to `birch`.
        :param entry_size_limit: forwarded to `birch`.
        :param diameter_multiplier: forwarded to `birch`.
        """
        data = read_sample(path)

        algorithm = birch(data, number_clusters, branching_factor, max_node_entries, initial_diameter, type_measurement, entry_size_limit, diameter_multiplier)
        algorithm.process()

        actual_sizes = [len(group) for group in algorithm.get_clusters()]

        # Every object of the sample must land in exactly one cluster.
        self.assertEqual(sum(actual_sizes), len(data))

        if cluster_sizes is None:
            return

        cluster_sizes.sort()
        actual_sizes.sort()
        self.assertEqual(cluster_sizes, actual_sizes)
Exemple #53
0
    def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore,
                        **kwargs):
        """Run `elbow` on a sample, repeating until the expected amount is found.

        :param path_to_data: path passed to `read_sample`.
        :param path_to_answer: None (no expected amount), an int (the expected
            amount itself) or a path passed to `answer_reader`, whose number
            of clusters is the expected amount.
        :param kmin: forwarded to `elbow`.
        :param kmax: forwarded to `elbow`.
        :param ccore: forwarded to `elbow` as `ccore`.
        :param kwargs: forwarded to `elbow`; `kstep` (default 1) is also used
            to compute the expected number of WCE values.
        """
        # Elbow method randomly chooses initial centers therefore we need to repeat test if it fails.
        attempts = 15
        step = kwargs.get('kstep', 1)

        data = read_sample(path_to_data)

        if path_to_answer is None:
            expected_amount = None
        elif isinstance(path_to_answer, int):
            expected_amount = path_to_answer
        else:
            expected_amount = len(answer_reader(path_to_answer).get_clusters())

        succeeded = False
        rejected_amounts = []

        for _ in range(attempts):
            searcher = elbow(data, kmin, kmax, ccore=ccore, **kwargs)
            searcher.process()

            found_amount = searcher.get_amount()
            found_wce = searcher.get_wce()

            assertion.gt(found_amount, kmin)
            assertion.lt(found_amount, kmax)
            assertion.eq(len(found_wce),
                         math.floor((kmax - kmin) / step + 1))
            assertion.lt(found_wce[-1], found_wce[0] + 0.0000001)

            if expected_amount is None or found_amount == expected_amount:
                succeeded = True
                break

            # Remember the wrong amount for the failure message and retry.
            rejected_amounts.append(found_amount)

        if expected_amount is None:
            message = None
        else:
            message = "{}: {}".format(expected_amount, rejected_amounts)

        assertion.true(succeeded, message=message)
Exemple #54
0
    def clustering(path, levels, density_threshold, expected_clusters,
                   expected_noise, ccore, **kwargs):
        """Run `bang` on a sample and verify clusters, noise and directory coverage.

        :param path: path passed to `read_sample`.
        :param levels: forwarded to `bang`.
        :param density_threshold: forwarded to `bang` as `density_threshold`.
        :param expected_clusters: expected cluster sizes, compared against the
            ascending-sorted obtained sizes; None skips the cluster checks.
        :param expected_noise: expected number of noise objects.
        :param ccore: forwarded to `bang` as its third positional argument.
        :param kwargs: `amount_threshold` (default 0), forwarded to `bang`.
        :return: the processed `bang` instance.
        """
        data = read_sample(path)
        min_amount = kwargs.get('amount_threshold', 0)

        bang_instance = bang(data,
                             levels,
                             ccore,
                             density_threshold=density_threshold,
                             amount_threshold=min_amount)
        bang_instance.process()

        clusters = bang_instance.get_clusters()
        noise = bang_instance.get_noise()
        directory = bang_instance.get_directory()
        dendrogram = bang_instance.get_dendrogram()

        assertion.eq(len(clusters), len(dendrogram))

        # Clusters plus noise must account for the whole sample.
        sizes = sorted(len(group) for group in clusters)
        assertion.eq(len(data), len(noise) + sum(sizes))
        assertion.eq(expected_noise, len(noise))

        if expected_clusters is not None:
            assertion.eq(len(expected_clusters), len(clusters))
            assertion.eq(expected_clusters, sizes)

        # The leafs of the directory together must reference every object.
        covered = {
            index
            for leaf in directory.get_leafs()
            for index in leaf.get_points()
        }
        assertion.eq(len(data), len(covered))

        return bang_instance
Exemple #55
0
 def templateClusteringResults(self, path, radius, neighbors, amount_clusters, expected_length_clusters, ccore):
     """Run `optics` on a sample and verify the clusters, noise and ordering analysis.

     :param path: path passed to `read_sample`.
     :param radius: forwarded to `optics`.
     :param neighbors: forwarded to `optics`.
     :param amount_clusters: forwarded to `optics`; when not None the cluster
         ordering is additionally analysed.
     :param expected_length_clusters: expected cluster sizes (order-insensitive).
     :param ccore: accepted but not used by this template.
     """
     data = read_sample(path)

     algorithm = optics(data, radius, neighbors, amount_clusters)
     algorithm.process()

     groups = algorithm.get_clusters()
     outliers = algorithm.get_noise()

     sizes = [len(group) for group in groups]

     # Clusters plus noise must account for the whole sample.
     assert sum(sizes) + len(outliers) == len(data)
     assert len(groups) == len(expected_length_clusters)
     assert sum(sizes) == sum(expected_length_clusters)
     assert sorted(sizes) == sorted(expected_length_clusters)

     if amount_clusters is None:
         return

     ordering = ordering_analyser(algorithm.get_ordering())
     assert len(ordering) > 0
     assert ordering.extract_cluster_amount(algorithm.get_radius()) == len(expected_length_clusters)
Exemple #56
0
    def template_predict(self, path, amount, points, ccore):
        """Check that `gmeans.predict` assigns each point to a closest center.

        :param path: path passed to `read_sample`.
        :param amount: forwarded to `gmeans` as its second argument.
        :param points: points passed to `predict`.
        :param ccore: forwarded to `gmeans` as its third argument.
        """
        euclidean = distance_metric(type_metric.EUCLIDEAN)

        data = read_sample(path)
        model = gmeans(data, amount, ccore).process()
        centers = model.get_centers()

        predicted = model.predict(points)

        self.assertEqual(len(points), len(predicted))

        # Lengths are equal at this point, so pairing by position is safe.
        for point, chosen_index in zip(points, predicted):
            chosen_distance = euclidean(centers[chosen_index], point)
            for center_index, center in enumerate(centers):
                if center_index == chosen_index:
                    continue
                # No other center may be strictly closer than the chosen one.
                self.assertLessEqual(chosen_distance, euclidean(center, point))
Exemple #57
0
    def templateSeachNearestNodeInTree(self, sample_path, **kwargs):
        """Check zero-distance nearest-node search in `kdtree` around each insertion.

        For every point of the sample, a search with distance 0.0 must find
        nothing before the point is inserted and must return the node holding
        that very point object afterwards.

        :param sample_path: path passed to `read_sample`.
        :param kwargs: `numpy_usage` (default False); when exactly True the
            sample is converted with `numpy.array` first.
        """
        numpy_usage = kwargs.get('numpy_usage', False)

        sample = read_sample(sample_path)
        if numpy_usage is True:
            sample = numpy.array(sample)

        tree = kdtree()

        for point in sample:
            # None is a singleton: test identity rather than equality.
            node = tree.find_nearest_dist_node(point, 0.0)
            assert node is None

            tree.insert(point, None)

            node = tree.find_nearest_dist_node(point, 0.0)
            assert node is not None
            assert node.data is point
Exemple #58
0
    def testObserver(self):
        """Check that `ema_observer` records one entry of every kind per iteration."""
        data = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2)

        initializer = ema_initializer(data, 3)
        means, variances = initializer.initialize(
            ema_init_type.RANDOM_INITIALIZATION)

        observer = ema_observer()
        algorithm = ema(data, 3, means, variances, observer)
        algorithm.process()

        recorded = len(observer)
        assert recorded > 0

        # Every evolution list must be as long as the observer itself.
        assert recorded == len(observer.get_evolution_clusters())
        assert recorded == len(observer.get_evolution_covariances())
        assert recorded == len(observer.get_evolution_means())
        assert recorded == observer.get_iterations()
Exemple #59
0
    def random_state_fixed(path_to_data, kmin, kmax, ccore, **kwargs):
        """Check that two `elbow` runs with identical arguments give identical results.

        :param path_to_data: path passed to `read_sample`.
        :param kmin: forwarded to `elbow`.
        :param kmax: forwarded to `elbow`.
        :param ccore: forwarded to `elbow` as `ccore`.
        :param kwargs: forwarded unchanged to `elbow`; `repeat` (default 1) is
            also used as the number of times the comparison is performed.
        """

        def run_elbow(points):
            # One full elbow run; returns (amount, wce) of the processed instance.
            instance = elbow(points, kmin, kmax, ccore=ccore,
                             **kwargs).process()
            return instance.get_amount(), instance.get_wce()

        for _ in range(kwargs.get('repeat', 1)):
            points = read_sample(path_to_data)

            first_amount, first_wce = run_elbow(points)
            second_amount, second_wce = run_elbow(points)

            assertion.eq(first_amount, second_amount)
            assertion.eq(first_wce, second_wce)
    def templateClusteringResults(path,
                                  number_clusters,
                                  link,
                                  expected_length_clusters,
                                  ccore_flag=False):
        """Run `agglomerative` on a sample and verify the sizes of the clusters.

        :param path: path passed to `read_sample`.
        :param number_clusters: forwarded to `agglomerative`.
        :param link: forwarded to `agglomerative`.
        :param expected_length_clusters: expected cluster sizes; compared as is
            against the ascending-sorted obtained sizes.
        :param ccore_flag: forwarded to `agglomerative` as its fourth argument.
        """
        data = read_sample(path)

        algorithm = agglomerative(data, number_clusters, link, ccore_flag)
        algorithm.process()

        sizes = [len(group) for group in algorithm.get_clusters()]

        # All objects are clustered and the sizes match the expectation.
        assert sum(sizes) == len(data)
        assert sum(sizes) == sum(expected_length_clusters)
        assert sorted(sizes) == expected_length_clusters