Python Clustering.Clustering Examples, pyproct.clustering.clustering.Clustering.Clustering Python Examples

Example #1

0

Show file

File: TestDaviesBouldin.py Project: ztypaker/pyProCT

 def setUpClass(cls):
     """
     Builds the fixtures shared by the tests of this class: the condensed
     distance matrix and two clusterings of elements 0 to 5 (one with two
     clusters and one with three).
     """
     cls.matrix = CondensedMatrix(squared_CH_table1)

     cls.clusterings = [Clustering([Cluster(None, [0,1,2,3]), Cluster(None, [4,5])]),
                        Clustering([Cluster(None, [0,1]), Cluster(None, [2,3]), Cluster(None, [4,5])])]

     # All clusters are created with a None prototype, so 'update_medoids' is
     # called for every clustering. The former code called it twice for the
     # first clustering and never for the second one.
     for clustering in cls.clusterings:
         update_medoids(clustering, cls.matrix)

Example #2

0

Show file

File: TestMetrics.py Project: ztypaker/pyProCT

    def test_cluster_cohe_sep_wo_prot_eval(self):
        """
        Checks the value of SeparationCalculator.cluster_separation for every
        cluster of two different clusterings of elements 0 to 4.
        """
        distances = CondensedMatrix([1., 2., 3., 4., 5., 6., 7., 8., 9., 10.])

        first_partition = [Cluster(None, elements=members)
                           for members in ([0, 1], [2], [3, 4])]
        second_partition = [Cluster(None, elements=members)
                            for members in ([0, 2, 4], [1, 3])]

        first_clustering = Clustering(first_partition)
        second_clustering = Clustering(second_partition)
        calculator = SeparationCalculator()

        # (cluster, clustering it belongs to, expected separation)
        cases = [
            (first_partition[0], first_clustering, 27.0),
            (first_partition[1], first_clustering, 24.0),
            (first_partition[2], first_clustering, 37.0),
            (second_partition[0], second_clustering, 34.0),
            (second_partition[1], second_clustering, 34.0),
        ]
        for cluster, clustering, expected in cases:
            separation = calculator.cluster_separation(cluster, clustering, 1.,
                                                       distances)
            self.assertEqual(separation, expected)

Example #3

0

Show file

File: TestClustering.py Project: ztypaker/pyProCT

 def test_remove_noise(self):
     """
     After calling eliminate_noise(5) only two of the four initial clusters
     must remain in the clustering.
     """
     clustering = Clustering((
         Cluster(16, [16]),
         Cluster(4, [4, 5, 6, 7, 8]),
         Cluster(0, [0, 1, 2, 3]),
         Cluster(9, [9, 10, 11, 12, 13, 14, 15]),
     ))
     clustering.eliminate_noise(5)
     remaining_clusters = len(clustering.clusters)
     self.assertEqual(remaining_clusters, 2)

Example #4

0

Show file

File: TestClustering.py Project: ztypaker/pyProCT

 def test_get_all_clustered_elements(self):
     """
     The elements reported by get_all_clustered_elements must be exactly the
     17 elements (0 to 16) spread over the four clusters.
     """
     clustering = Clustering((
         Cluster(16, [16]),
         Cluster(4, [4, 5, 6, 7, 8]),
         Cluster(0, [0, 1, 2, 3]),
         Cluster(9, [9, 10, 11, 12, 13, 14, 15]),
     ))
     all_elements = clustering.get_all_clustered_elements()
     self.assertItemsEqual(sorted(all_elements), range(17))

Example #5

0

Show file

File: dbscanAlgorithm.py Project: ztypaker/pyProCT

    def perform_clustering(self, kwargs):
        """
        Runs the DBSCAN main loop over all the elements and builds the
        resulting clustering.

        'kwargs' must hold the "eps" and "minpts" parameters. Elements that end
        with the PointClassType.NOISE class are skipped when the clusters are
        generated, so they are not part of the returned Clustering.
        """
        point_classes = [PointClassType.UNCLASSIFIED] * self.number_of_elements
        eps = kwargs["eps"]
        minpts = kwargs["minpts"]
        # Cluster ids start right after the NOISE class value
        next_cluster_id = PointClassType.NOISE + 1

        for element in range(self.number_of_elements):
            if point_classes[element] == PointClassType.UNCLASSIFIED:
                # The id is only consumed if the expansion formed a cluster
                formed_a_cluster = self.__expand_cluster(
                    element, next_cluster_id, eps, minpts, point_classes)
                if formed_a_cluster:
                    next_cluster_id += 1

        # Clustering is done: NOISE elements share the class
        # PointClassType.NOISE and are left out of the clusters
        clusters = gen_clusters_from_class_list(
            point_classes, skip_list=[PointClassType.NOISE])
        details = "".join([
            "DBSCAN (eps = ", str(eps), " minpts = ", str(minpts), ") ",
            str(self.number_of_elements), " elems"
        ])
        return Clustering(clusters, details=details)

Example #6

0

Show file

    def perform_clustering(self, kwargs):
        """
        Does the actual clustering.
        """
        cutoff = kwargs["cutoff"]

        try:
            max_clusters = kwargs["max_clusters"]
        except KeyError:
            max_clusters = sys.maxint

        nodes = range(self.condensed_matrix.row_length)
        clusters = []
        elements_already_clustered = 0
        iteration = 0
        # Do it while there are nodes left
        while not len(nodes) == 0 and not len(clusters) >= max_clusters:
            cluster = self.__do_one_iteration(nodes, cutoff)
            clusters.append(cluster)
            elements_already_clustered = elements_already_clustered + cluster.get_size(
            )
            if elements_already_clustered + len(
                    nodes) > self.condensed_matrix.row_length:
                print "[ERROR :: GROMOS perform_clustering] ", elements_already_clustered + len(
                    nodes), iteration
                exit(1)
            iteration = iteration + 1

        return Clustering(clusters,
                          details="GROMOS (cutoff = " + str(cutoff) + ")")

Example #7

0

Show file

def purge_mixed_clusters_and_do_graph(mixed, pure_clusters_traj1,condensed_distance_matrix,std_devs_from_A,path):
    """
    Rebuilds every mixed cluster keeping only its elements of trajectory 1,
    adds the pure trajectory 1 clusters and draws the state graph of the
    resulting (unsorted) clustering.

    - mixed: sequence of (cluster, elements in traj 1, elements in traj 2)
      triplets.
    - pure_clusters_traj1: clusters without any element of traj 2.
    - condensed_distance_matrix: used to calculate the medoid and the distance
      standard deviation of each pure cluster.
    - std_devs_from_A: receives (via 'append') one standard deviation per pure
      cluster before being handed to 'do_graph'.
    - path: forwarded to 'do_graph'.
    """
    common.print_and_flush( "Purging clusters...")

    # Each mixed cluster is replaced by a cluster with its traj 1 elements
    # only; the number of traj 2 elements it had is stored apart.
    purged = []
    traj2_counts = []
    for _cluster, traj1_elems, traj2_elems in mixed:
        traj2_counts.append(len(traj2_elems))
        purged.append(Cluster(prototype=None, elements=traj1_elems))

    # Pure traj 1 clusters are needed too. As they have no elements of
    # traj 2, their entry in the counts list is 0.
    purged += pure_clusters_traj1
    traj2_counts += [0] * len(pure_clusters_traj1)

    # Statistics for the pure clusters
    for pure_cluster in pure_clusters_traj1:
        medoid = pure_cluster.calculate_medoid(condensed_distance_matrix)
        deviation = get_distance_std_dev_for_elems(pure_cluster.all_elements,
                                                   medoid,
                                                   condensed_distance_matrix)
        std_devs_from_A.append(deviation)
    common.print_and_flush( "Done.\n")

    common.print_and_flush("Trying to draw state graph...")
    purged_clustering = Clustering(purged, sort=False)
    do_graph(purged_clustering, traj2_counts, std_devs_from_A, path)
    common.print_and_flush("Done.\n")

Example #8

0

Show file

File: TestClustering.py Project: ztypaker/pyProCT

 def test_to_dic(self):
     """
     Checks the dictionary generated by to_dic for a clustering with four
     clusters and 17 elements.
     """
     clustering = Clustering([
         Cluster(16, [16]),
         Cluster(9, [9, 10, 11, 12, 13, 14, 15]),
         Cluster(0, [0, 1, 2, 3]),
         Cluster(4, [4, 5, 6, 7, 8])
     ])
     expected = {
         'clusters': [
             {'prototype': 9, 'elements': '9:15', 'id': 'cluster_1'},
             {'prototype': 4, 'elements': '4:8', 'id': 'cluster_3'},
             {'prototype': 0, 'elements': '0:3', 'id': 'cluster_2'},
             {'prototype': 16, 'elements': '16', 'id': 'cluster_0'},
         ],
         'total_number_of_elements': 17,
         'number_of_clusters': 4,
     }
     self.assertDictEqual(clustering.to_dic(), expected)

Example #9

0

Show file

 def test_update_medois(self):
     """
     After update_medoids no cluster keeps a None prototype, and the
     prototypes of the three clusters are 1, 3 and 5.
     """
     clusters = [Cluster(None, members) for members in ([1,2], [3,4], [5])]
     clustering = Clustering(clusters)
     matrix = CondensedMatrix(squared_CH_table1)
     update_medoids(clustering, matrix)

     for cluster in clusters:
         self.assertNotEqual(cluster.prototype, None)

     prototypes = [cluster.prototype for cluster in clusters]
     self.assertItemsEqual(prototypes, [1,3,5])

Example #10

0

Show file

File: hierarchicalAlgorithm.py Project: ztypaker/pyProCT

    def perform_clustering(self, kwargs):
        """
        Performs the hierarchical clustering step and the clustering step.

        The hierarchical matrix is taken, in this order, from: the "hie_mat"
        entry of 'kwargs' (it is also stored, so later calls will reuse it),
        the matrix stored by a previous call, or a new linkage calculation over
        the data of the condensed matrix. The metric is not needed, as the
        distances are already calculated.

        Entries of 'kwargs' (all of them optional):
        - "cutoff": cutoff used to get the clusters from the hierarchical
          matrix. If missing (or None), no clustering is generated.
        - "hie_mat": an already calculated hierarchical matrix.
        - "method": method used to determine distances when fusing clusters
          ('complete' by default). Methods are described in:
          http://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html

        Returns a Clustering, or None if no cutoff was given.
        """
        cutoff = kwargs.get("cutoff")
        hie_mat = kwargs.get("hie_mat")
        method = kwargs.get("method", 'complete')

        # Identity checks are needed here: if the matrix is a numpy array,
        # '!= None' / '== None' can be evaluated element-wise, and using the
        # resulting array as a condition raises a ValueError.
        if hie_mat is not None:
            self.hie_mat = hie_mat
        elif self.hie_mat is None:
            self.hie_mat = hcluster_fast.linkage(
                self.condensed_matrix.get_data(), method=method)

        algorithm_details = "Hierarchical with " + method + " method (cutoff = " + str(
            cutoff) + ")"

        if cutoff is None:
            return None

        # Apply the cutoff (original note: this doesn't work much as expected)
        # and generate the clusters from the resulting group list
        group_list = hcluster.fcluster(self.hie_mat, cutoff)
        clusters = gen_clusters_from_class_list(group_list)
        return Clustering(clusters, details=algorithm_details)

Example #11

0

Show file

File: TestClustering.py Project: ztypaker/pyProCT

 def test_creation(self):
     """
     A change made to one of the clusters used to build the clustering must
     be visible through the clustering.
     """
     # The inner list is a copy, but the cluster objects are shared
     source_clusters = (
         Cluster(16, [16]),
         Cluster(4, [4, 5, 6, 7, 8]),
         Cluster(0, [0, 1, 2, 3]),
         Cluster(9, [9, 10, 11, 12, 13, 14, 15]),
     )
     clustering = Clustering(source_clusters)
     source_clusters[1].prototype = -20
     stored_cluster = clustering.clusters[1]
     self.assertEqual(source_clusters[1].prototype, stored_cluster.prototype)

Example #12

0

Show file

 def test_mean_cluster_size(self):
     """
     The mean cluster size analysis of a clustering with 5 clusters and 20
     elements must be 4.
     """
     clusters = [
         Cluster(0,[0,4,5,7,13]),
         Cluster(1,[1,16,17,18]),
         Cluster(2,[2,3,8,19]),
         Cluster(6,[6,11,12,15]),
         Cluster(9,[9,10,14]),
     ]
     clustering = Clustering(clusters, "Test Clustering")
     populator = AnalysisPopulatorMock("")
     mean_size = populator.analysis_function_mean_cluster_size(clustering)
     self.assertEqual(4, mean_size)

Example #13

0

Show file

File: TestClustering.py Project: ztypaker/pyProCT

    def test_get_percent_population_of_cluster(self):
        """
        The population percents of the four clusters must add up to 100.
        """
        clustering = Clustering((
            Cluster(16, [16]),
            Cluster(4, [4, 5, 6, 7, 8]),
            Cluster(0, [0, 1, 2, 3]),
            Cluster(9, [9, 10, 11, 12, 13, 14, 15]),
        ))

        total = sum(clustering.get_population_percent_of_cluster(index)
                    for index in range(4))
        self.assertAlmostEqual(total, 100., 2)

Example #14

0

Show file

File: TestClustering.py Project: ztypaker/pyProCT

    def test_gen_class_list(self):
        """
        Checks the class list generated for a clustering holding all the
        elements and for another one where some elements are in no cluster.
        """
        # Elements 0 to 16 are all clustered
        complete_clustering = Clustering((
            Cluster(16, [16]),
            Cluster(4, [4, 5, 6, 7, 8]),
            Cluster(0, [0, 1, 2, 3]),
            Cluster(9, [9, 10, 11, 12, 13, 14, 15]),
        ))
        self.assertItemsEqual(
            complete_clustering.gen_class_list(),
            [2, 2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 3])

        # Elements 4 to 8 are in no cluster; the expected list holds five -1
        partial_clustering = Clustering((
            Cluster(0, [0, 1, 2, 3]),
            Cluster(9, [9, 10, 11, 12, 13, 14, 15]),
        ))
        self.assertItemsEqual(
            partial_clustering.gen_class_list(),
            [1, 1, 1, 1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0])

Example #15

0

Show file

File: TestClustering.py Project: ztypaker/pyProCT

    def test_get_percent_of_n_clusters(self):
        """
        Checks the population percents of the 3 bigger clusters (7, 5 and 4
        elements out of 17).
        """
        clustering = Clustering((
            Cluster(16, [16]),
            Cluster(4, [4, 5, 6, 7, 8]),
            Cluster(0, [0, 1, 2, 3]),
            Cluster(9, [9, 10, 11, 12, 13, 14, 15]),
        ))

        percents = clustering.get_population_percent_of_n_bigger_clusters(3)
        expected_percents = [41.1764705882, 29.4117647059, 23.5294117647]
        for position, expected in enumerate(expected_percents):
            self.assertAlmostEqual(percents[position], expected, 1)

Example #16

0

Show file

File: TestMetrics.py Project: ztypaker/pyProCT

 def test_mini_evaluation(self):
     """
     Regression check of MeanMinimumDistanceCalculator.evaluate for a small
     clustering with two clusters.
     """
     calculator = MeanMinimumDistanceCalculator(10)
     clusters = [Cluster(None, elements=members)
                 for members in ([0, 1, 2], [3, 4])]
     distances = CondensedMatrix([1., 2., 3., 4., 5., 6., 7., 8., 9., 10.])
     clustering = Clustering(clusters)
     result = calculator.evaluate(clustering, distances, 20)
     self.assertEqual(7.0, result)

Example #17

0

Show file

File: TestClustering.py Project: ztypaker/pyProCT

 def test_get_medoids(self):
     """
     get_medoids must return one medoid per cluster (with the cluster mocks
     used here, 0, 10, 50 and 80).
     """
     ranges = [(0, 10), (10, 50), (50, 80), (80, 200)]
     clusters = [ClusterMock(range(first, last)) for first, last in ranges]
     clustering = Clustering(clusters)
     medoids = clustering.get_medoids("distance_matrix")
     self.assertItemsEqual(medoids, [0, 10, 50, 80])

Example #18

0

Show file

File: TestClustering.py Project: ztypaker/pyProCT

 def test_cluster_is_inside(self):
     """
     Checks cluster_index and cluster_is_inside with a cluster that is part
     of the clustering and with another one that is not.
     """
     clusters = (
         Cluster(16, [16]),
         Cluster(4, [4, 5, 6, 7, 8]),
         Cluster(0, [0, 1, 2, 3]),
         Cluster(9, [9, 10, 11, 12, 13, 14, 15]),
     )
     outsider = Cluster(17, [17, 16])
     # Different object, but same prototype and elements as the third cluster
     member = Cluster(0, [0, 1, 2, 3])
     clustering = Clustering(clusters)

     for cluster, expected_index in ((outsider, -1), (member, 2)):
         self.assertEqual(clustering.cluster_index(cluster), expected_index)
     for cluster, expected_answer in ((outsider, False), (member, True)):
         self.assertEqual(clustering.cluster_is_inside(cluster), expected_answer)

Example #19

0

Show file

File: TestMetrics.py Project: ztypaker/pyProCT

    def test_regression_cohesion_eval(self):
        """
        Regression check of CohesionCalculator.evaluate for a clustering with
        three clusters.
        """
        distances = CondensedMatrix([1., 2., 3., 4., 5., 6., 7., 8., 9., 10.])
        clusters = [Cluster(None, elements=members)
                    for members in ([0, 1], [2], [3, 4])]
        clustering = Clustering(clusters)

        calculator = CohesionCalculator()
        cohesion = calculator.evaluate(clustering, distances)
        self.assertEqual(cohesion, 5.5)

Example #20

0

Show file

File: TestClustering.py Project: ztypaker/pyProCT

 def test_classify(self):
     """
     Clustering.classify must count, for each tag, the clusterings whose
     second constructor argument mentions it (3 for A, 2 for B, 2 for C).
     """
     tags = ["A", "B", "C"]
     descriptions = [
         "this is of type A",
         "this is of type B",
         "this is of type C",
         "this is of type B",
         "this is of type S",
         "this is of type A",
         "this is of type A",
         "this is of type C",
         "this is of type D",
     ]
     # Empty clusterings: only the description string changes
     clusterings = [Clustering([], description) for description in descriptions]
     counter = Clustering.classify(tags, clusterings)
     for tag, expected_count in (('A', 3), ('B', 2), ('C', 2)):
         self.assertEqual(counter[tag], expected_count)

Example #21

0

Show file

File: TestGraphMetrics.py Project: ztypaker/pyProCT

 def test_getClusterAndComplementary(self):
     """
     get_cluster_and_complementary must return the elements 0 to 4 on one
     side and all the other elements (5 to 19) on the other.
     """
     clusters = [
         Cluster(1, range(5)),
         Cluster(5, range(5, 10)),
         Cluster(10, range(10, 20)),
     ]
     clustering = Clustering(clusters)
     cluster_elems, complementary_elems = get_cluster_and_complementary(
         1, clustering.clusters)
     cluster_elems.sort()
     complementary_elems.sort()
     self.assertItemsEqual(cluster_elems, [0, 1, 2, 3, 4])
     self.assertItemsEqual(
         complementary_elems,
         [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

Example #22

0

Show file

File: TestClustering.py Project: ztypaker/pyProCT

    def test_number_of_clusters_needed_to_get_this_percent_of_elems(self):
        """
        Checks number_of_clusters_to_get_percent for several percents over a
        clustering with clusters of 7, 5, 4 and 1 elements.
        """
        clustering = Clustering((
            Cluster(16, [16]),
            Cluster(4, [4, 5, 6, 7, 8]),
            Cluster(0, [0, 1, 2, 3]),
            Cluster(9, [9, 10, 11, 12, 13, 14, 15]),
        ))

        # (percent of elements, expected number of clusters)
        cases = ((71, 3), (70, 2), (40, 1), (42, 2), (100, 4))
        for percent, expected_clusters in cases:
            self.assertEqual(
                clustering.number_of_clusters_to_get_percent(percent),
                expected_clusters)

Example #23

0

Show file

File: TestPCAMetric.py Project: ztypaker/pyProCT

 def test_PCA(self):
     """
     Regression test: the PCA metric of a two cluster clustering (6 elements
     each) must keep the stored value.
     """
     trajectory_handler = TrajectoryHandlerStub(
         testPCAMetric.not_iterposed_coordsets, 66)
     clusters = [Cluster(None, range(6)), Cluster(None, range(6, 12))]
     clustering = Clustering(clusters, "a clustering")
     metric = PCAMetric(trajectory_handler)
     value = metric.evaluate(clustering)
     self.assertAlmostEquals(value, 1.427748687873, 12)

Example #24

0

Show file

File: TestCalinskiHarabasz.py Project: ztypaker/pyProCT

    def test_evaluation(self):
        """
        Checks the value CalinskiHarabaszCalculator.evaluate gives to three
        clusterings of elements 0 to 5 (2 decimal places).
        """
        two_clusters = Clustering(
            [Cluster(None, [0, 1, 2, 3]),
             Cluster(None, [4, 5])])
        three_clusters = Clustering([
            Cluster(None, [0, 1]),
            Cluster(None, [2, 3]),
            Cluster(None, [4, 5])
        ])
        four_clusters = Clustering([
            Cluster(None, [0, 1]),
            Cluster(None, [2]),
            Cluster(None, [3]),
            Cluster(None, [4, 5])
        ])
        clusterings = [
            {"clustering": two_clusters, "result": 3.74},
            {"clustering": three_clusters, "result": 3.705},
            {"clustering": four_clusters, "result": 2.91},
        ]

        calculator = CalinskiHarabaszCalculator()
        matrix = CondensedMatrix(CH_table1)

        for case in clusterings:
            self.assertAlmostEqual(
                case["result"],
                calculator.evaluate(case["clustering"], matrix), 2)

Example #25

0

Show file

File: Refiner.py Project: ztypaker/pyProCT

 def repartition_with_kmedoids(cls, initial_cluster, k, submatrix):
     """
     Clusters 'submatrix' with the K-Medoids algorithm class of 'cls' (k
     clusters, "RANDOM" seeding, 10 tries) and returns a Clustering whose
     clusters are the result of applying 'redefine_cluster_with_map' to
     'initial_cluster' and each one of the obtained clusters.
     """
     algorithm = cls.KMedoidsAlgorithmClass(submatrix)
     partitioned_clustering = algorithm.perform_clustering({
         "k": k,
         "seeding_type": "RANDOM",
         "tries": 10
     })
     remapped_clusters = [
         cls.redefine_cluster_with_map(initial_cluster, partitioned_cluster)
         for partitioned_cluster in partitioned_clustering.clusters
     ]
     return Clustering(remapped_clusters)

Example #26

0

Show file

File: TestSilhouette.py Project: ztypaker/pyProCT

 def test_one_clusterization_silhouette(self):
     """
     Checks the partial silhouette values calculated for a clustering with
     three clusters.
     """
     distances = CondensedMatrix([1., 2., 3., 4., 5., 6., 7., 8., 9., 10.])
     clusters = [Cluster(None, elements=members)
                 for members in ([0,1], [2], [3,4])]

     clustering = Clustering(clusters)
     calculator = SilhouetteCoefficientCalculator()
     expected = [0.5, 0.80000000000000004, -0.55000000000000004, -0.45000000000000001, 0.7142857142857143]

     # The method is private, so it is called using its mangled name
     partial_silhouette = calculator._SilhouetteCoefficientCalculator__one_clusterization_partial_silhouette(
         clustering, distances)
     self.assertItemsEqual(partial_silhouette, expected)

Example #27

0

Show file

File: TestClustering.py Project: ztypaker/pyProCT

 def test_get_proportional_size_representatives(self):
     """
     Checks the representatives obtained when asking for 30 of them from four
     clusters (mocks) of 10, 40, 30 and 120 elements.
     """
     ranges = [(0, 10), (10, 50), (50, 80), (80, 200)]
     clusters = [ClusterMock(range(first, last)) for first, last in ranges]
     clustering = Clustering(clusters)
     representatives = clustering.get_proportional_size_representatives(
         30, "distance_matrix")
     expected = [
         0, 0, 10, 10, 11, 12, 13, 14, 50, 50, 51, 52, 53, 80, 80, 81, 82,
         83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96
     ]
     self.assertItemsEqual(representatives, expected)

Example #28

0

Show file

File: TestClustering.py Project: ztypaker/pyProCT

    def test_load_and_save_to_disk(self):
        """
        A clustering saved to disk and loaded again must hold the same
        elements it had before being saved.
        """
        clusters = (Cluster(16, [16]), Cluster(4, [4, 5, 6, 7, 8]),
                    Cluster(0, [0, 1, 2, 3]),
                    Cluster(9, [9, 10, 11, 12, 13, 14, 15]))

        clustering = Clustering(clusters)
        before_saving_elements = clustering.get_all_clustered_elements()

        saved_path = os.path.join(test_data.__path__[0],
                                  "saved_clustering_for_test")
        try:
            clustering.save_to_disk(saved_path)
            loaded_clustering = Clustering.load_from_disk(saved_path)
            after_saving_elements = loaded_clustering.get_all_clustered_elements()
            self.assertItemsEqual(before_saving_elements, after_saving_elements)
        finally:
            # Clean up the path that was used for saving, even if the test
            # fails. The former "rm data/..." shell command depended on the
            # working directory and on the availability of 'rm'.
            if os.path.exists(saved_path):
                os.remove(saved_path)

Example #29

0

Show file

File: TestMetrics.py Project: ztypaker/pyProCT

    def test_regression_separation_eval(self):
        """
        Regression check of SeparationCalculator.evaluate, called with and
        without its third argument.
        """
        distances = CondensedMatrix([1., 2., 3., 4., 5., 6., 7., 8., 9., 10.])
        clusters = [Cluster(None, elements=members)
                    for members in ([0, 1], [2], [3, 4])]
        clustering = Clustering(clusters)

        calculator = SeparationCalculator()

        # With [1, 1, 1] the result is the plain sum of the separations
        with_ones = calculator.evaluate(clustering, distances, [1, 1, 1])
        self.assertEqual(with_ones, 27.0 + 24.0 + 37.0)

        # Without the third argument
        with_defaults = calculator.evaluate(clustering, distances)
        self.assertEqual(with_defaults, (1 / 0.5) * 27.0 + (1 / 5.0) * 37.0)

Example #30

0

Show file

File: TestClustering.py Project: ztypaker/pyProCT

    def test_equality(self):
        """
        Two clusterings with the same clusters given in different order must
        be equal; a clustering with different clusters must not.
        """
        reference = Clustering([
            Cluster(16, [16]),
            Cluster(4, [4, 5, 6, 7, 8]),
            Cluster(0, [0, 1, 2, 3]),
            Cluster(9, [9, 10, 11, 12, 13, 14, 15])
        ])

        # Same clusters as 'reference', listed in another order
        reordered = Clustering([
            Cluster(16, [16]),
            Cluster(9, [9, 10, 11, 12, 13, 14, 15]),
            Cluster(0, [0, 1, 2, 3]),
            Cluster(4, [4, 5, 6, 7, 8])
        ])

        # Same elements, distributed in a different way
        different = Clustering([
            Cluster(13, [13]),
            Cluster(9, [9, 10, 11, 12, 16, 14, 15]),
            Cluster(0, [0, 1]),
            Cluster(4, [2, 3, 4, 5, 6, 7, 8])
        ])

        self.assertEqual(reference, reordered)
        self.assertNotEqual(reference, different)