Beispiel #1
0
    def analyze_clustering(cls, separated_decomposed_clusters, distance_matrix,
                           analysis):
        """
        Store the global measurements of a clustering in ``analysis`` (in place).

        Writes the "overlap" score of all merged clusters, the "mixed_overlap"
        score of the "mixed" clusters alone, and the cluster / element counters
        ("num_<type>", "num_<type>_elements", "total_num_clusters",
        "total_num_elements"). ``separated_decomposed_clusters`` is indexed with
        "mixed" unconditionally, so that entry has to be present.
        """
        analysis["total_num_clusters"] = 0
        analysis["total_num_elements"] = 0

        # Overlap of the whole clustering (every cluster type merged together).
        every_cluster = mergeSeparatedClusters(separated_decomposed_clusters)
        analysis["overlap"] = OverlapCalculator.calculate_clustering_overlap(
            every_cluster, distance_matrix)

        # Overlap restricted to the "mixed" clusters.
        mixed_only = mergeSeparatedClusters(
            {"mixed": separated_decomposed_clusters["mixed"]})
        analysis["mixed_overlap"] = OverlapCalculator.calculate_clustering_overlap(
            mixed_only, distance_matrix)

        for cluster_type in separated_decomposed_clusters:
            clusters_of_type = separated_decomposed_clusters[cluster_type]
            clusters_key = "num_" + cluster_type
            elements_key = "num_" + cluster_type + "_elements"

            analysis[clusters_key] = len(clusters_of_type)
            analysis["total_num_clusters"] += analysis[clusters_key]

            cluster_sizes = [
                len(getAllElements(clusters_of_type[dc_id]))
                for dc_id in clusters_of_type
            ]
            analysis[elements_key] = numpy.sum(cluster_sizes)
            analysis["total_num_elements"] += analysis[elements_key]
    def test_get_cluster_min_max_distances(self):
        """
        Compare the minimum / maximum distance arrays returned by
        OverlapCalculator.get_cluster_min_max_distances with known values.
        """
        # Each case: (condensed matrix data, decomposed cluster,
        #             expected minimum distances, expected maximum distances)
        cases = [
            # First test: three elements, one per trajectory.
            ([1., 0.7,
                  0.3],
             {"traj_0": [0], "traj_1": [1], "traj_2": [2]},
             [0.7, 0.3, 0.3],
             [1., 1., 0.7]),
            # Second test: four elements, "traj_0" holds two of them.
            ([1., 0.7, 2.,
                  0.3, 1.,
                       0.7],
             {"traj_0": [0, 3], "traj_1": [1], "traj_2": [2]},
             [0.7, 0.7, 0.3, 0.3],
             [1., 1., 1., 0.7]),
        ]

        for matrix_data, cluster, wanted_min, wanted_max in cases:
            matrix = CondensedMatrix(matrix_data)
            found_min, found_max = OverlapCalculator.get_cluster_min_max_distances(
                cluster, matrix)

            numpy.testing.assert_array_almost_equal(found_min, wanted_min, 8)
            numpy.testing.assert_array_almost_equal(found_max, wanted_max, 8)
    def test_calculate_global_overlap(self):
        """
        Check that OverlapCalculator.calculate_global_overlap yields 0 for two
        different groupings of the same four elements.
        """
        matrix = CondensedMatrix([1., 0.7, 2.,
                                      0.3, 1.,
                                           0.7])

        clusterings = (
            # Two clusters, each one containing both trajectories.
            [{"traj_0": [0], "traj_1": [1]}, {"traj_0": [2], "traj_1": [3]}],
            # One two-trajectory cluster plus two single-trajectory clusters.
            [{"traj_0": [0], "traj_1": [1]}, {"traj_0": [2]}, {"traj_1": [3]}],
        )

        for decomposed_clusters in clusterings:
            overlap = OverlapCalculator.calculate_global_overlap(
                decomposed_clusters, matrix, 1, 1)
            self.assertEqual(0., overlap)
    def test_calculate_cluster_overlap(self):
        """
        Check OverlapCalculator.calculate_cluster_overlap against known values
        for a three-trajectory and a two-trajectory decomposed cluster.
        """
        matrix = CondensedMatrix([1., 0.7,
                                      0.3])

        three_trajectories = {"traj_0": [0], "traj_1": [1], "traj_2": [2]}
        two_trajectories = {"traj_0": [0], "traj_1": [1]}

        # (expected overlap, leading integer argument, decomposed cluster).
        # The meaning of the leading integer is defined by OverlapCalculator.
        cases = [
            (0.481481488022, 1, three_trajectories),
            (0.4761904843, 2, three_trajectories),
            (1., 1, two_trajectories),
            (1., 2, two_trajectories),
        ]

        for expected, leading_arg, cluster in cases:
            obtained = OverlapCalculator.calculate_cluster_overlap(
                leading_arg, cluster, matrix)
            self.assertAlmostEqual(expected, obtained, 12)
 def analyze_clustering(cls, separated_decomposed_clusters, distance_matrix, analysis):
     """
     Compute the global measurements of a clustering and write them into ``analysis``.

     Keys written: "overlap" (all clusters merged), "mixed_overlap" (only the
     "mixed" clusters, which must be present in ``separated_decomposed_clusters``),
     one "num_<type>" / "num_<type>_elements" pair per cluster type, and the
     "total_num_clusters" / "total_num_elements" accumulators.
     """
     analysis["total_num_clusters"] = 0
     analysis["total_num_elements"] = 0

     merged_all = mergeSeparatedClusters(separated_decomposed_clusters)
     analysis["overlap"] = OverlapCalculator.calculate_clustering_overlap(merged_all, distance_matrix)

     merged_mixed = mergeSeparatedClusters({"mixed": separated_decomposed_clusters["mixed"]})
     analysis["mixed_overlap"] = OverlapCalculator.calculate_clustering_overlap(merged_mixed, distance_matrix)

     for cluster_type in separated_decomposed_clusters:
         type_clusters = separated_decomposed_clusters[cluster_type]
         count_key = "num_" + cluster_type
         size_key = count_key + "_elements"

         # Number of clusters of this type.
         analysis[count_key] = len(type_clusters)
         analysis["total_num_clusters"] += analysis[count_key]

         # Number of elements held by the clusters of this type.
         sizes = [len(getAllElements(type_clusters[dc_id])) for dc_id in type_clusters]
         analysis[size_key] = numpy.sum(sizes)
         analysis["total_num_elements"] += analysis[size_key]
Beispiel #6
0
 def analyze_clustering(cls, separated_decomposed_clusters, distance_matrix, analysis):
     """
     Compute the global measurements of a clustering and write them into ``analysis``.

     Keys written: "overlap" (global overlap of all merged clusters), one
     "num_<type>" / "num_<type>_elements" pair per cluster type, and the
     "total_num_clusters" / "total_num_elements" accumulators.

     Returns the last cluster type that was iterated, or None when
     ``separated_decomposed_clusters`` holds no cluster types.
     """
     analysis["total_num_clusters"] = 0
     analysis["total_num_elements"] = 0
     # NOTE(review): the meaning of the trailing 2 and 1 arguments is defined by
     # OverlapCalculator.calculate_global_overlap -- confirm there.
     analysis["overlap"] = OverlapCalculator.calculate_global_overlap(mergeSeparatedClusters(separated_decomposed_clusters), distance_matrix, 2, 1)

     # Pre-bind the loop variable: a for loop over an empty mapping never assigns
     # it, and the final return would then fail with UnboundLocalError.
     cluster_type = None
     for cluster_type in separated_decomposed_clusters:
         clusters_of_type = separated_decomposed_clusters[cluster_type]
         clusters_key = "num_" + cluster_type
         elements_key = "num_" + cluster_type + "_elements"

         analysis[clusters_key] = len(clusters_of_type)
         analysis["total_num_clusters"] += analysis[clusters_key]
         analysis[elements_key] = numpy.sum([len(getAllElements(clusters_of_type[dc_id])) for dc_id in clusters_of_type])
         analysis["total_num_elements"] += analysis[elements_key]
     return cluster_type
Beispiel #7
0
    def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix, analysis):
        """
        Store per-cluster distance statistics in ``analysis`` (modified in place).

        Each cluster id gets an entry with its "components" (the keys of the
        decomposed cluster) and a "global" dictionary holding the "mean", "std"
        and "max" values returned by calculate_distance_stats plus "num_elements",
        both for the whole cluster and for each of its components. Clusters of
        type "mixed" also get "centers_mean_diff" and a "global" "overlap" value.
        """
        for cluster_type in separated_decomposed_clusters:
            clusters_of_type = separated_decomposed_clusters[cluster_type]
            for cluster_id in clusters_of_type:
                decomposed_cluster = clusters_of_type[cluster_id]

                # The entry is registered before any statistic is computed.
                analysis[cluster_id] = {"components": decomposed_cluster.keys(), "global": {}}
                global_stats = analysis[cluster_id]["global"]

                # Statistics over every element of the cluster.
                mean, std, maximum = calculate_distance_stats(getAllElements(decomposed_cluster), distance_matrix)
                global_stats["mean"] = mean
                global_stats["std"] = std
                global_stats["max"] = maximum
                global_stats["num_elements"] = len(getAllElements(decomposed_cluster))

                # Statistics over the elements of each component.
                for traj_id in decomposed_cluster:
                    global_stats[traj_id] = {}
                    traj_stats = global_stats[traj_id]
                    mean, std, maximum = calculate_distance_stats(decomposed_cluster[traj_id], distance_matrix)
                    traj_stats["mean"] = mean
                    traj_stats["std"] = std
                    traj_stats["max"] = maximum
                    traj_stats["num_elements"] = len(decomposed_cluster[traj_id])

                if cluster_type != "mixed":
                    continue

                # Extra measurements computed only for mixed clusters.
                analysis[cluster_id]["centers_mean_diff"] = calculate_mean_center_differences(decomposed_cluster, distance_matrix)
                analysis[cluster_id]["global"]["overlap"] = OverlapCalculator.calculate_cluster_overlap(2, decomposed_cluster, distance_matrix)
Beispiel #8
0
 def analyze_clustering(cls, separated_decomposed_clusters, distance_matrix,
                        analysis):
     """
     Fill ``analysis`` (in place) with the global measurements of a clustering.

     Writes the "overlap" of all merged clusters, a "num_<type>" and a
     "num_<type>_elements" counter for every cluster type, and the
     "total_num_clusters" / "total_num_elements" sums.

     Returns the last cluster type visited by the loop, or None if
     ``separated_decomposed_clusters`` is empty.
     """
     analysis["total_num_clusters"] = 0
     analysis["total_num_elements"] = 0
     # NOTE(review): what the trailing 2 and 1 arguments select is decided by
     # OverlapCalculator.calculate_global_overlap -- confirm there.
     analysis["overlap"] = OverlapCalculator.calculate_global_overlap(
         mergeSeparatedClusters(separated_decomposed_clusters),
         distance_matrix, 2, 1)

     # An empty mapping leaves the loop target unassigned; bind it up front so
     # the return below yields None instead of raising UnboundLocalError.
     cluster_type = None
     for cluster_type in separated_decomposed_clusters:
         type_clusters = separated_decomposed_clusters[cluster_type]
         count_key = "num_" + cluster_type
         size_key = "num_" + cluster_type + "_elements"

         analysis[count_key] = len(type_clusters)
         analysis["total_num_clusters"] += analysis[count_key]
         analysis[size_key] = numpy.sum([
             len(getAllElements(type_clusters[dc_id]))
             for dc_id in type_clusters
         ])
         analysis["total_num_elements"] += analysis[size_key]
     return cluster_type
Beispiel #9
0
    def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix,
                         analysis):
        """
        Run the overlap analysis of the separated clusters.

        ``analysis`` is updated in place: each cluster id receives its
        "components" and a "global" dictionary with distance statistics
        ("mean", "std", "max", "num_elements") for the whole cluster and for
        each component. "mixed" clusters also receive "centers_mean_diff" and
        an inverted "overlap" score.
        """
        for cluster_type in separated_decomposed_clusters:
            same_type_clusters = separated_decomposed_clusters[cluster_type]
            for cluster_id in same_type_clusters:
                decomposed_cluster = same_type_clusters[cluster_id]

                analysis[cluster_id] = {
                    "components": decomposed_cluster.keys(),
                    "global": {}
                }
                cluster_entry = analysis[cluster_id]
                summary = cluster_entry["global"]

                # Whole-cluster statistics.
                summary["mean"], summary["std"], summary["max"] = (
                    calculate_distance_stats(
                        getAllElements(decomposed_cluster), distance_matrix))
                summary["num_elements"] = len(
                    getAllElements(decomposed_cluster))

                # Per-component statistics.
                for traj_id in decomposed_cluster:
                    summary[traj_id] = {}
                    component = summary[traj_id]
                    component["mean"], component["std"], component["max"] = (
                        calculate_distance_stats(
                            decomposed_cluster[traj_id], distance_matrix))
                    component["num_elements"] = len(
                        decomposed_cluster[traj_id])

                if cluster_type != "mixed":
                    continue

                cluster_entry["centers_mean_diff"] = (
                    calculate_mean_center_differences(
                        decomposed_cluster, distance_matrix))
                # The overlap ranges between 0 and 1, 0 being the best value.
                # It is inverted to get a more understandable range (1 is the
                # best value and 0 the worst).
                summary["overlap"] = (
                    1 - OverlapCalculator.calculate_cluster_overlap(
                        decomposed_cluster, distance_matrix))
    def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix, analysis):
        """
        Analyze every separated cluster and record the results in ``analysis`` (in place).

        For each cluster id the entry holds "components" (the decomposed cluster keys) and
        "global": the "mean", "std" and "max" returned by calculate_distance_stats and the
        "num_elements" count, for the full cluster and then for each component. Clusters whose
        type is "mixed" additionally get "centers_mean_diff" and an inverted "overlap" value.
        """
        for cluster_type in separated_decomposed_clusters:
            typed_clusters = separated_decomposed_clusters[cluster_type]
            for cluster_id in typed_clusters:
                decomposed_cluster = typed_clusters[cluster_id]
                analysis[cluster_id] = {"components": decomposed_cluster.keys(), "global": {}}
                overall = analysis[cluster_id]["global"]

                # Full-cluster statistics.
                full_mean, full_std, full_max = calculate_distance_stats(getAllElements(decomposed_cluster), distance_matrix)
                overall["mean"] = full_mean
                overall["std"] = full_std
                overall["max"] = full_max
                overall["num_elements"] = len(getAllElements(decomposed_cluster))

                # One nested dictionary of statistics per component.
                for traj_id in decomposed_cluster:
                    overall[traj_id] = {}
                    part = overall[traj_id]
                    part_mean, part_std, part_max = calculate_distance_stats(decomposed_cluster[traj_id], distance_matrix)
                    part["mean"] = part_mean
                    part["std"] = part_std
                    part["max"] = part_max
                    part["num_elements"] = len(decomposed_cluster[traj_id])

                if cluster_type == "mixed":
                    analysis[cluster_id]["centers_mean_diff"] = calculate_mean_center_differences(decomposed_cluster, distance_matrix)
                    # The raw overlap lies between 0 and 1 with 0 as the best value; it is
                    # inverted so that 1 is the best value and 0 the worst.
                    raw_overlap = OverlapCalculator.calculate_cluster_overlap(decomposed_cluster, distance_matrix)
                    overall["overlap"] = 1 - raw_overlap
Beispiel #11
0
    def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix,
                         analysis):
        """
        Write the per-cluster analysis into ``analysis`` (modified in place).

        Every cluster id maps to its "components" and to a "global" dictionary
        with "mean", "std", "max" (from calculate_distance_stats) and
        "num_elements", first for the whole cluster and then nested under each
        component key. Clusters of type "mixed" also store "centers_mean_diff"
        and a "global" "overlap" value.
        """
        for cluster_type in separated_decomposed_clusters:
            group = separated_decomposed_clusters[cluster_type]
            for cluster_id in group:
                decomposed_cluster = group[cluster_id]

                analysis[cluster_id] = {
                    "components": decomposed_cluster.keys(),
                    "global": {},
                }
                record = analysis[cluster_id]
                totals = record["global"]

                # Statistics of the cluster as a whole.
                totals["mean"], totals["std"], totals["max"] = (
                    calculate_distance_stats(
                        getAllElements(decomposed_cluster), distance_matrix))
                totals["num_elements"] = len(
                    getAllElements(decomposed_cluster))

                # Statistics of each component on its own.
                for traj_id in decomposed_cluster:
                    totals[traj_id] = {}
                    per_traj = totals[traj_id]
                    per_traj["mean"], per_traj["std"], per_traj["max"] = (
                        calculate_distance_stats(
                            decomposed_cluster[traj_id], distance_matrix))
                    per_traj["num_elements"] = len(
                        decomposed_cluster[traj_id])

                if cluster_type != "mixed":
                    continue

                # Only mixed clusters get the extra measurements below.
                record["centers_mean_diff"] = calculate_mean_center_differences(
                    decomposed_cluster, distance_matrix)
                totals["overlap"] = OverlapCalculator.calculate_cluster_overlap(
                    2, decomposed_cluster, distance_matrix)