Beispiel #1
0
    def analyze_clustering(cls, separated_decomposed_clusters, distance_matrix,
                           analysis):
        """
        Fills 'analysis' with the global measurements of a clustering.

        Stores the overlap of the whole clustering ("overlap"), the overlap of the
        "mixed" clusters alone ("mixed_overlap") and, for every cluster type, the
        number of clusters and elements together with their totals.

        @param separated_decomposed_clusters: Mapping from cluster type to a mapping
        of cluster id to decomposed cluster. The "mixed" key is read directly.
        @param distance_matrix: Forwarded untouched to the overlap calculation.
        @param analysis: Dictionary updated in place; nothing is returned.
        """
        analysis["total_num_clusters"] = 0
        analysis["total_num_elements"] = 0

        every_cluster = mergeSeparatedClusters(separated_decomposed_clusters)
        analysis["overlap"] = OverlapCalculator.calculate_clustering_overlap(
            every_cluster, distance_matrix)

        only_mixed = mergeSeparatedClusters(
            {"mixed": separated_decomposed_clusters["mixed"]})
        analysis["mixed_overlap"] = OverlapCalculator.calculate_clustering_overlap(
            only_mixed, distance_matrix)

        for cluster_type in separated_decomposed_clusters:
            clusters_of_type = separated_decomposed_clusters[cluster_type]
            count_key = "num_" + cluster_type
            elements_key = count_key + "_elements"

            analysis[count_key] = len(clusters_of_type)
            analysis["total_num_clusters"] += analysis[count_key]

            sizes = [
                len(getAllElements(clusters_of_type[dc_id]))
                for dc_id in clusters_of_type
            ]
            analysis[elements_key] = numpy.sum(sizes)
            analysis["total_num_elements"] += analysis[elements_key]
Beispiel #2
0
 def analyze_clustering(cls, separated_decomposed_clusters, distance_matrix, analysis):
     """
     Fills 'analysis' with the global measurements of a clustering.

     Stores the global overlap of the clustering ("overlap") and, for every
     cluster type, the number of clusters and elements together with their totals.

     @param separated_decomposed_clusters: Mapping from cluster type to a mapping
     of cluster id to decomposed cluster.
     @param distance_matrix: Forwarded untouched to the overlap calculation.
     @param analysis: Dictionary updated in place.

     @return: The last cluster type that was iterated, or None if there was none.
     """
     analysis["total_num_clusters"] = 0
     analysis["total_num_elements"] = 0
     # The trailing 2 and 1 are passed through as-is (presumably the cluster and
     # global method numbers - confirm against 'calculate_global_overlap').
     analysis["overlap"] = OverlapCalculator.calculate_global_overlap(mergeSeparatedClusters(separated_decomposed_clusters), distance_matrix, 2, 1)

     # Bound before the loop so the final 'return' cannot raise UnboundLocalError
     # when there are no cluster types to iterate over.
     cluster_type = None
     for cluster_type in separated_decomposed_clusters:
         clusters_of_type = separated_decomposed_clusters[cluster_type]
         count_key = "num_" + cluster_type
         elements_key = count_key + "_elements"

         analysis[count_key] = len(clusters_of_type)
         analysis["total_num_clusters"] += analysis[count_key]
         analysis[elements_key] = numpy.sum([len(getAllElements(clusters_of_type[dc_id])) for dc_id in clusters_of_type])
         analysis["total_num_elements"] += analysis[elements_key]
     return cluster_type
Beispiel #3
0
    def calculate_global_overlap(cls, decomposed_clusters, distance_matrix, cluster_method, global_method):
        """
        Calculates the overlap of a clustering as the size-weighted mean of the
        overlaps of its clusters, inverted so that higher values are better.

        @param decomposed_clusters: Collection of decomposed clusters (iterated twice).
        @param distance_matrix: Forwarded to 'calculate_cluster_overlap'.
        @param cluster_method: Method number forwarded to 'calculate_cluster_overlap'.
        @param global_method: Not used by this implementation.

        @return: 1 minus the weighted mean of the cluster overlaps.
        """
        sys.stdout.flush()

        per_cluster_overlap = numpy.array([
            cls.calculate_cluster_overlap(cluster_method, cluster, distance_matrix)
            for cluster in decomposed_clusters
        ])
        per_cluster_size = numpy.array([
            len(getAllElements(cluster)) for cluster in decomposed_clusters
        ])

        total_size = numpy.sum(per_cluster_size)
        weighted_sum = numpy.sum(per_cluster_overlap * per_cluster_size)
        mean_overlap = weighted_sum / float(total_size)

        # The raw value is best at 0 and worst at 1; it is flipped to be easier to read.
        return 1 - mean_overlap
Beispiel #4
0
    def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix,
                         analysis):
        """
        Stores the per-cluster measurements of the separated clusters in 'analysis'.

        Every cluster gets an entry keyed by its id holding its component ids and,
        under "global", the mean / std / max distance statistics and the number of
        elements of the whole cluster and of each of its components. Clusters of
        type "mixed" additionally get "centers_mean_diff" and an inverted overlap.

        @param separated_decomposed_clusters: Mapping from cluster type to a mapping
        of cluster id to decomposed cluster.
        @param distance_matrix: Forwarded untouched to the statistics helpers.
        @param analysis: Dictionary updated in place; nothing is returned.
        """
        for cluster_type in separated_decomposed_clusters:
            clusters_of_type = separated_decomposed_clusters[cluster_type]
            for cluster_id in clusters_of_type:
                decomposed_cluster = clusters_of_type[cluster_id]

                global_stats = {}
                cluster_entry = {
                    "components": decomposed_cluster.keys(),
                    "global": global_stats
                }
                analysis[cluster_id] = cluster_entry

                # Statistics of the cluster as a whole.
                (global_stats["mean"],
                 global_stats["std"],
                 global_stats["max"]) = calculate_distance_stats(
                     getAllElements(decomposed_cluster), distance_matrix)
                global_stats["num_elements"] = len(
                    getAllElements(decomposed_cluster))

                # Statistics of each component of the cluster.
                for traj_id in decomposed_cluster:
                    component = decomposed_cluster[traj_id]
                    component_stats = {}
                    global_stats[traj_id] = component_stats
                    (component_stats["mean"],
                     component_stats["std"],
                     component_stats["max"]) = calculate_distance_stats(
                         component, distance_matrix)
                    component_stats["num_elements"] = len(component)

                if cluster_type == "mixed":
                    cluster_entry["centers_mean_diff"] = calculate_mean_center_differences(
                        decomposed_cluster, distance_matrix)
                    # The raw overlap is best at 0 and worst at 1; it is stored
                    # inverted so that 1 is the best value and 0 the worst.
                    raw_overlap = OverlapCalculator.calculate_cluster_overlap(
                        decomposed_cluster, distance_matrix)
                    global_stats["overlap"] = 1 - raw_overlap
    def calculate_clustering_overlap(cls, decomposed_clusters, distance_matrix):
        """
        Calculates the overlap of a clustering as the size-weighted mean of the
        overlaps of its clusters, inverted so that higher values are better.

        @param decomposed_clusters: Collection of decomposed clusters (iterated twice).
        @param distance_matrix: Forwarded to 'calculate_cluster_overlap'.

        @return: 1 minus the weighted mean of the cluster overlaps.
        """
        overlaps = numpy.array([
            cls.calculate_cluster_overlap(cluster, distance_matrix)
            for cluster in decomposed_clusters
        ])
        sizes = numpy.array([
            len(getAllElements(cluster)) for cluster in decomposed_clusters
        ])

        total_size = numpy.sum(sizes)
        weight = 1. / total_size
        weighted_overlap = weight * numpy.dot(overlaps, sizes)

        # The raw value is best at 0 and worst at 1; it is flipped to be easier to read.
        return 1 - weighted_overlap
 def analyze_clustering(cls, separated_decomposed_clusters, distance_matrix, analysis):
     """
     Fills 'analysis' with the global measurements of a clustering.

     Stores the overlap of the whole clustering ("overlap"), the overlap of the
     "mixed" clusters alone ("mixed_overlap") and, for every cluster type, the
     number of clusters and elements together with their totals.

     @param separated_decomposed_clusters: Mapping from cluster type to a mapping
     of cluster id to decomposed cluster. The "mixed" key is read directly.
     @param distance_matrix: Forwarded untouched to the overlap calculation.
     @param analysis: Dictionary updated in place; nothing is returned.
     """
     analysis["total_num_clusters"] = 0
     analysis["total_num_elements"] = 0

     every_cluster = mergeSeparatedClusters(separated_decomposed_clusters)
     analysis["overlap"] = OverlapCalculator.calculate_clustering_overlap(every_cluster, distance_matrix)

     only_mixed = mergeSeparatedClusters({"mixed":separated_decomposed_clusters["mixed"]})
     analysis["mixed_overlap"] = OverlapCalculator.calculate_clustering_overlap(only_mixed, distance_matrix)

     for cluster_type in separated_decomposed_clusters:
         clusters_of_type = separated_decomposed_clusters[cluster_type]
         count_key = "num_" + cluster_type
         elements_key = count_key + "_elements"

         analysis[count_key] = len(clusters_of_type)
         analysis["total_num_clusters"] += analysis[count_key]

         sizes = [len(getAllElements(clusters_of_type[dc_id])) for dc_id in clusters_of_type]
         analysis[elements_key] = numpy.sum(sizes)
         analysis["total_num_elements"] += analysis[elements_key]
Beispiel #7
0
    def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix,
                         analysis):
        """
        Stores the per-cluster measurements of the separated clusters in 'analysis'.

        Every cluster gets an entry keyed by its id holding its component ids and,
        under "global", the mean / std / max distance statistics and the number of
        elements of the whole cluster and of each of its components. Clusters of
        type "mixed" additionally get "centers_mean_diff" and the overlap value
        obtained with method number 2.

        @param separated_decomposed_clusters: Mapping from cluster type to a mapping
        of cluster id to decomposed cluster.
        @param distance_matrix: Forwarded untouched to the statistics helpers.
        @param analysis: Dictionary updated in place; nothing is returned.
        """
        for cluster_type in separated_decomposed_clusters:
            clusters_of_type = separated_decomposed_clusters[cluster_type]
            for cluster_id in clusters_of_type:
                decomposed_cluster = clusters_of_type[cluster_id]

                global_stats = {}
                cluster_entry = {
                    "components": decomposed_cluster.keys(),
                    "global": global_stats
                }
                analysis[cluster_id] = cluster_entry

                # Statistics of the cluster as a whole.
                (global_stats["mean"],
                 global_stats["std"],
                 global_stats["max"]) = calculate_distance_stats(
                     getAllElements(decomposed_cluster), distance_matrix)
                global_stats["num_elements"] = len(
                    getAllElements(decomposed_cluster))

                # Statistics of each component of the cluster.
                for traj_id in decomposed_cluster:
                    component = decomposed_cluster[traj_id]
                    component_stats = {}
                    global_stats[traj_id] = component_stats
                    (component_stats["mean"],
                     component_stats["std"],
                     component_stats["max"]) = calculate_distance_stats(
                         component, distance_matrix)
                    component_stats["num_elements"] = len(component)

                if cluster_type == "mixed":
                    cluster_entry["centers_mean_diff"] = calculate_mean_center_differences(
                        decomposed_cluster, distance_matrix)
                    global_stats["overlap"] = OverlapCalculator.calculate_cluster_overlap(
                        2, decomposed_cluster, distance_matrix)
Beispiel #8
0
    def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix, analysis):
        """
        Stores the per-cluster measurements of the separated clusters in 'analysis'.

        Every cluster gets an entry keyed by its id holding its component ids and,
        under "global", the mean / std / max distance statistics and the number of
        elements of the whole cluster and of each of its components. Clusters of
        type "mixed" additionally get "centers_mean_diff" and the overlap value
        obtained with method number 2.

        @param separated_decomposed_clusters: Mapping from cluster type to a mapping
        of cluster id to decomposed cluster.
        @param distance_matrix: Forwarded untouched to the statistics helpers.
        @param analysis: Dictionary updated in place; nothing is returned.
        """
        for cluster_type in separated_decomposed_clusters:
            clusters_of_type = separated_decomposed_clusters[cluster_type]
            for cluster_id in clusters_of_type:
                decomposed_cluster = clusters_of_type[cluster_id]

                global_stats = {}
                cluster_entry = {"components":decomposed_cluster.keys(),"global":global_stats}
                analysis[cluster_id] = cluster_entry

                # Statistics of the cluster as a whole.
                whole_cluster_stats = calculate_distance_stats(getAllElements(decomposed_cluster), distance_matrix)
                global_stats["mean"], global_stats["std"], global_stats["max"] = whole_cluster_stats
                global_stats["num_elements"] = len(getAllElements(decomposed_cluster))

                # Statistics of each component of the cluster.
                for traj_id in decomposed_cluster:
                    component = decomposed_cluster[traj_id]
                    component_stats = {}
                    global_stats[traj_id] = component_stats
                    per_component_stats = calculate_distance_stats(component, distance_matrix)
                    component_stats["mean"], component_stats["std"], component_stats["max"] = per_component_stats
                    component_stats["num_elements"] = len(component)

                if cluster_type == "mixed":
                    cluster_entry["centers_mean_diff"] = calculate_mean_center_differences(decomposed_cluster, distance_matrix)
                    global_stats["overlap"] = OverlapCalculator.calculate_cluster_overlap(2, decomposed_cluster, distance_matrix)
Beispiel #9
0
    def calculate_clustering_overlap(cls, decomposed_clusters,
                                     distance_matrix):
        """
        Calculates the overlap of a clustering as the size-weighted mean of the
        overlaps of its clusters, inverted so that higher values are better.

        @param decomposed_clusters: Collection of decomposed clusters (iterated twice).
        @param distance_matrix: Forwarded to 'calculate_cluster_overlap'.

        @return: 1 minus the weighted mean of the cluster overlaps.
        """
        overlap_values = []
        for cluster in decomposed_clusters:
            overlap_values.append(
                cls.calculate_cluster_overlap(cluster, distance_matrix))
        cluster_overlaps = numpy.array(overlap_values)

        size_values = []
        for cluster in decomposed_clusters:
            size_values.append(len(getAllElements(cluster)))
        cluster_sizes = numpy.array(size_values)

        total_size = numpy.sum(cluster_sizes)
        weight = 1. / total_size
        weighted_overlap = weight * numpy.dot(cluster_overlaps, cluster_sizes)

        # The raw value is best at 0 and worst at 1; it is flipped to be easier to read.
        return 1 - weighted_overlap
Beispiel #10
0
    def calculate_global_overlap(cls, decomposed_clusters, distance_matrix,
                                 cluster_method, global_method):
        """
        Calculates the overlap of a clustering as the size-weighted mean of the
        overlaps of its clusters, inverted so that higher values are better.

        @param decomposed_clusters: Collection of decomposed clusters (iterated twice).
        @param distance_matrix: Forwarded to 'calculate_cluster_overlap'.
        @param cluster_method: Method number forwarded to 'calculate_cluster_overlap'.
        @param global_method: Not used by this implementation.

        @return: 1 minus the weighted mean of the cluster overlaps.
        """
        sys.stdout.flush()

        overlap_values = []
        for cluster in decomposed_clusters:
            overlap_values.append(
                cls.calculate_cluster_overlap(cluster_method, cluster,
                                              distance_matrix))
        cluster_overlaps = numpy.array(overlap_values)

        size_values = []
        for cluster in decomposed_clusters:
            size_values.append(len(getAllElements(cluster)))
        cluster_sizes = numpy.array(size_values)

        total_size = numpy.sum(cluster_sizes)
        weighted_sum = numpy.sum(cluster_overlaps * cluster_sizes)
        mean_overlap = weighted_sum / float(total_size)

        # The raw value is best at 0 and worst at 1; it is flipped to be easier to read.
        return 1 - mean_overlap
    def calculate_cluster_overlap(cls, decomposed_cluster, distance_matrix):
        """
        Calculates the overlap value of a single decomposed cluster.

        @param decomposed_cluster: The cluster to evaluate; its length is the
        number of components it is split into.
        @param distance_matrix: Forwarded to 'get_cluster_min_distances'.

        @return: 1.0 for a cluster with a single component, 0.0 when the largest
        minimum distance is 0, otherwise the sum of the minimum distances divided
        by the largest one and by the number of elements of the cluster.
        """
        # A cluster with only one component is 'pure'; it gets the value that
        # penalizes the global overlap.
        if len(decomposed_cluster) == 1:
            return 1.0

        num_elements = len(getAllElements(decomposed_cluster))
        min_distances = cls.get_cluster_min_distances(decomposed_cluster, distance_matrix)
        largest_min_distance = max(min_distances)

        # A largest minimum distance of zero means the overlap is total.
        if largest_min_distance == 0:
            return 0.0

        return (1./num_elements) * (numpy.sum(min_distances) /largest_min_distance)
Beispiel #12
0
    def calculate_cluster_overlap(cls, method, decomposed_cluster, distance_matrix):
        """
        Calculates the overlap value for a cluster (intended range [0,1]).

        @param method: Number of the formula to apply:
            1 - sum of the minimum distances divided by the sum of the maximum
                distances.
            2 - sum of the element-wise min / max ratios divided by the number of
                elements of the cluster.
        @param decomposed_cluster: The cluster to evaluate.
        @param distance_matrix: Forwarded to 'get_cluster_min_max_distances'.

        @return: The overlap value, or 1. when there are no minimum distances.

        Ends the program through SystemExit (error message, non-zero status) if
        'method' is not one of the numbers above.
        """
        min_distances, max_distances= cls.get_cluster_min_max_distances(decomposed_cluster, distance_matrix)

        if len(min_distances) == 0:
            return 1.

        # NOTE(review): neither formula guards against maximum distances of zero;
        # confirm whether that input can occur.
        if method == 1:
            return numpy.sum(min_distances) / numpy.sum(max_distances)

        if method == 2:
            return numpy.sum(min_distances / max_distances) / len(getAllElements(decomposed_cluster))

        # sys.exit with a message reports the error on stderr and exits with a
        # failure status; a bare exit() would report success. '%s' keeps the text
        # printable for any kind of 'method' value.
        import sys
        sys.exit("[ERROR OverlapCalculator::calculate_cluster_overlap] The method nr. %s does not exist." % (method,))
Beispiel #13
0
 def analyze_clustering(cls, separated_decomposed_clusters, distance_matrix,
                        analysis):
     """
     Fills 'analysis' with the global measurements of a clustering.

     Stores the global overlap of the clustering ("overlap") and, for every
     cluster type, the number of clusters and elements together with their totals.

     @param separated_decomposed_clusters: Mapping from cluster type to a mapping
     of cluster id to decomposed cluster.
     @param distance_matrix: Forwarded untouched to the overlap calculation.
     @param analysis: Dictionary updated in place.

     @return: The last cluster type that was iterated, or None if there was none.
     """
     analysis["total_num_clusters"] = 0
     analysis["total_num_elements"] = 0
     # The trailing 2 and 1 are passed through as-is (presumably the cluster and
     # global method numbers - confirm against 'calculate_global_overlap').
     analysis["overlap"] = OverlapCalculator.calculate_global_overlap(
         mergeSeparatedClusters(separated_decomposed_clusters),
         distance_matrix, 2, 1)

     # Bound before the loop so the final 'return' cannot raise UnboundLocalError
     # when there are no cluster types to iterate over.
     cluster_type = None
     for cluster_type in separated_decomposed_clusters:
         clusters_of_type = separated_decomposed_clusters[cluster_type]
         count_key = "num_" + cluster_type
         elements_key = count_key + "_elements"

         analysis[count_key] = len(clusters_of_type)
         analysis["total_num_clusters"] += analysis[count_key]
         analysis[elements_key] = numpy.sum([
             len(getAllElements(clusters_of_type[dc_id]))
             for dc_id in clusters_of_type
         ])
         analysis["total_num_elements"] += analysis[elements_key]
     return cluster_type
    def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix, analysis):
        """
        Stores the per-cluster measurements of the separated clusters in 'analysis'.

        Every cluster gets an entry keyed by its id holding its component ids and,
        under "global", the mean / std / max distance statistics and the number of
        elements of the whole cluster and of each of its components. Clusters of
        type "mixed" additionally get "centers_mean_diff" and an inverted overlap.

        @param separated_decomposed_clusters: Mapping from cluster type to a mapping
        of cluster id to decomposed cluster.
        @param distance_matrix: Forwarded untouched to the statistics helpers.
        @param analysis: Dictionary updated in place; nothing is returned.
        """
        for cluster_type in separated_decomposed_clusters:
            clusters_of_type = separated_decomposed_clusters[cluster_type]
            for cluster_id in clusters_of_type:
                decomposed_cluster = clusters_of_type[cluster_id]

                global_stats = {}
                cluster_entry = {"components":decomposed_cluster.keys(),"global":global_stats}
                analysis[cluster_id] = cluster_entry

                # Statistics of the cluster as a whole.
                whole_cluster_stats = calculate_distance_stats(getAllElements(decomposed_cluster), distance_matrix)
                global_stats["mean"], global_stats["std"], global_stats["max"] = whole_cluster_stats
                global_stats["num_elements"] = len(getAllElements(decomposed_cluster))

                # Statistics of each component of the cluster.
                for traj_id in decomposed_cluster:
                    component = decomposed_cluster[traj_id]
                    component_stats = {}
                    global_stats[traj_id] = component_stats
                    per_component_stats = calculate_distance_stats(component, distance_matrix)
                    component_stats["mean"], component_stats["std"], component_stats["max"] = per_component_stats
                    component_stats["num_elements"] = len(component)

                if cluster_type == "mixed":
                    cluster_entry["centers_mean_diff"] = calculate_mean_center_differences(decomposed_cluster, distance_matrix)
                    # The raw overlap is best at 0 and worst at 1; it is stored
                    # inverted so that 1 is the best value and 0 the worst.
                    raw_overlap = OverlapCalculator.calculate_cluster_overlap( decomposed_cluster, distance_matrix)
                    global_stats["overlap"] = 1 - raw_overlap
Beispiel #15
0
    def calculate_cluster_overlap(cls, decomposed_cluster, distance_matrix):
        """
        Calculates the overlap value of a single decomposed cluster.

        @param decomposed_cluster: The cluster to evaluate; its length is the
        number of components it is split into.
        @param distance_matrix: Forwarded to 'get_cluster_min_distances'.

        @return: 1.0 for a cluster with a single component, 0.0 when the largest
        minimum distance is 0, otherwise the sum of the minimum distances divided
        by the largest one and by the number of elements of the cluster.
        """
        # A cluster with only one component is 'pure'; it gets the value that
        # penalizes the global overlap.
        if len(decomposed_cluster) == 1:
            return 1.0

        num_elements = len(getAllElements(decomposed_cluster))
        min_distances = cls.get_cluster_min_distances(
            decomposed_cluster, distance_matrix)
        largest_min_distance = max(min_distances)

        # A largest minimum distance of zero means the overlap is total.
        if largest_min_distance == 0:
            return 0.0

        normalized_sum = numpy.sum(min_distances) / largest_min_distance
        return (1. / num_elements) * normalized_sum
Beispiel #16
0
    def test_decompose(self):
        """
        Decomposes five clusters over two trajectory ranges and compares the
        result with the expected per-trajectory split of every cluster.
        """
        traj_ranges = {"traj_A": (0, 6), "traj_B": (7, 15)}
        cluster_elements = [
            [0, 1, 2],
            [3, 8, 10],
            [14, 4, 15],
            [5, 6],
            [7, 9, 11, 12, 13],
        ]
        clusters = [Cluster(None, elements) for elements in cluster_elements]

        # Cluster ids are the string form of their position in the list.
        for position, cluster in enumerate(clusters):
            cluster.id = str(position)

        decomposed = Separator.decompose(clusters, traj_ranges)

        all_elements = [
            element
            for cluster_id in decomposed
            for element in getAllElements(decomposed[cluster_id])
        ]

        expected = {
            '0': {'traj_A': [0, 1, 2]},
            '1': {'traj_A': [3], 'traj_B': [8, 10]},
            '2': {'traj_A': [4], 'traj_B': [14, 15]},
            '3': {'traj_A': [5, 6]},
            '4': {'traj_B': [9, 11, 12, 13, 7]},
        }

        # Every element from 0 to 15 must appear exactly once after decomposing.
        self.assertItemsEqual(range(16), sorted(all_elements))
        self.assertDictEqual(expected, decomposed)
Beispiel #17
0
    def test_decompose(self):
        """
        Decomposes five clusters over two trajectory ranges and compares the
        result with the expected per-trajectory split of every cluster.
        """
        traj_ranges = {"traj_A":(0,6),"traj_B":(7,15)}
        cluster_elements = [
            [0,1,2],
            [3,8,10],
            [14,4,15],
            [5,6],
            [7,9,11,12,13],
        ]
        clusters = [Cluster(None, elements) for elements in cluster_elements]

        # Cluster ids are the string form of their position in the list.
        for position, cluster in enumerate(clusters):
            cluster.id = str(position)

        decomposed = Separator.decompose(clusters, traj_ranges)

        all_elements = [
            element
            for cluster_id in decomposed
            for element in getAllElements(decomposed[cluster_id])
        ]

        expected = {
            '0': {'traj_A': [0, 1, 2]},
            '1': {'traj_A': [3], 'traj_B': [8, 10]},
            '2': {'traj_A': [4], 'traj_B': [14, 15]},
            '3': {'traj_A': [5, 6]},
            '4': {'traj_B': [9, 11, 12, 13, 7]},
        }

        # Every element from 0 to 15 must appear exactly once after decomposing.
        self.assertItemsEqual(range(16),sorted(all_elements))
        self.assertDictEqual(expected, decomposed )
Beispiel #18
0
    def calculate_cluster_overlap(cls, method, decomposed_cluster,
                                  distance_matrix):
        """
        Calculates the overlap value for a cluster (intended range [0,1]).

        @param method: Number of the formula to apply:
            1 - sum of the minimum distances divided by the sum of the maximum
                distances.
            2 - sum of the element-wise min / max ratios divided by the number of
                elements of the cluster.
        @param decomposed_cluster: The cluster to evaluate.
        @param distance_matrix: Forwarded to 'get_cluster_min_max_distances'.

        @return: The overlap value, or 1. when there are no minimum distances.

        Ends the program through SystemExit (error message, non-zero status) if
        'method' is not one of the numbers above.
        """
        min_distances, max_distances = cls.get_cluster_min_max_distances(
            decomposed_cluster, distance_matrix)

        if len(min_distances) == 0:
            return 1.

        # NOTE(review): neither formula guards against maximum distances of zero;
        # confirm whether that input can occur.
        if method == 1:
            return numpy.sum(min_distances) / numpy.sum(max_distances)

        if method == 2:
            return numpy.sum(min_distances / max_distances) / len(
                getAllElements(decomposed_cluster))

        # sys.exit with a message reports the error on stderr and exits with a
        # failure status; a bare exit() would report success. '%s' keeps the text
        # printable for any kind of 'method' value.
        import sys
        sys.exit(
            "[ERROR OverlapCalculator::calculate_cluster_overlap] The method nr. %s does not exist." % (
                method,))
Beispiel #19
0
 def test_getAllElements(self):
     """
     The elements gathered from the fixture's decomposed cluster, once sorted,
     must be exactly the numbers from 0 to 14.
     """
     gathered = sorted(getAllElements(self.decomposed_cluster))
     numpy.testing.assert_array_equal( gathered, range(15))