def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix, analysis):
    """
    Computes distance statistics for every decomposed cluster and stores them in 'analysis'.

    'separated_decomposed_clusters' is indexed first by cluster type and then by cluster id;
    each value is a decomposed cluster indexed by trajectory id.

    For each cluster id, 'analysis[cluster_id]' is (re)written with:
        - "components": list with the trajectory ids of the decomposed cluster.
        - "global": "mean", "std" and "max" (as returned by 'calculate_distance_stats' for
          all the elements of the cluster), "num_elements", and one entry per trajectory id
          holding the same four values restricted to the elements of that trajectory.
    Clusters whose type is "mixed" additionally get "centers_mean_diff" and
    "global" -> "overlap".

    @param separated_decomposed_clusters: Decomposed clusters, grouped by cluster type.
    @param distance_matrix: Matrix handed over, untouched, to the statistics helpers.
    @param analysis: Dictionary-like object that is filled in place.

    @return: None. Results are written into 'analysis'.
    """
    for cluster_type in separated_decomposed_clusters:
        clusters_of_type = separated_decomposed_clusters[cluster_type]
        for cluster_id in clusters_of_type:
            decomposed_cluster = clusters_of_type[cluster_id]

            global_stats = {}
            cluster_analysis = {
                # Store a plain list: a snapshot of the ids instead of a live view of the
                # decomposed cluster, so the result stays a simple, serializable container.
                "components": list(decomposed_cluster.keys()),
                "global": global_stats,
            }
            analysis[cluster_id] = cluster_analysis

            # Gather the elements once; they feed both the statistics and the count.
            all_elements = getAllElements(decomposed_cluster)
            total_elements = len(all_elements)
            global_stats["mean"], global_stats["std"], global_stats["max"] = \
                calculate_distance_stats(all_elements, distance_matrix)
            global_stats["num_elements"] = total_elements

            # Same statistics, restricted to the elements contributed by each trajectory.
            for traj_id in decomposed_cluster:
                traj_elements = decomposed_cluster[traj_id]
                traj_stats = {}
                global_stats[traj_id] = traj_stats
                traj_stats["mean"], traj_stats["std"], traj_stats["max"] = \
                    calculate_distance_stats(traj_elements, distance_matrix)
                traj_stats["num_elements"] = len(traj_elements)

            if cluster_type == "mixed":
                cluster_analysis["centers_mean_diff"] = calculate_mean_center_differences(
                    decomposed_cluster, distance_matrix)
                # NOTE(review): the leading 2 is forwarded as is; its meaning is defined by
                # OverlapCalculator.calculate_cluster_overlap - confirm against that method.
                global_stats["overlap"] = OverlapCalculator.calculate_cluster_overlap(
                    2, decomposed_cluster, distance_matrix)
def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix, analysis):
    """
    Performs the overlap analysis of separated clusters.

    'separated_decomposed_clusters' is indexed first by cluster type and then by cluster id;
    each value is a decomposed cluster indexed by trajectory id.

    For each cluster id, 'analysis[cluster_id]' is (re)written with:
        - "components": list with the trajectory ids of the decomposed cluster.
        - "global": "mean", "std" and "max" (as returned by 'calculate_distance_stats' for
          all the elements of the cluster), "num_elements", and one entry per trajectory id
          holding the same four values restricted to the elements of that trajectory.
    Clusters whose type is "mixed" additionally get "centers_mean_diff" and
    "global" -> "overlap" (stored inverted, as 1 - overlap).

    @param separated_decomposed_clusters: Decomposed clusters, grouped by cluster type.
    @param distance_matrix: Matrix handed over, untouched, to the statistics helpers.
    @param analysis: Dictionary-like object that is filled in place.

    @return: None. Results are written into 'analysis'.
    """
    for cluster_type in separated_decomposed_clusters:
        clusters_of_type = separated_decomposed_clusters[cluster_type]
        for cluster_id in clusters_of_type:
            decomposed_cluster = clusters_of_type[cluster_id]

            global_stats = {}
            cluster_analysis = {
                # Store a plain list: a snapshot of the ids instead of a live view of the
                # decomposed cluster, so the result stays a simple, serializable container.
                "components": list(decomposed_cluster.keys()),
                "global": global_stats,
            }
            analysis[cluster_id] = cluster_analysis

            # Gather the elements once; they feed both the statistics and the count.
            all_elements = getAllElements(decomposed_cluster)
            total_elements = len(all_elements)
            global_stats["mean"], global_stats["std"], global_stats["max"] = \
                calculate_distance_stats(all_elements, distance_matrix)
            global_stats["num_elements"] = total_elements

            # Same statistics, restricted to the elements contributed by each trajectory.
            for traj_id in decomposed_cluster:
                traj_elements = decomposed_cluster[traj_id]
                traj_stats = {}
                global_stats[traj_id] = traj_stats
                traj_stats["mean"], traj_stats["std"], traj_stats["max"] = \
                    calculate_distance_stats(traj_elements, distance_matrix)
                traj_stats["num_elements"] = len(traj_elements)

            if cluster_type == "mixed":
                cluster_analysis["centers_mean_diff"] = calculate_mean_center_differences(
                    decomposed_cluster, distance_matrix)
                # The overlap ranges between 0 and 1, being 0 the best value. We invert it in
                # order to get a more understandable range (1 is the best value and 0 the
                # worst).
                global_stats["overlap"] = 1 - OverlapCalculator.calculate_cluster_overlap(
                    decomposed_cluster, distance_matrix)
def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix, analysis):
    """
    Performs the overlap analysis of separated clusters.

    The input is walked by cluster type and then by cluster id; every value found there is
    a decomposed cluster indexed by trajectory id. Each cluster id gets a fresh entry in
    'analysis' shaped as:

        {
            "components": [traj_id, ...],
            "global": {
                "mean", "std", "max": values returned by 'calculate_distance_stats' for
                                      all the elements of the cluster,
                "num_elements": number of elements of the cluster,
                traj_id: {"mean", "std", "max", "num_elements"} for that trajectory only,
                "overlap": only for "mixed" clusters, stored as 1 - overlap
            },
            "centers_mean_diff": only for "mixed" clusters
        }

    @param separated_decomposed_clusters: Decomposed clusters, grouped by cluster type.
    @param distance_matrix: Matrix handed over, untouched, to the statistics helpers.
    @param analysis: Dictionary-like object that is filled in place.

    @return: None. Results are written into 'analysis'.
    """
    for cluster_type in separated_decomposed_clusters:
        clusters_of_type = separated_decomposed_clusters[cluster_type]
        is_mixed = cluster_type == "mixed"

        for cluster_id in clusters_of_type:
            decomposed_cluster = clusters_of_type[cluster_id]

            global_stats = {}
            cluster_analysis = {
                # A list (not the live keys view) keeps the stored ids independent of the
                # decomposed cluster and leaves a plain, serializable container.
                "components": list(decomposed_cluster.keys()),
                "global": global_stats,
            }
            analysis[cluster_id] = cluster_analysis

            # One call to getAllElements serves both the statistics and the element count.
            all_elements = getAllElements(decomposed_cluster)
            total_elements = len(all_elements)
            mean, std, maximum = calculate_distance_stats(all_elements, distance_matrix)
            global_stats["mean"] = mean
            global_stats["std"] = std
            global_stats["max"] = maximum
            global_stats["num_elements"] = total_elements

            # Per-trajectory breakdown of the same statistics.
            for traj_id in decomposed_cluster:
                traj_elements = decomposed_cluster[traj_id]
                traj_stats = {}
                global_stats[traj_id] = traj_stats
                mean, std, maximum = calculate_distance_stats(traj_elements, distance_matrix)
                traj_stats["mean"] = mean
                traj_stats["std"] = std
                traj_stats["max"] = maximum
                traj_stats["num_elements"] = len(traj_elements)

            if is_mixed:
                cluster_analysis["centers_mean_diff"] = calculate_mean_center_differences(
                    decomposed_cluster, distance_matrix)
                # The overlap ranges between 0 and 1, being 0 the best value. We invert it in
                # order to get a more understandable range (1 is the best value and 0 the
                # worst).
                global_stats["overlap"] = 1 - OverlapCalculator.calculate_cluster_overlap(
                    decomposed_cluster, distance_matrix)
def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix, analysis):
    """
    Computes distance statistics for every decomposed cluster and stores them in 'analysis'.

    The input is walked by cluster type and then by cluster id; every value found there is
    a decomposed cluster indexed by trajectory id. Each cluster id gets a fresh entry in
    'analysis' shaped as:

        {
            "components": [traj_id, ...],
            "global": {
                "mean", "std", "max": values returned by 'calculate_distance_stats' for
                                      all the elements of the cluster,
                "num_elements": number of elements of the cluster,
                traj_id: {"mean", "std", "max", "num_elements"} for that trajectory only,
                "overlap": only for "mixed" clusters
            },
            "centers_mean_diff": only for "mixed" clusters
        }

    @param separated_decomposed_clusters: Decomposed clusters, grouped by cluster type.
    @param distance_matrix: Matrix handed over, untouched, to the statistics helpers.
    @param analysis: Dictionary-like object that is filled in place.

    @return: None. Results are written into 'analysis'.
    """
    for cluster_type in separated_decomposed_clusters:
        clusters_of_type = separated_decomposed_clusters[cluster_type]
        is_mixed = cluster_type == "mixed"

        for cluster_id in clusters_of_type:
            decomposed_cluster = clusters_of_type[cluster_id]

            global_stats = {}
            cluster_analysis = {
                # A list (not the live keys view) keeps the stored ids independent of the
                # decomposed cluster and leaves a plain, serializable container.
                "components": list(decomposed_cluster.keys()),
                "global": global_stats,
            }
            analysis[cluster_id] = cluster_analysis

            # One call to getAllElements serves both the statistics and the element count.
            all_elements = getAllElements(decomposed_cluster)
            total_elements = len(all_elements)
            mean, std, maximum = calculate_distance_stats(all_elements, distance_matrix)
            global_stats["mean"] = mean
            global_stats["std"] = std
            global_stats["max"] = maximum
            global_stats["num_elements"] = total_elements

            # Per-trajectory breakdown of the same statistics.
            for traj_id in decomposed_cluster:
                traj_elements = decomposed_cluster[traj_id]
                traj_stats = {}
                global_stats[traj_id] = traj_stats
                mean, std, maximum = calculate_distance_stats(traj_elements, distance_matrix)
                traj_stats["mean"] = mean
                traj_stats["std"] = std
                traj_stats["max"] = maximum
                traj_stats["num_elements"] = len(traj_elements)

            if is_mixed:
                cluster_analysis["centers_mean_diff"] = calculate_mean_center_differences(
                    decomposed_cluster, distance_matrix)
                # NOTE(review): the leading 2 is forwarded as is; its meaning is defined by
                # OverlapCalculator.calculate_cluster_overlap - confirm against that method.
                global_stats["overlap"] = OverlapCalculator.calculate_cluster_overlap(
                    2, decomposed_cluster, distance_matrix)
def test_calculate_mean_centers_difference(self):
    """
    Checks 'calculate_mean_center_differences' against a reference value: the mean of the
    pairwise distances (scipy 'pdist') between the medoids expected for the fixture
    cluster. Both values must agree up to 8 decimal places.
    """
    medoid_coordinates = [[1,1],[1,6],[6,1]]
    pairwise_distances = scipy.spatial.distance.pdist(medoid_coordinates)
    reference_mean = numpy.mean(pairwise_distances)

    obtained_mean = calculate_mean_center_differences(self.decomposed_cluster, self.matrix)

    self.assertAlmostEqual(reference_mean, obtained_mean, 8)