Example #1
0
 def silhouette(cls, graph, clusters):
     """
     Find the average silhouette distance for the clusters.

     For every node of the graph, a is the mean distance to the other nodes
     of the cluster that contains it and b is the smallest mean distance to
     the nodes of any cluster that does not contain it. The node scores
     (b - a) / max(a, b) and the scores are averaged over all graph nodes.

     Key arguments:
     graph    -- the original graph
     clusters -- the clusters to analyze

     Returns 0.0 for a graph without nodes. A node for which both a and b
     are zero (e.g. the only node of the only cluster) contributes 0.0
     instead of raising ZeroDivisionError. Clusters without nodes are
     ignored.
     """
     nodes = graph.nodes
     if len(nodes) == 0:
         # Nothing to average over; avoid dividing by zero below.
         return 0.0

     paths = graph.paths()

     # Calculate the distances for all pairs of nodes, keyed by node value.
     dists = {}
     for node1, node2 in combinations(nodes, 2):
         dist = graph.dist(node1, node2, paths)
         dists.setdefault(node1.value, {})[node2.value] = dist
         dists.setdefault(node2.value, {})[node1.value] = dist

     s = 0.0
     for node in nodes:
         value = node.value

         # Find a and b.
         a = 0.0
         b = float('inf')
         for cluster in clusters:
             size = len(cluster.nodes)
             if size == 0:
                 # An empty cluster has no mean distance to offer.
                 continue

             if cluster.node(value):
                 # NOTE: a singleton cluster keeps a = 0.0, so its node
                 # scores 1.0 whenever b ends up greater than zero.
                 if size > 1:
                     a = sum(dists[value][onode.value]
                             for onode in cluster.nodes
                             if onode.value != value) / float(size - 1)
                 else:
                     a = 0.0
             else:
                 mean = sum(dists[value][onode.value]
                            for onode in cluster.nodes) / float(size)
                 b = min(b, mean)

         # No other cluster was found (or every one is infinitely far away).
         if b == float('inf'):
             b = 0.0

         # When a and b are both zero the score is undefined; count it as 0.
         denom = max(a, b)
         if denom != 0:
             s += (b - a) / denom

     return s / len(nodes)
Example #2
0
 def davies_bouldin(cls, graph, clusters):
     """
     Return the davies bouldin index for the clusters.

     The diameter of a cluster is the largest cluster.dist() between two of
     its nodes, and the distance between two clusters is the mean
     graph.dist() over every pair made of one node from each. For each
     cluster the largest (diameter_i + diameter_j) / distance_ij over the
     other clusters is taken, and those maxima are averaged.

     Key arguments:
     graph    -- the original graph
     clusters -- the clusters to analyze

     Returns inf when fewer than two non-empty clusters are given or when no
     cluster has more than one node. Clusters without nodes are ignored. Two
     clusters whose distance is zero yield an infinite ratio instead of
     raising ZeroDivisionError.
     """
     # Empty clusters have no distance to anything; drop them up front so
     # the averages below never divide by a zero node count.
     clusters = [cluster for cluster in clusters if len(cluster.nodes) > 0]

     # If we only have one cluster, then return inf!
     if len(clusters) < 2:
         return float('inf')

     # Special case, if all the clusters are singletons, return inf.
     if all(len(cluster.nodes) <= 1 for cluster in clusters):
         return float('inf')

     # Calculate the diameters for each cluster.
     diams = {}
     for cluster in clusters:
         diam = 0.0
         if len(cluster.nodes) > 1:
             cluster_paths = cluster.paths()
             for node1, node2 in combinations(cluster.nodes, 2):
                 diam = max(diam, cluster.dist(node1, node2, cluster_paths))

             # combinations() only yields each pair in one order, so also
             # walk the path table itself to cover directed graphs.
             for node1 in cluster_paths:
                 for node2 in cluster_paths[node1]:
                     if node1 != node2 and cluster_paths[node1][node2]:
                         diam = max(diam, cluster.dist(node1, node2, cluster_paths))
         diams[cluster] = diam

     # Find all the graphs paths.
     graph_paths = graph.paths()

     # Calculate the average distance between each pair of clusters.
     dists = {}
     for cluster1, cluster2 in combinations(clusters, 2):
         total = sum(graph.dist(node1, node2, graph_paths)
                     for node1, node2 in product(cluster1.nodes, cluster2.nodes))
         dist = total / float(len(cluster1.nodes) * len(cluster2.nodes))
         dists.setdefault(cluster1, {})[cluster2] = dist
         dists.setdefault(cluster2, {})[cluster1] = dist

     num = 0.0
     for cluster1 in clusters:
         max_db = 0.0
         for cluster2 in clusters:
             if cluster1 != cluster2:
                 separation = dists[cluster1][cluster2]
                 if separation == 0:
                     # Clusters with no separation are as bad as it gets.
                     ratio = float('inf')
                 else:
                     ratio = (diams[cluster1] + diams[cluster2]) / separation
                 max_db = max(max_db, ratio)
         num += max_db

     return num / len(clusters)