def perform_orange_clustering(mss_id):
    """Cluster tasks with Orange and mirror the result as a TreeNode tree.

    A cached ``(dist_full, id_to_name)`` pair is loaded with ``helper.load``
    from a fixed path, an average-linkage ``orange.HierarchicalClustering``
    is run on the distances, and the resulting binary cluster tree is copied
    breadth-first into ``TreeNode`` objects.

    Args:
        mss_id: Currently unused; the call that would compute the distances
            from it is disabled in favour of the cached file.

    Returns:
        The ``TreeNode`` named ``"root"`` at the top of the copied tree.
    """
    from collections import deque

    import orange
    from task_similarities import TreeNode
    import helper

    # Disabled on-the-fly computation, kept for reference:
    # (dist_full, id_to_name) = compute_task_distances(mss_id)
    p = '/fml/ag-raetsch/home/cwidmer'
    (dist_full, id_to_name) = helper.load(p + "/dist")

    # orange.SymMatrix takes lower-triangular rows: row i holds the entries
    # for columns 0..i-1.  The previous code built upper-triangular rows and
    # reversed the row list, which produced the right row lengths but put
    # the distances at the wrong (row, col) positions, so the element indices
    # reported by the clustering no longer matched id_to_name.  The same
    # upper-triangle entries dist_full[col, row] (col < row) are read here,
    # but stored where they belong.
    # assumes dist_full is square and indexable as dist_full[i, j] -- TODO confirm
    num_tasks = len(dist_full)
    lower_triangle = [
        [dist_full[col, row] for col in range(row)]
        for row in range(num_tasks)
    ]
    m = orange.SymMatrix(lower_triangle)

    root = orange.HierarchicalClustering(
        m, linkage=orange.HierarchicalClustering.Average)
    root_node = TreeNode("root")

    # Breadth-first walk; each queue entry pairs a cluster with the TreeNode
    # that represents it.
    pending = deque([(root, root_node)])
    while pending:
        cluster, node = pending.popleft()

        # Left is handled before right, as in the original code.
        for branch in (cluster.left, cluster.right):
            if not branch:
                continue

            members = tuple(branch)
            name = str(members)
            if len(members) == 1:
                # Single-element clusters are labelled with the task name.
                name = id_to_name[members[0]]
            # NOTE(review): the original indentation was lost; printing the
            # name of every created node is assumed here -- confirm.
            print(name)

            # The second argument (1.0) is kept from the original call;
            # presumably an edge weight -- confirm against TreeNode.add_child.
            child = TreeNode(name)
            node.add_child(child, 1.0)
            pending.append((branch, child))

    return root_node
def hierarchical_clustering(linkage, distance_matrix):
    """Run ``orange.HierarchicalClustering`` with a linkage chosen by index.

    Args:
        linkage: Index into the table below (0 single, 1 average, 2 Ward,
            3 complete).
        distance_matrix: Passed unchanged as the first argument of
            ``orange.HierarchicalClustering``.

    Returns:
        Whatever ``orange.HierarchicalClustering`` returns for the matrix.
    """
    import orange

    clustering = orange.HierarchicalClustering
    linkages = [
        ("Single linkage", clustering.Single),
        ("Average linkage", clustering.Average),
        ("Ward's linkage", clustering.Ward),
        ("Complete linkage", clustering.Complete),
    ]
    # Only the linkage constant is needed; the label is descriptive.
    _label, method = linkages[linkage]
    return clustering(distance_matrix, linkage=method)
def sortClusteringOrdered(self):
    """Cluster ``self.matrix`` with average linkage and store the leaf order.

    The tree is stored in ``self.rootCluster``, handed to
    ``orngClustering.orderLeaves`` together with the matrix and
    ``self.progressBarSet`` as the progress callback, and its ``mapping`` is
    finally copied into ``self.order`` as a list.
    """
    average = orange.HierarchicalClustering.Average
    tree = orange.HierarchicalClustering(self.matrix, linkage=average)
    self.rootCluster = tree

    import orngClustering

    # Progress bar brackets the leaf-ordering step only.
    self.progressBarInit()
    orngClustering.orderLeaves(tree, self.matrix, self.progressBarSet)
    self.progressBarFinished()

    self.order = [element for element in tree.mapping]
def run_clustering(self):
    """Cluster ``self.matrix`` and display the resulting tree.

    Does nothing when ``self.matrix`` is falsy.  Otherwise the clustering is
    run with the linkage constant stored at
    ``self.linkage[self.Linkage][1]``, the result is kept in
    ``self.root_cluster`` and ``self.display_tree()`` is called.
    """
    if not self.matrix:
        return

    def report_progress(value, a):
        # The callback value is scaled by 100 before reaching the bar.
        return self.progressBarSet(value * 100)

    self.progressBarInit()
    selected_linkage = self.linkage[self.Linkage][1]
    self.root_cluster = orange.HierarchicalClustering(
        self.matrix,
        linkage=selected_linkage,
        progressCallback=report_progress,
    )
    self.progressBarFinished()
    self.display_tree()
def hierarchicalClustering_attributes(
        data,
        distance=None,
        linkage=orange.HierarchicalClustering.Average,
        order=False,
        progressCallback=None):
    """Return a hierarchical clustering of the attributes in the data set.

    The matrix entry for each attribute pair is the ``p`` field of
    ``orange.PearsonCorrelation`` computed for that pair.  ``distance`` is
    accepted but not used.  When ``order`` is true the tree is additionally
    passed to ``orderLeaves`` with the same progress callback.
    """
    attribute_count = len(data.domain.attributes)
    matrix = orange.SymMatrix(attribute_count)

    # Fill the cells below the diagonal, one attribute pair at a time.
    for row in range(attribute_count):
        for col in range(row):
            correlation = orange.PearsonCorrelation(row, col, data, 0)
            matrix[row, col] = correlation.p

    root = orange.HierarchicalClustering(
        matrix, linkage=linkage, progressCallback=progressCallback)
    if not order:
        return root

    orderLeaves(root, matrix, progressCallback=progressCallback)
    return root
def hierarchical_clustering(linkage, distance_matrix):
    """Run ``orange.HierarchicalClustering`` with a linkage chosen by index.

    ``linkage`` indexes the table below (0 single, 1 average, 2 Ward,
    3 complete).  A ``TypeError`` raised while selecting the linkage or
    running the clustering is reported on standard output and ``None`` is
    returned instead of the clustering.
    """
    import Orange
    import orange
    import sys

    linkages = [
        ("Single linkage", orange.HierarchicalClustering.Single),
        ("Average linkage", orange.HierarchicalClustering.Average),
        ("Ward's linkage", orange.HierarchicalClustering.Ward),
        ("Complete linkage", orange.HierarchicalClustering.Complete),
    ]

    try:
        # The table lookup stays inside the try block: a TypeError from a
        # non-integer index is reported the same way as one from Orange.
        method = linkages[linkage][1]
        result = orange.HierarchicalClustering(distance_matrix, linkage=method)
    except TypeError as e:
        header = " ".join(["hierarchical_clustering:", str(sys.exc_info()[0])])
        print(header)
        print(e)
        return None
    else:
        return result
def test_iris(self):
    """Cluster the iris table with each linkage and check the resulting tree.

    For every linkage a fresh Euclidean distance matrix is filled below the
    diagonal, the clustering is run, and the test checks the tree length,
    calls ``self.rectestlen`` on the root, and verifies that after attaching
    the table as ``mapping.objects`` the first element is the first example.
    """
    table = orange.ExampleTable("iris")
    euclidean = orange.ExamplesDistanceConstructor_Euclidean(table)
    kinds = orange.HierarchicalClustering.Linkage

    def build_distances():
        # A new matrix is built for every linkage, as in the original loop.
        distances = orange.SymMatrix(len(table))
        for row, example in enumerate(table):
            for col in range(row):
                distances[row, col] = euclidean(example, table[col])
        return distances

    for linkage in (kinds.Single, kinds.Average, kinds.Complete, kinds.Ward):
        root = orange.HierarchicalClustering(build_distances(), linkage=linkage)
        self.assertEqual(len(root), len(table))
        self.rectestlen(root)
        root.mapping.objects = table
        self.assertEqual(root[0], table[0])
def hierarchicalClustering(
        data,
        distanceConstructor=orange.ExamplesDistanceConstructor_Euclidean,
        linkage=orange.HierarchicalClustering.Average,
        order=False,
        progressCallback=None):
    """Return a hierarchical clustering of the data set.

    Pairwise distances from ``distanceConstructor(data)`` fill the matrix on
    and below the diagonal.  When ``progressCallback`` is given, clustering
    progress is reported as ``value * 100`` (halved when ``order`` is set) and
    leaf ordering as ``50 + value / 2``.  ``orderLeaves`` runs only when
    ``order`` is true.
    """
    distance = distanceConstructor(data)
    size = len(data)
    matrix = orange.SymMatrix(size)
    for row in range(size):
        for col in range(row + 1):
            matrix[row, col] = distance(data[row], data[col])

    if progressCallback:
        def clustering_progress(value, obj=None):
            # Clustering takes the first half of the bar when ordering follows.
            return progressCallback(value * 100.0 / (2 if order else 1))

        def ordering_progress(value):
            return progressCallback(50.0 + value / 2)
    else:
        clustering_progress = None
        ordering_progress = None

    root = orange.HierarchicalClustering(
        matrix, linkage=linkage, progressCallback=clustering_progress)
    if order:
        orderLeaves(root, matrix, progressCallback=ordering_progress)
    return root
# NOTE(review): `f` is not defined anywhere in the visible source; this
# statement looks like the tail of a definition that starts before this
# snippet -- confirm against the full file.
print int(round(100 * f)),
repTime("Loading data")
data = orange.ExampleTable("iris")

repTime("Computing distances")
matrix = orange.SymMatrix(len(data))
# Attach the examples to the matrix under the attribute name "objects".
matrix.setattr("objects", data)
distance = orange.ExamplesDistanceConstructor_Euclidean(data)
# Only the cells above the diagonal (i2 > i1) are assigned.
for i1, ex1 in enumerate(data):
    for i2 in range(i1 + 1, len(data)):
        matrix[i1, i2] = distance(ex1, data[i2])

# NOTE(review): the message says "single linkage" but the linkage set below
# is `clustering.Average` -- confirm which one is intended.
repTime("Hierarchical clustering (single linkage)")
clustering = orange.HierarchicalClustering()
clustering.linkage = clustering.Average
# presumably lets the clustering reuse/overwrite `matrix` in place -- TODO confirm
clustering.overwriteMatrix = 1
root = clustering(matrix)
repTime("Done.")


def prune(cluster, togo):
    """Recursively cut branches from `cluster` once `togo` drops below zero.

    When `cluster` has branches and `togo` is negative, its branches are
    set to None.  Otherwise each branch is visited with `togo` reduced by
    this cluster's `height`.  Clusters without branches are left untouched.
    """
    if cluster.branches:
        if togo < 0:
            cluster.branches = None
        else:
            for branch in cluster.branches:
                prune(branch, togo - cluster.height)
import orange
from orngClustering import *

# Script: cluster the iris examples with average linkage, plot a dendrogram
# to "graph.png", and print the sum of distances between neighbouring
# elements of root.mapping before and after orderLeaves.

# Alternative data sets, left disabled:
# data = orange.ExampleTable("doc//datasets//brown-selected.tab")
data = orange.ExampleTable("iris")
# data = orange.ExampleTable("doc//datasets//zoo.tab")
# data = orange.ExampleTable("doc//datasets//titanic.tab")

# Disabled example of building the matrix from explicit lower-triangular rows:
# m = [[], [ 3], [ 2, 4], [17, 5, 4], [ 2, 8, 3, 8], [ 7, 5, 10, 11, 2], [ 8, 4, 1, 5, 11, 13], [ 4, 7, 12, 8, 10, 1, 5], [13, 9, 14, 15, 7, 8, 4, 6], [12, 10, 11, 15, 2, 5, 7, 3, 1]]
# matrix = orange.SymMatrix(m)

dist = orange.ExamplesDistanceConstructor_Euclidean(data)
matrix = orange.SymMatrix(len(data))
# matrix.setattr('items', data)
# Fill the cells on and below the diagonal (j <= i).
for i in range(len(data)):
    for j in range(i + 1):
        matrix[i, j] = dist(data[i], data[j])

root = orange.HierarchicalClustering(
    matrix, linkage=orange.HierarchicalClustering.Average)
# root.mapping.objects = [str(ex.getclass()) for ex in data]

# Each leaf is labelled with the string form of its example's class value.
d = DendrogramPlot(root, data=data, labels=[str(ex.getclass()) for ex in data], width=500, height=2000)
d.set_matrix_color_schema([(0, 255, 0), (255, 0, 0)], 0.0, 1.0)
# d.setClusterColors({root.left:(0,255,0), root.right:(0,0,255)})
d.plot("graph.png")

# Sum of matrix entries between consecutive elements of root.mapping.
print "Sum:", sum([
    matrix[root.mapping[i], root.mapping[i + 1]]
    for i in range(len(root.mapping) - 1)
])
orderLeaves(root, matrix)
# NOTE(review): the statement below is cut off in the visible source; the
# remainder of the expression is not available here.
print "Sum:", sum([
def sortClustering(self):
    """Cluster ``self.matrix`` with average linkage and store the leaf order.

    The tree is kept in ``self.rootCluster`` and its ``mapping`` is copied
    into ``self.order`` as a list.
    """
    average = orange.HierarchicalClustering.Average
    tree = orange.HierarchicalClustering(self.matrix, linkage=average)
    self.rootCluster = tree
    self.order = [element for element in tree.mapping]