Example #1
0
def perform_orange_clustering(mss_id):
    """Cluster tasks hierarchically and return the result as a TreeNode tree.

    The pairwise distance matrix and the index-to-name mapping are loaded
    with ``helper.load`` from a fixed path, clustered with Orange's
    average-linkage hierarchical clustering, and the resulting binary
    cluster tree is mirrored into ``TreeNode`` objects (breadth first).
    The name of every created node is printed.

    Args:
        mss_id: currently unused; it was only consumed by the disabled
            ``compute_task_distances(mss_id)`` call that the on-disk
            distances replace.

    Returns:
        The ``TreeNode`` named "root". Leaves are named via ``id_to_name``;
        inner nodes are named by the string form of their member-index tuple.
        Every edge is added with weight 1.0.
    """
    import collections

    import orange
    from task_similarities import TreeNode
    import helper

    # Distances are precomputed; the disabled alternative was
    # compute_task_distances(mss_id).
    p = '/fml/ag-raetsch/home/cwidmer'
    (dist_full, id_to_name) = helper.load(p + "/dist")

    # orange.SymMatrix takes lower-triangular rows: row i lists the distances
    # from item i to items 0..i-1. The values are read from the upper
    # triangle of dist_full (dist_full[j, i] with j < i), so matrix index i
    # stays aligned with the keys of id_to_name.
    num_tasks = len(dist_full)
    lower_rows = [[dist_full[j, i] for j in range(i)]
                  for i in range(num_tasks)]

    m = orange.SymMatrix(lower_rows)

    root = orange.HierarchicalClustering(
        m, linkage=orange.HierarchicalClustering.Average)
    root_node = TreeNode("root")

    # Breadth-first walk pairing each cluster with the TreeNode built for it.
    pending = collections.deque([(root, root_node)])

    while pending:
        cluster, node = pending.popleft()

        for child in (cluster.left, cluster.right):
            # skip missing / empty branches
            if not child:
                continue

            members = tuple(child)
            if len(members) == 1:
                # a singleton cluster is a leaf: use the task's real name
                name = id_to_name[members[0]]
            else:
                name = str(members)
            print(name)

            child_node = TreeNode(name)
            node.add_child(child_node, 1.0)
            pending.append((child, child_node))

    return root_node
Example #2
0
 def hierarchical_clustering(linkage, distance_matrix):
     """Run Orange hierarchical clustering with a linkage chosen by index.

     ``linkage`` is an index into the table below (0 = single, 1 = average,
     2 = Ward, 3 = complete); ``distance_matrix`` is passed straight through
     to ``orange.HierarchicalClustering``, whose result is returned.
     """
     import orange
     clustering = orange.HierarchicalClustering
     # (label, Orange linkage constant) pairs; only the constant is used here.
     linkage_table = [
         ("Single linkage", clustering.Single),
         ("Average linkage", clustering.Average),
         ("Ward's linkage", clustering.Ward),
         ("Complete linkage", clustering.Complete),
     ]
     chosen = linkage_table[linkage][1]
     return clustering(distance_matrix, linkage=chosen)
Example #3
0
 def sortClusteringOrdered(self):
     """Cluster ``self.matrix`` (average linkage) and order the leaves.

     Stores the cluster tree in ``self.rootCluster``, runs
     ``orngClustering.orderLeaves`` on it with ``self.progressBarSet`` as the
     progress callback (bracketed by progressBarInit/progressBarFinished),
     and stores the resulting leaf mapping as a list in ``self.order``.
     """
     average = orange.HierarchicalClustering.Average
     self.rootCluster = orange.HierarchicalClustering(self.matrix,
                                                      linkage=average)
     import orngClustering
     self.progressBarInit()
     orngClustering.orderLeaves(
         self.rootCluster,
         self.matrix,
         self.progressBarSet,
     )
     self.progressBarFinished()
     self.order = list(self.rootCluster.mapping)
Example #4
0
 def run_clustering(self):
     """Cluster ``self.matrix`` and display the resulting tree.

     Does nothing when ``self.matrix`` is falsy. Otherwise the linkage is
     taken from ``self.linkage[self.Linkage][1]``, progress values reported
     by Orange are multiplied by 100 before being forwarded to
     ``self.progressBarSet``, the result is stored in ``self.root_cluster``
     and ``self.display_tree()`` is called.
     """
     if not self.matrix:
         return

     def report_progress(value, _unused):
         # forward the reported value scaled by 100
         return self.progressBarSet(value * 100)

     self.progressBarInit()
     linkage_method = self.linkage[self.Linkage][1]
     self.root_cluster = orange.HierarchicalClustering(
         self.matrix,
         linkage=linkage_method,
         progressCallback=report_progress)
     self.progressBarFinished()
     self.display_tree()
Example #5
0
def hierarchicalClustering_attributes(data, distance=None, linkage=orange.HierarchicalClustering.Average, order=False, progressCallback=None):
    """Return a hierarchical clustering of the attributes in the data set.

    A symmetric matrix over ``data.domain.attributes`` is filled, for every
    attribute pair, with the ``.p`` attribute of
    ``orange.PearsonCorrelation`` and then clustered with ``linkage``.
    When ``order`` is true the leaves are additionally ordered with
    ``orderLeaves``. ``progressCallback`` is handed to both steps unchanged.
    The ``distance`` argument is accepted but not used.
    """
    attribute_count = len(data.domain.attributes)
    matrix = orange.SymMatrix(attribute_count)
    # Only pairs with col < row are assigned; the first row has none.
    for row in range(1, attribute_count):
        for col in range(row):
            correlation = orange.PearsonCorrelation(row, col, data, 0)
            matrix[row, col] = correlation.p
    root = orange.HierarchicalClustering(
        matrix, linkage=linkage, progressCallback=progressCallback)
    if not order:
        return root
    orderLeaves(root, matrix, progressCallback=progressCallback)
    return root
Example #6
0
 def hierarchical_clustering(linkage, distance_matrix):
     """Run Orange hierarchical clustering with a linkage chosen by index.

     ``linkage`` is an index into the table below (0 = single, 1 = average,
     2 = Ward, 3 = complete). A ``TypeError`` raised by the lookup or by the
     clustering call is reported on standard output and ``None`` is returned
     instead of the cluster tree.
     """
     import Orange
     import orange
     import sys

     clustering = orange.HierarchicalClustering
     linkage_table = [
         ("Single linkage", clustering.Single),
         ("Average linkage", clustering.Average),
         ("Ward's linkage", clustering.Ward),
         ("Complete linkage", clustering.Complete),
     ]
     try:
         # the table lookup stays inside the try: a bad index type is
         # reported the same way as a bad matrix
         return clustering(distance_matrix,
                           linkage=linkage_table[linkage][1])
     except TypeError as e:
         exc_type = sys.exc_info()[0]
         # single-argument print(...) emits the same text under the
         # Python 2 print statement
         print("hierarchical_clustering:" + " " + str(exc_type))
         print(e)
     return None
Example #7
0
 def test_iris(self):
     """Cluster the iris data with each linkage and check the result.

     For Single, Average, Complete and Ward linkage a fresh Euclidean
     distance matrix is built and clustered; the root must contain as many
     elements as the data, pass ``self.rectestlen``, and - once the data is
     attached as ``root.mapping.objects`` - yield ``data[0]`` as element 0.
     """
     data = orange.ExampleTable("iris")
     euclidean = orange.ExamplesDistanceConstructor_Euclidean(data)
     kinds = orange.HierarchicalClustering.Linkage
     for linkage in (kinds.Single, kinds.Average, kinds.Complete, kinds.Ward):
         # the matrix is rebuilt for every linkage
         dist = orange.SymMatrix(len(data))
         for row, example in enumerate(data):
             for col in range(row):
                 dist[row, col] = euclidean(example, data[col])

         root = orange.HierarchicalClustering(dist, linkage=linkage)
         self.assertEqual(len(root), len(data))
         self.rectestlen(root)

         root.mapping.objects = data
         self.assertEqual(root[0], data[0])
Example #8
0
def hierarchicalClustering(data,
                           distanceConstructor=orange.ExamplesDistanceConstructor_Euclidean,
                           linkage=orange.HierarchicalClustering.Average,
                           order=False,
                           progressCallback=None):
    """Return a hierarchical clustering of the data set.

    A distance measure is built with ``distanceConstructor(data)`` and used
    to fill a symmetric matrix (diagonal included), which is then clustered
    with ``linkage``. When ``order`` is true the leaves are additionally
    ordered with ``orderLeaves``.

    If ``progressCallback`` is given, the clustering step reports
    ``value * 100`` (halved when ``order`` is true) and the ordering step
    reports ``50 + value / 2``.
    """
    distance = distanceConstructor(data)
    matrix = orange.SymMatrix(len(data))
    for row in range(len(data)):
        for col in range(row + 1):
            matrix[row, col] = distance(data[row], data[col])

    if progressCallback:
        def clustering_progress(value, obj=None):
            return progressCallback(value*100.0/(2 if order else 1))

        def ordering_progress(value):
            return progressCallback(50.0 + value/2)
    else:
        clustering_progress = None
        ordering_progress = None

    root = orange.HierarchicalClustering(
        matrix, linkage=linkage, progressCallback=clustering_progress)
    if order:
        orderLeaves(root, matrix, progressCallback=ordering_progress)
    return root
Example #9
0
    print int(round(100 * f)),


# Timed walkthrough: load the iris data, build a distance matrix and cluster
# it. repTime is defined outside this excerpt; it is called with a label
# before each stage.
repTime("Loading data")
data = orange.ExampleTable("iris")

repTime("Computing distances")
matrix = orange.SymMatrix(len(data))
matrix.setattr("objects", data)
distance = orange.ExamplesDistanceConstructor_Euclidean(data)
# Only index pairs with i1 < i2 are assigned.
for i1, ex1 in enumerate(data):
    for i2 in range(i1 + 1, len(data)):
        matrix[i1, i2] = distance(ex1, data[i2])

# The label names the linkage that is configured just below (Average).
repTime("Hierarchical clustering (average linkage)")
clustering = orange.HierarchicalClustering()
clustering.linkage = clustering.Average
clustering.overwriteMatrix = 1
root = clustering(matrix)

repTime("Done.")


def prune(cluster, togo):
    """Cut the cluster tree below a height budget, in place.

    ``togo`` is the remaining budget. A cluster that still has branches when
    the budget is negative loses them (``branches`` is set to ``None``);
    otherwise each branch is pruned with the budget reduced by this
    cluster's ``height``. Clusters without branches are left untouched.
    """
    if not cluster.branches:
        return
    if togo < 0:
        cluster.branches = None
        return
    remaining = togo - cluster.height
    for branch in cluster.branches:
        prune(branch, remaining)
Example #10
0
import orange
from orngClustering import *

# Demo script: cluster the iris data set with average linkage, draw a
# dendrogram, and print the sum of distances between neighbouring leaves
# before the leaves are reordered with orderLeaves.
# Alternative data sets are kept below, disabled.
# data = orange.ExampleTable("doc//datasets//brown-selected.tab")
data = orange.ExampleTable("iris")
# data = orange.ExampleTable("doc//datasets//zoo.tab")
# data = orange.ExampleTable("doc//datasets//titanic.tab")
# Disabled alternative: build the matrix from hand-written lower-triangular
# rows instead of computing distances from data.
# m = [[], [ 3], [ 2, 4], [17, 5, 4], [ 2, 8, 3, 8], [ 7, 5, 10, 11, 2], [ 8, 4, 1, 5, 11, 13], [ 4, 7, 12, 8, 10, 1, 5], [13, 9, 14, 15, 7, 8, 4, 6], [12, 10, 11, 15, 2, 5, 7, 3, 1]]
# matrix = orange.SymMatrix(m)
dist = orange.ExamplesDistanceConstructor_Euclidean(data)
matrix = orange.SymMatrix(len(data))
# matrix.setattr('items', data)
# Fill every pair with j <= i (diagonal included) with the pairwise distance.
for i in range(len(data)):
    for j in range(i + 1):
        matrix[i, j] = dist(data[i], data[j])
root = orange.HierarchicalClustering(
    matrix, linkage=orange.HierarchicalClustering.Average)
# root.mapping.objects = [str(ex.getclass()) for ex in data]
# DendrogramPlot and orderLeaves presumably come from the orngClustering
# star import at the top of the script - TODO confirm.
# Each leaf is labelled with the string form of its example's class value.
d = DendrogramPlot(root,
                   data=data,
                   labels=[str(ex.getclass()) for ex in data],
                   width=500,
                   height=2000)
d.set_matrix_color_schema([(0, 255, 0), (255, 0, 0)], 0.0, 1.0)
# d.setClusterColors({root.left:(0,255,0), root.right:(0,0,255)})
# NOTE(review): presumably writes the rendered dendrogram to graph.png.
d.plot("graph.png")
# Sum of matrix entries between consecutive elements of root.mapping,
# taken before orderLeaves runs.
print "Sum:", sum([
    matrix[root.mapping[i], root.mapping[i + 1]]
    for i in range(len(root.mapping) - 1)
])
orderLeaves(root, matrix)
print "Sum:", sum([
Example #11
0
 def sortClustering(self):
     """Cluster ``self.matrix`` with average linkage and record the order.

     The cluster tree is stored in ``self.rootCluster`` and its leaf mapping
     is stored as a list in ``self.order``.
     """
     cluster_matrix = orange.HierarchicalClustering
     self.rootCluster = cluster_matrix(self.matrix,
                                       linkage=cluster_matrix.Average)
     leaf_mapping = self.rootCluster.mapping
     self.order = list(leaf_mapping)