Exemple #1
0
 def reduce(self, rule):
     """Apply grammar rule number `rule` to the stacks, then perform the goto.

     Semantic action on the parse stack:
       * rule 1 -- pop the top entry and store it in self.result
       * rule 2 -- replace the top entry with tree.leaf(entry)
       * rule 3 -- pop the right then the left operand and push
                   tree.node(left, right)

     Every rule (including unknown ones) pops one state; rule 3 pops four
     more before that.  The state left on top, as reported by
     self.current_state(), selects the goto target:
       * state 0 -- 3 after rule 1, 4 after rule 2 or 3
       * state 2 -- 5
       * state 7 -- 8
     Any other combination performs no goto.
     """
     if rule == 1:
         self.result = self.parse_stack.pop()
     elif rule == 2:
         token = self.parse_stack.pop()
         self.parse_stack.append(tree.leaf(token))
     elif rule == 3:
         right = self.parse_stack.pop()
         left = self.parse_stack.pop()
         # Rule 3 discards four extra states here; the shared pop below
         # removes a fifth (presumably one per right-hand-side symbol --
         # confirm against the grammar).
         for _ in range(4):
             self.state_stack.pop()
         self.parse_stack.append(tree.node(left, right))

     # Pop shared by every rule.
     self.state_stack.pop()

     # Work out the goto target from the state now on top.
     exposed = self.current_state()
     target = None
     if exposed == 0:
         if rule == 1:
             target = 3
         elif rule in (2, 3):
             target = 4
     elif exposed == 2:
         target = 5
     elif exposed == 7:
         target = 8

     if target is not None:
         self.goto(target)
Exemple #2
0
	def to_tree_rec(self, node_index, parent_index):
		'''Build the tree object for the subtree at node_index, seen from parent_index.

		A terminal entry of self.tree unpacks as (taxon_id, parent) and
		becomes tree.leaf(taxon_id); its parent must equal parent_index.
		For any other entry, the items after the first are neighbouring node
		indices: the parent is removed from them and the first two remaining
		indices are converted recursively and joined with tree.node.
		'''
		if not self.is_terminal(node_index):
			neighbours = list(self.tree[node_index][1:])
			# Drop the edge we arrived by; what is left are the children.
			neighbours.remove(parent_index)
			subtrees = [self.to_tree_rec(neighbours[k], node_index) for k in (0, 1)]
			return tree.node(subtrees[0], subtrees[1])

		taxon_id, parent = self.tree[node_index]
		assert parent == parent_index
		return tree.leaf(taxon_id)
Exemple #3
0
def makeFontTree(font_list, matrix):
    """Cluster fonts into a single tree by repeatedly merging a pair.

    One tree.leaf is created per entry of font_list, with its ``ptr`` set to
    that entry's index.  On every pass getMinFontDistance(matrix) supplies a
    pair of indices (f1, f2); the two trees at those positions are joined
    under a new tree.branch, which becomes the last tree, and the distance
    matrix is rebuilt to match: rows/columns f1 and f2 are dropped and a new
    last row/column is added for the merged branch.  The distance from the
    branch to each surviving tree is sqrt(d1**2 + d2**2), where d1 and d2
    are that tree's distances to f1 and f2.

    Args:
        font_list: sequence of fonts; only its length and positions are used.
        matrix: list of rows of pairwise distances, indexed like font_list
            (assumed square -- TODO confirm with caller).  The list passed
            in is not modified; a new matrix is built on every pass.

    Returns:
        The single tree left once everything has been merged.

    Raises:
        ValueError: if getMinFontDistance returns (-1, -1).  Previously the
            matrix was printed and the loop carried on with index -1, which
            merged the last tree with itself, removed nothing, and therefore
            never terminated.
        IndexError: if font_list is empty (there is no tree to return).
    """
    print("making font tree from a list of %d" % len(font_list))

    # One leaf per font; ptr records the font's position in font_list.
    trees = []
    for i, _font in enumerate(font_list):
        leaf = tree.leaf()
        leaf.ptr = i
        trees.append(leaf)

    # Merge pairs until a single tree remains.
    while len(trees) > 1:
        print("merges remaining %d" % (len(trees) - 1))

        f1, f2 = getMinFontDistance(matrix)
        if (f1, f2) == (-1, -1):
            # Keep the diagnostic dump, then stop instead of looping forever.
            print(matrix)
            raise ValueError(
                "getMinFontDistance found no pair to merge "
                "with %d trees remaining" % len(trees))
        merged = (f1, f2)

        # Trees that are not part of this merge keep their relative order.
        new_trees = [t for i, t in enumerate(trees) if i not in merged]

        # Copy the matrix without the rows and columns of the merged pair.
        new_matrix = []
        for i, row in enumerate(matrix):
            if i in merged:
                continue
            new_matrix.append(
                [cell for j, cell in enumerate(row) if j not in merged])

        # The merged pair becomes a new branch, stored as the last tree.
        branch = tree.branch()
        branch.set_branches(trees[f1], trees[f2])
        new_trees.append(branch)

        # Distances from the new branch to every surviving tree.  A
        # leaf-count weighted average was tried here before but, per the
        # original author, did not produce great results.
        new_row = [
            math.sqrt(dist ** 2 + matrix[f2][i] ** 2)
            for i, dist in enumerate(matrix[f1])
            if i not in merged
        ]
        new_row.append(0)  # last row: distance of the branch to itself
        new_matrix.append(new_row)

        # Mirror the new last row into the last column of the older rows.
        surviving = len(matrix[0]) - 2
        for i, row in enumerate(new_matrix):
            if i < surviving:
                row.append(new_row[i])

        trees = new_trees
        matrix = new_matrix

    return trees[0]
Exemple #4
0
	def to_tree(self):
		'''Return a tree object for this structure, rooted next to the outgroup.

		The entry at self.outgroup_index unpacks as (outgroup, pointer).  The
		result is tree.node(leaf, rest), where leaf is tree.leaf(outgroup) and
		rest is the subtree built by to_tree_rec starting from pointer.
		'''
		outgroup_entry = self.tree[self.outgroup_index]
		outgroup_taxon, first_internal = outgroup_entry
		outgroup_leaf = tree.leaf(outgroup_taxon)
		remainder = self.to_tree_rec(first_internal, self.outgroup_index)
		return tree.node(outgroup_leaf, remainder)
Exemple #5
0
def makeFontTree(font_list, matrix):
    """Cluster fonts into a single tree by repeatedly merging a pair.

    One tree.leaf is created per entry of font_list, with its ``ptr`` set to
    that entry's index.  On every pass getMinFontDistance(matrix) supplies a
    pair of indices (f1, f2); the two trees at those positions are joined
    under a new tree.branch, which becomes the last tree, and the distance
    matrix is rebuilt to match: rows/columns f1 and f2 are dropped and a new
    last row/column is added for the merged branch.  The distance from the
    branch to each surviving tree is sqrt(d1**2 + d2**2), where d1 and d2
    are that tree's distances to f1 and f2.

    Args:
        font_list: sequence of fonts; only its length and positions are used.
        matrix: list of rows of pairwise distances, indexed like font_list
            (assumed square -- TODO confirm with caller).  The list passed
            in is not modified; a new matrix is built on every pass.

    Returns:
        The single tree left once everything has been merged.

    Raises:
        ValueError: if getMinFontDistance returns (-1, -1).  Previously the
            matrix was printed and the loop carried on with index -1, which
            merged the last tree with itself, removed nothing, and therefore
            never terminated.
        IndexError: if font_list is empty (there is no tree to return).
    """
    print("making font tree from a list of %d" % len(font_list))

    # One leaf per font; ptr records the font's position in font_list.
    trees = []
    for i, _font in enumerate(font_list):
        leaf = tree.leaf()
        leaf.ptr = i
        trees.append(leaf)

    # Merge pairs until a single tree remains.
    while len(trees) > 1:
        print("merges remaining %d" % (len(trees) - 1))

        f1, f2 = getMinFontDistance(matrix)
        if (f1, f2) == (-1, -1):
            # Keep the diagnostic dump, then stop instead of looping forever.
            print(matrix)
            raise ValueError(
                "getMinFontDistance found no pair to merge "
                "with %d trees remaining" % len(trees))
        merged = (f1, f2)

        # Trees that are not part of this merge keep their relative order.
        new_trees = [t for i, t in enumerate(trees) if i not in merged]

        # Copy the matrix without the rows and columns of the merged pair.
        new_matrix = []
        for i, row in enumerate(matrix):
            if i in merged:
                continue
            new_matrix.append(
                [cell for j, cell in enumerate(row) if j not in merged])

        # The merged pair becomes a new branch, stored as the last tree.
        branch = tree.branch()
        branch.set_branches(trees[f1], trees[f2])
        new_trees.append(branch)

        # Distances from the new branch to every surviving tree.  A
        # leaf-count weighted average was tried here before but, per the
        # original author, did not produce great results.
        new_row = [
            math.sqrt(dist ** 2 + matrix[f2][i] ** 2)
            for i, dist in enumerate(matrix[f1])
            if i not in merged
        ]
        new_row.append(0)  # last row: distance of the branch to itself
        new_matrix.append(new_row)

        # Mirror the new last row into the last column of the older rows.
        surviving = len(matrix[0]) - 2
        for i, row in enumerate(new_matrix):
            if i < surviving:
                row.append(new_row[i])

        trees = new_trees
        matrix = new_matrix

    return trees[0]