def makeDecisionTree(data, attributes, default, target_attribute, iteration, numeric_attrs, max_depth=10):
    """Recursively build a decision tree from ``data``.

    Args:
        data: Sequence of example rows; the class label is read from the
            last column of a row (``row[-1]``).
        attributes: Indexable collection of attribute names, looked up with
            the attribute index chosen for the split.
        default: Label used for the leaf when there are no examples, no
            usable split, or the depth limit has been reached.
        target_attribute: Forwarded unchanged to ``selectAttr``.
        iteration: Depth counter of the caller; it is incremented on entry.
        numeric_attrs: Forwarded unchanged to ``selectAttr`` and ``makeSplit``.
        max_depth: Largest value ``iteration`` may reach (after the
            increment) before the tree is cut off with a default leaf.

    Returns:
        A ``Leaf`` or a ``MiddleNode`` that roots the (sub)tree.
    """
    iteration += 1

    # Guard clauses: every situation that ends in a plain leaf.
    if iteration > max_depth:
        return Leaf(default)
    if not data:
        return Leaf(default)
    if one_class(data):
        return Leaf(data[0][-1])

    best_attr = selectAttr(data, attributes, target_attribute, numeric_attrs)
    if best_attr is False:
        # selectAttr signals "nothing worth splitting on" with False.
        return Leaf(default)

    # New decision node with root test *best_attr*.
    # best_attr[0] is the attribute index; best_attr[1] is presumably the
    # split value/threshold -- verify against selectAttr.
    split_examples = makeSplit(data, best_attr[0], best_attr[1], numeric_attrs)
    best_attr.append(split_examples['numeric'])
    best_attr.append(attributes[best_attr[0]])
    best_attr.append(split_examples["mode"])
    tree = MiddleNode(best_attr)

    # .items() works on both Python 2 and Python 3 (.iteritems() is 2-only).
    for branch_lab, branch_examples in split_examples['branches'].items():
        if not branch_examples:
            # Skip only this empty branch. The previous `break` abandoned
            # every remaining branch, and which ones were lost depended on
            # dict iteration order.
            continue
        sub_default = mode(branch_examples, -1)
        subtree = makeDecisionTree(branch_examples, attributes, sub_default,
                                   target_attribute, iteration, numeric_attrs,
                                   max_depth)
        tree.add_branch(branch_lab, subtree, sub_default)
    return tree
Example #2
0
def neighbor_joining(m, ids=None):
    """Build a tree from a distance matrix with the Neighbor Joining method.

    The algorithm looks for a tree approximating the measured distances by
    greedily joining, round after round, the pair of elements found at the
    minimum of the Q matrix (the choice that minimizes the total sum of
    edge lengths).  When only two elements are left they are joined
    directly, each receiving half of their remaining distance.

    ``m`` is a square distance matrix.  ``ids`` optionally supplies one
    identifier per element; artificial ids are generated when it is None.
    """
    size = len(m)
    if ids is None:
        ids = artificial_ids(size)

    # Average every entry with its transpose: the result is symmetric and
    # made of floats even if the input was not.
    m = [[(m[col][row] + m[row][col]) / 2.0 for col in xrange(size)]
         for row in xrange(size)]

    tree = [Leaf(label) for label in ids]

    # One join per round until only two elements remain.
    for _ in xrange(size, 2, -1):
        row_sums = map(sum, m)
        q = calculate_q(m, row_sums)

        # Closest pair, smaller index first.
        i, j = sorted(matrix_argmin(q))
        m, di, dj = update_distance_matrix(m, row_sums, i, j)
        tree = join_neighbors(tree, i, j, di, dj)

    # Final join: split the last distance evenly between the two elements.
    half = m[0][1] / 2
    return join_neighbors(tree, 0, 1, half, half)[0]
Example #3
0
def build_tree(dataset, attr_names):
    """
    Grow a binary tree top-down by recursive splitting.

    The best split condition for the current rows is computed first.  When
    that split yields no impurity reduction the rows become a Leaf.
    Otherwise the rows are partitioned by the condition and a subtree is
    built for the left partition, then for the right one.

    :param dataset: a 2d list representing a list of data objects
    :param attr_names: a list containing attribute names
    :return: a Leaf holding ``dataset``, or a Node carrying the best split
        condition with the left and right subtrees as its children.
    """
    condition, gain = best_split(dataset, attr_names)

    # No impurity reduction: stop splitting here.
    if gain == 0:
        return Leaf(dataset)

    lhs_rows, rhs_rows = split_on_condition(dataset, condition)

    # Arguments are evaluated left to right, so the left subtree is still
    # built before the right one.
    return Node(
        condition,
        build_tree(lhs_rows, attr_names),
        build_tree(rhs_rows, attr_names),
    )
Example #4
0
def shunting_yard_AST(tokens: Iterable[str]) -> Tree:
    """
    Take infix query tokens and produce a Tree of operators with the query
    terms at the leaves (shunting-yard algorithm).

    *Description of the algorithm*:
    1. read a token
    - operand (anything not in OPERATORS) => wrap in a Leaf and queue it
    - operator o1 => while the stack top o2 satisfies o1 <= o2, move o2 to
      the output; then push o1
    - ( => push it on the stack
    - ) => move the stack to the output until a ( is found (or error)
    2. when no tokens are left
    - move the remaining operators to the output (a leftover parenthesis
      is an error)

    :param tokens: any iterable of token strings; it is materialized once,
        so generators are accepted as well as lists.
    :return: the root of the tree, or ``Leaf('__Empty__')`` for no tokens.
    :raises Exception: on mismatched or leftover parentheses.
    """
    # The signature promises Iterable, but len() only works on sized
    # containers; materialize so generators/iterators work too.
    tokens = list(tokens)
    if not tokens:  # Support empty query
        return Leaf('__Empty__')

    output = []
    stack = []
    for token in tokens:
        if token not in OPERATORS:
            output.append(Leaf(token))
        elif token == '(':
            stack.append(token)
        elif token == ')':
            # Unwind to the matching '(' and discard it.
            pop = ''
            while stack:
                pop = stack.pop()
                if pop == '(':
                    break
                add_node(output, pop)
            if pop != '(':
                raise Exception("Mismatching parenthesis", pop, stack)
        else:
            # Pop operators of greater or equal precedence.  A '(' on the
            # stack is expected to rank below every operator in OPERATORS
            # so it stops the unwinding -- TODO confirm against OPERATORS.
            while stack and OPERATORS[token] <= OPERATORS[stack[-1]]:
                add_node(output, stack.pop())
            stack.append(token)

    while stack:
        # Exact token match instead of a substring test against "()".
        if stack[-1] in ('(', ')'):
            raise Exception("Unexpected ( )")
        add_node(output, stack.pop())
    return output.pop()
Example #5
0
def _random_joining(ids):
    """Build a random tree over ``ids``.

    Starts with one Leaf per id and performs ``len(ids) - 1`` joins, each on
    two distinct randomly sampled positions with two random branch values,
    then returns the first remaining entry.
    """
    rng = Random()
    forest = [Leaf(label) for label in ids]
    join_count = len(ids) - 1
    for _ in xrange(join_count):
        picked = rng.sample(xrange(len(forest)), 2)
        low, high = sorted(picked)
        forest = join_neighbors(forest, low, high, rng.random(), rng.random())

    return forest[0]
Example #6
0
 def expand(start, end, tag):
     """Yield every tree rooted by tag that covers words[start:end]."""
     # A one-word span can be a leaf if the word may carry this tag.
     if end - start == 1:
         only_word = words[start]
         if tag in tags_for_word(only_word):
             yield Leaf(tag, only_word)
     # Tags without grammar rules have nothing further to offer.
     if tag not in grammar:
         return
     for expansion in grammar[tag]:
         for children in expand_all(start, end, expansion):
             yield Tree(tag, children)
Example #7
0
def id3(Y, D, U):
    """Build a decision tree with the ID3 algorithm.

    Args:
        Y: Sequence of class labels; one is drawn at random when a leaf has
            to be produced for an empty example set.
        D: Attributes (decisions) still available for splitting.  Each one
            is indexed with ``['values']``.  The sequence is NOT modified.
        U: Examples; the class of an example is read from ``example['y']``.

    Returns:
        A ``Leaf`` carrying a class label, or a ``Node`` for the chosen
        attribute with one ``Edge`` per subset produced by the split.
    """
    # Return a leaf when the examples contain a single class.
    if _is_one_class(U):
        if len(U) != 0:
            return Leaf(U[0]['y'])
        return Leaf(choice(Y))

    # Return a leaf when there is no attribute left to consider.
    if len(D) == 0:
        return Leaf(_most_common_class(U))

    # Choose the attribute with the highest information gain.
    d = max(D, key=lambda attr: _Inf_gain(attr, U))
    subsets = _divide_set(d, U)

    # Work on a copy.  Removing from D in place changed the caller's list
    # and, because all sibling subtrees shared that one list, attributes
    # consumed while building one branch were missing from later branches.
    remaining = list(D)
    remaining.remove(d)

    # Return the root with one edge per attribute value / subset pair.
    return Node(
        d,
        [Edge(d['values'][i], id3(Y, remaining, subsets[i]))
         for i in range(len(subsets))]
    )
Example #8
0
def create_leaf(tree, label, tag=None):
    """
    Build a leaf for the word found in a string-encoded tree.

    The module-level ``counter`` is incremented once the leaf exists.
    :param tree: the tree in a string format.
    :param label: the sentiment label of this leaf (if exists).
    :param tag: the constituent tag of this leaf.
    :return: the leaf created according to the word and labels.
    """
    global counter
    new_leaf = Leaf(label, find_word(tree), tag)
    counter += 1
    return new_leaf
Example #9
0
def read_tree(tokens):
    """Read the next well-formed tree, removing empty constituents.

    ``tokens`` is an iterator positioned just after an opening '(': the
    first token read is the tag, followed either by a single word and ')'
    (a leaf) or by a run of parenthesized sub-trees closed by ')'.

    Returns a Leaf, a Tree, or None when no branch of the constituent
    survives (sub-trees that are falsy or have a falsy tag are dropped).
    Raises AssertionError on malformed input and StopIteration if the
    tokens run out.
    """
    # Keep only the base tag: drop everything from the first '-', '=' or '|'.
    tag = next(tokens).split('-')[0].split('=')[0].split('|')[0]
    element = next(tokens)
    if element != '(':
        # Consume the closing parenthesis OUTSIDE the assert.  With the call
        # inside the assert, running under `python -O` strips the statement,
        # the token is never read, and the rest of the parse is shifted.
        closing = next(tokens)
        assert closing == ')', "expected ')' after leaf %r" % (element,)
        return Leaf(tag, element)
    branches = []
    while element != ')':
        assert element == '(', "expected '(' or ')', got %r" % (element,)
        branch = read_tree(tokens)
        if branch and branch.tag:
            branches.append(branch)
        element = next(tokens)
    if branches:
        return Tree(tag, branches)
    # Empty constituent: nothing survived.
    return None
Example #10
0
    def test_append(self):
        # Appending must place the item last, at the root and in a subtree.
        tree = self.t
        new_leaf = Leaf({'name': 'test leaf'})

        # Root level: append, then expect the leaf as the last root item.
        tree.append(new_leaf)
        self.assertEqual(new_leaf, tree[-1])

        # Subtree level: same check on the node found by name.
        node_four = tree.query('Node Four')
        node_four.append(new_leaf)
        self.assertEqual(new_leaf, node_four[-1])
Example #11
0
    def test_set_cell(self):
        # Work on the fixture tree.
        tree = self.t

        # Append a leaf that carries three column values to the root.
        leaf_spec = {'name': 'test leaf', 'columns': ['test1', 'test2', 'test3']}
        new_leaf = Leaf(leaf_spec)
        tree.append(new_leaf)

        # The leaf can be found again by its name.
        self.assertEqual(new_leaf.name, tree.query('test leaf').name)

        # Column 1 reads 'test1', addressed by leaf id or by leaf name.
        for key in (new_leaf.id, 'test leaf'):
            self.assertEqual('test1', tree.get_cell(key, 1))

        # Overwrite column 2 and read the new value back.
        tree.set_cell('test leaf', 2, 'TEST2')
        self.assertEqual('TEST2', tree.get_cell('test leaf', 2))
Example #12
0
 def add_leaf(self, tree, spot_info):
     """Create a leaf and register its fall and fade-out animations."""
     fall_time = randint(2000, 2500)
     leaf = Leaf(tree, spot_info, self.leaves, self.all_sprites)

     # Settings shared by the two falling animations.
     fall_options = dict(duration=fall_time, round_values=True)

     # The leaf's rect drops by fall_distance; leaf.land is the callback.
     rect_target = leaf.rect.centery + leaf.fall_distance
     rect_fall = Animation(centery=rect_target, **fall_options)
     rect_fall.callback = leaf.land
     rect_fall.start(leaf.rect)

     # The collider drops by the same distance over the same duration.
     collider_target = leaf.collider.centery + leaf.fall_distance
     collider_fall = Animation(centery=collider_target, **fall_options)
     collider_fall.start(leaf.collider)

     # Fade-out is delayed by the fall time; leaf.kill is the callback.
     fade_out = Animation(img_alpha=0,
                          duration=3000,
                          delay=fall_time,
                          round_values=True)
     fade_out.callback = leaf.kill
     fade_out.update_callback = leaf.set_alpha
     fade_out.start(leaf)

     self.animations.add(rect_fall, collider_fall, fade_out)
Example #13
0
    def test_insert(self):
        # Checks insert() at the front, at index 2 and at the end, first on
        # the root of the tree and then on a subtree.

        # Action, get tree.
        # NOTE(review): the deep copy below is discarded by the very next
        # assignment, so the first half of this test mutates the shared
        # fixture self.t (item counter and inserted leaves). If isolation
        # was intended, the `t = self.t` line should go -- confirm against
        # the fixture before changing it.
        t = deepcopy(self.t)
        t = self.t
        # Action, set item counter to 0.
        t.items = 0

        # Action, create leaves.
        leaf1 = Leaf({'name': 'test leaf1'})
        leaf2 = Leaf({'name': 'test leaf2'})
        leaf3 = Leaf({'name': 'test leaf3'})

        # Action, insert leaves at the front, at index 2 and at the end.
        t.insert(0, leaf1)
        t.insert(2, leaf2)
        t.insert(len(t), leaf3)

        # Assert, leaf == item inserted at index.
        self.assertEqual(leaf1, t[0])
        self.assertEqual(leaf2, t[2])
        self.assertEqual(leaf3, t[len(t)-1])

        # Action, create fresh leaves for the subtree part.
        leaf1 = Leaf({'name': 'test leaf1'})
        leaf2 = Leaf({'name': 'test leaf2'})
        leaf3 = Leaf({'name': 'test leaf3'})

        # Action, get node (from a deep copy of the fixture this time).
        t = deepcopy(self.t)
        subtree = t.query('Node Three')

        # Action, insert leaves at the front, at index 2 and at the end.
        subtree.insert(0, leaf1)
        subtree.insert(2, leaf2)
        subtree.insert(len(subtree), leaf3)

        # Assert, the queried node is the one that was modified and
        # leaf == item inserted at index.
        self.assertEqual(subtree, t.query('Node Three'))
        self.assertEqual(leaf1, subtree[0])
        self.assertEqual(leaf2, subtree[2])
        self.assertEqual(leaf3, subtree[len(subtree)-1])
Example #14
0
def nja(matrix):
    """Perform one neighbor-joining step on a pairwise distance dictionary.

    Args:
        matrix: dict mapping ``(a, b)`` label pairs to distances.  Both
            orientations ``(a, b)`` and ``(b, a)`` must be present for every
            pair of distinct labels.  The dict is modified in place: entries
            for the new joined node (keyed by the tuple ``(i, j)``) are
            added and every entry involving ``i`` or ``j`` is removed.

    Returns:
        A ``Node`` whose children are leaves for the joined pair, with the
        branch lengths stored in ``dleft`` (to i) and ``dright`` (to j).
    """
    # Compute all unique keys in the matrix.
    keys = set()
    for pair in matrix:
        keys.update(pair)
    n = len(keys)

    # Step 1: for every key i, r(i) = sum of the distances from i.
    keysum = dict()
    for i in keys:
        keysum[i] = sum(v for (a, _), v in matrix.items() if a == i)

    def Q(i, j):
        """Step 2 criterion: d(i, j) - (r(i) + r(j)) / (n - 2)."""
        if n <= 2:
            # With two elements there is nothing to correct for (and the
            # divisor would be zero).
            return matrix[(i, j)]
        # The divisor is (n - 2); writing `/ n - 2` divided by n and then
        # subtracted 2 because of operator precedence.
        return matrix[(i, j)] - (keysum[i] + keysum[j]) / float(n - 2)

    def branch_correction(a, b):
        """(r(a) - r(b)) / (n - 2), or 0 when only two elements exist."""
        if n <= 2:
            return 0.0
        return (keysum[a] - keysum[b]) / float(n - 2)

    # Evaluate the criterion for all ordered pairs.
    Qmatrix = dict()
    for i, j in itertools.permutations(keys, 2):
        Qmatrix[(i, j)] = Q(i, j)

    # Find the minimum i,j pair by sorting the dictionary by its values.
    minpair = sorted(Qmatrix.items(), key=lambda x: x[1])[0][0]
    i, j = minpair

    # Create a new node for the tree, with i and j as leaves.
    inode = Leaf(str(i))
    jnode = Leaf(str(j))
    node = Node(inode, jnode)

    # Distances from the new node to i and to j.
    d_ij = matrix[(i, j)]
    node.dleft = (d_ij + branch_correction(i, j)) / 2.0
    node.dright = (d_ij + branch_correction(j, i)) / 2.0

    # Distances from every other element to the new node.
    matrix[(minpair, minpair)] = 0
    for k in keys:
        # `and`, not `or`: the old test was always true, so i and j
        # themselves were processed as if they were third parties.
        if k != i and k != j:
            d_new = (matrix[(i, k)] + matrix[(j, k)] - d_ij) / 2.0
            matrix[(minpair, k)] = d_new
            matrix[(k, minpair)] = d_new

    # Delete all entries of the old i and j keys from the matrix.  The list
    # is built first so the dict is not resized while being iterated.
    todel = [pair for pair in matrix if i in pair or j in pair]
    for key in todel:
        del matrix[key]

    return node
Example #15
0
from tree import Tree, Leaf
from print_tree import print_tree

# Lexicon: the set of leaves (tag, word) available to the generator.
lexicon = {
    Leaf('N', 'buffalo'),  # bison
    Leaf('V', 'buffalo'),  # intimidate
    Leaf('J', 'buffalo'),  # New York
    Leaf('R', 'that'),
}

# Grammar: maps a tag to its alternative expansions; every expansion is a
# list of tags.  (S/NP/VP/RP presumably stand for sentence, noun phrase,
# verb phrase and relative phrase.)
# Note that 'NP' can expand to a sequence starting with 'NP' again.
grammar = {
    'S': [['NP', 'VP']],
    'NP': [['N'], ['J', 'N'], ['NP', 'RP']],
    'VP': [['V', 'NP']],
    'RP': [['R', 'NP', 'V'], ['NP', 'V']],
}


def expand(tag):
    """Yield every tree that can be rooted by tag.

    Lexicon leaves carrying the tag come first, followed by one Tree per
    branch sequence produced for each grammar expansion of the tag.
    """
    for entry in lexicon:
        if entry.tag != tag:
            continue
        yield entry
    if tag not in grammar:
        return
    for expansion in grammar[tag]:
        for children in expand_all(expansion):
            yield Tree(tag, children)


def expand_all(tags):
    """Yield all sequences of branches for a sequence of tags.

    Every yielded value is a list holding one tree per tag, in tag order.
    An empty ``tags`` yields exactly one empty list.  Results are produced
    lazily, so a recursive grammar can be explored incrementally.
    """
    # The function used to consist of the docstring only, so it returned
    # None and the `for branches in expand_all(tags)` loop in expand raised
    # TypeError.
    if not tags:
        yield []
        return
    first, rest = tags[0], tags[1:]
    for branch in expand(first):
        for others in expand_all(rest):
            yield [branch] + others
Example #16
0
from tree import Tree, Leaf
from print_tree import print_tree

# Lexicon: the set of leaves (tag, word) available to the generator.
lexicon = {
    Leaf('N', 'buffalo'),  # beasts
    Leaf('V', 'buffalo'),  # intimidate
}

# Grammar: maps a tag to its alternative expansions; every expansion is a
# list of tags.  (S/NP/VP presumably stand for sentence, noun phrase and
# verb phrase.)
grammar = {
    'S': [['NP', 'VP']],
    'NP': [['N']],
    'VP': [['V', 'NP']],
}


def expand(tag):
    """Yield all trees rooted by tag.

    Lexicon leaves whose tag matches come first, followed by one Tree per
    branch sequence that ``expand_all`` produces for each grammar expansion
    of the tag.
    """
    # The function used to consist of the docstring only and returned None
    # instead of yielding anything.
    for leaf in lexicon:
        if leaf.tag == tag:
            yield leaf
    if tag in grammar:
        for tags in grammar[tag]:
            for branches in expand_all(tags):
                yield Tree(tag, branches)


def expand_all(tags):
    """Yield all sequences of branches for a sequence of tags.

    Every yielded value is a list holding one tree per tag, in tag order.
    An empty ``tags`` yields exactly one empty list.  Results are produced
    lazily.
    """
    # The function used to consist of the docstring only and returned None
    # instead of yielding anything.
    if not tags:
        yield []
        return
    first, rest = tags[0], tags[1:]
    for branch in expand(first):
        for others in expand_all(rest):
            yield [branch] + others
Example #17
0
#!/usr/bin/python3

from pretty_printer import PreOrderVisitor, InOrderVisitor, LeavesVisitor
from tree import Node, Leaf

# Sample tree used by main().  Node's arguments look like
# (left, right, value) -- TODO confirm against tree.Node.
myTree = Node(Leaf(1), Node(Node(Leaf(3), Leaf(5), 4), Leaf(7), 6), 2)


def main():
    """Run each visitor over the sample tree, with a dashed line between runs."""
    visitor_classes = (PreOrderVisitor, InOrderVisitor, LeavesVisitor)
    for position, visitor_cls in enumerate(visitor_classes):
        # Separator goes between runs only, not before the first one.
        if position:
            print("----------")
        myTree.accept(visitor_cls())


if __name__ == "__main__":
    main()
Example #18
0
from tree import Tree, Leaf
from print_tree import print_tree

# Lexicon: the set of leaves (tag, word) available to the generator.
lexicon = {
    Leaf('N', 'buffalo'),  # beasts
    Leaf('V', 'buffalo'),  # intimidate
    Leaf('J', 'buffalo'),  # from New York
    Leaf('R', 'that')
}

# Grammar: maps a tag to its alternative expansions; every expansion is a
# list of tags.  (S/NP/VP/RP presumably stand for sentence, noun phrase,
# verb phrase and relative phrase.)
# Note that 'NP' can expand to a sequence starting with 'NP' again.
grammar = {
    'S': [['NP', 'VP']],
    'NP': [['N'], ['J', 'N'], ['NP', 'RP']],
    'VP': [['V', 'NP']],
    'RP': [['R', 'NP', 'V']],
}


def expand(tag):
    """Yield each tree that tag can root.

    Matching lexicon leaves are produced first; afterwards every grammar
    expansion of the tag contributes one Tree per branch sequence.
    """
    for word_leaf in lexicon:
        if tag == word_leaf.tag:
            yield word_leaf
    if tag not in grammar:
        return
    for rhs in grammar[tag]:
        for subtrees in expand_all(rhs):
            yield Tree(tag, subtrees)


def expand_all(tags):
    """Yield all sequences of branches for a sequence of tags."""