Ejemplo n.º 1
0
 def test_Insert_Node(self):
     """Insert two nodes, then check the tree length and a lookup by key."""
     tree = BST()
     nodes = [Node(21), Node(20)]
     for node in nodes:
         treeInsert(tree, node)
     self.assertEqual(2, len(tree))
     # A key that was inserted must be found again.
     self.assertIsNotNone(treeSearch(tree.root, 20))
Ejemplo n.º 2
0
 def _insert(self, value, currentNode):
     """Attach ``value`` as a new leaf somewhere below ``currentNode``.

     Values less than or equal to a node's value descend to the left,
     larger values descend to the right. The new ``Node`` records the node
     it was attached to as its ``parent``.
     """
     if value <= currentNode.value:
         if not currentNode.hasLeftChild():
             currentNode.left = Node(value, parent=currentNode)
             return
         self._insert(value, currentNode.left)
     elif currentNode.hasRightChild():
         self._insert(value, currentNode.right)
     else:
         currentNode.right = Node(value, parent=currentNode)
Ejemplo n.º 3
0
 def add_node(self, node, key):
     """Insert ``key`` into the subtree rooted at ``node``.

     A key equal to an existing node's key is ignored. Smaller keys go to
     the left subtree, larger keys to the right one.
     """
     if node.key == key:
         return
     # Pick the side once, then either attach a leaf or keep descending.
     side = "left" if node.key > key else "right"
     child = getattr(node, side)
     if child is None:
         setattr(node, side, Node(key))
     else:
         self.add_node(child, key)
Ejemplo n.º 4
0
    def test_Delete_Node(self):
        """Insert five nodes, delete the one with key 20, and check it is gone."""
        tree = BST()
        for key in (21, 20, 35, 64, 32):
            treeInsert(tree, Node(key))

        self.assertEqual(5, len(tree))

        # The node must be found before deletion and not found afterwards.
        target = treeSearch(tree.root, 20)
        self.assertIsNotNone(target)
        treeDelete(tree, target)
        self.assertIsNone(treeSearch(tree.root, 20))
Ejemplo n.º 5
0
def randomForest(data, attributes, numAttributes, numDataPoints, numTrees, holdoutSet):
	"""Build ``numTrees`` trees on random samples and classify ``holdoutSet`` by vote.

	Returns a pair: a list of ``(dataPoint['id'], winning classification)``
	tuples, and the number of holdout points for which no tree produced a
	classification.
	"""
	numeric = isNumeric(data[0])

	forest = []
	for _ in range(numTrees):
		sampleData, sampleAttributes = selectRandomData(data, attributes, numAttributes, numDataPoints)
		treeRoot = Node('Root', None)
		buildTree(sampleData, sampleAttributes, treeRoot, THRESHOLD)
		forest.append(treeRoot)

	winners = []
	skipped = 0
	for point in holdoutSet:
		if numeric:
			votes = [classifyPointNum(tree, point) for tree in forest]
		else:
			votes = [classifyPointCat(tree, point) for tree in forest]
		# Trees that could not classify the point return None; ignore them.
		votes = [vote for vote in votes if vote is not None]
		if not votes:
			skipped += 1
			continue
		winners.append((point['id'], max(votes, key=votes.count)))

	return winners, skipped
Ejemplo n.º 6
0
class BST:
    """Wrapper that owns a root ``Node`` and forwards operations to it."""

    def __init__(self, val):
        """Create the tree with a root node built from ``val``."""
        self.setRoot(val)

    def setRoot(self, val):
        """Replace the root with a new ``Node`` built from ``val``."""
        self.root = Node(val)

    def getRoot(self):
        """Return whatever the root node's ``get()`` returns."""
        root_node = self.root
        return root_node.get()

    def insert(self, val):
        """Forward the insertion of ``val`` to the root node."""
        root_node = self.root
        root_node.insert(val)

    def search(self, val):
        """Return the result of searching for ``val`` from the root node."""
        root_node = self.root
        return root_node.search(val)
Ejemplo n.º 7
0
 def insert(self, key):
     """Add ``key`` to the tree and bump the element counter.

     An empty tree gets ``key`` as its root; otherwise the insertion is
     delegated to ``add_node`` starting at the root.
     """
     if self.root is not None:
         self.add_node(self.root, key)
     else:
         self.root = Node(key)
     # NOTE(review): the counter is incremented on every call, so if
     # add_node ignores a duplicate key the count still grows - confirm.
     self.count += 1
    def __init__(self, level, threshold, num_label, file):
        """Load a workbook into attribute/label tables and build one tree.

        The first sheet of ``file`` is read column by column: the last
        ``num_label`` columns go to the label table, all preceding columns
        go to the attribute table. A tree is then generated for the first
        label column only.
        """
        self.__level = level
        self.__threshold = threshold
        first_sheet = xlrd.open_workbook(file).sheets()[0]
        n_cols = first_sheet.ncols
        n_rows = first_sheet.nrows
        first_label_col = n_cols - num_label

        # Attribute columns: everything before the label columns.
        for col in range(first_label_col):
            self.__table.append(
                [first_sheet.cell(row, col).value for row in range(n_rows)])

        # Label columns: the trailing num_label columns.
        for col in range(first_label_col, n_cols):
            self.__label_table.append(
                [first_sheet.cell(row, col).value for row in range(n_rows)])

        # Only label column 0 is processed here.
        for att in range(1):
            self.__label.clear()
            columns = copy.copy(self.__table)
            columns.append(self.__label_table[att])
            self.__label = self.__classifiedAtt__(len(columns) - 1, columns)
            root = Node(None, None, None)
            self.__generateTree__(0, root, columns)
            self.__tree.append(root)
            print("tree is create " + str(att))
        return
Ejemplo n.º 9
0
def main():
    """Parse the CSV named by ``sys.argv[1]``, build a tree, print top-level names."""
    records = csvParser.parse(sys.argv[1])
    attribute_names = list(records[0].keys())
    tree_root = Node('Root', None)
    build(records, attribute_names, tree_root, 0.01)

    # Show the root's name followed by the name of each direct child.
    print(tree_root.name)
    for branch in tree_root.children:
        print(branch.name)
Ejemplo n.º 10
0
def main():
    """Build a tree from a fixed CSV file and print it as pretty-printed XML."""
    # The input path is hard-coded instead of being read from argv[1].
    records = csvParser.parse("trunk/tree03-100-words.csv")
    attribute_names = list(records[0].keys())
    tree_root = Node('Root', None)
    build(records, attribute_names, tree_root, 0.01)
    xml_text = etree.tostring(
        outputXML(tree_root), pretty_print=True, encoding='unicode')
    print(xml_text)
Ejemplo n.º 11
0
 def add_to_tree(self, node, key):
     """Insert ``key`` below ``node`` and count it exactly once.

     If the tree has no root, ``node`` is ignored and ``key`` becomes the
     root. Otherwise the tree is walked from ``node``: smaller keys go
     left, larger keys go right, and a key that already exists is ignored.

     ``self.count`` is incremented only when a node is actually created.
     The previous recursive version incremented it in every recursion
     frame, so it grew by the insertion depth and also grew when the key
     was a duplicate found below the starting node.
     """
     if self.root is None:
         self.root = Node(key)
         self.count += 1
         return

     current = node
     while current.key != key:
         if current.key > key:
             if current.left is None:
                 current.left = Node(key)
                 self.count += 1
                 return
             current = current.left
         else:
             if current.right is None:
                 current.right = Node(key)
                 self.count += 1
                 return
             current = current.right
     # Leaving the loop means the key is already present: nothing to add.
Ejemplo n.º 12
0
    def from_json(self, filename):
        """Rebuild the tree from a JSON file.

        The file must hold the keys ``"depth"``, ``"dimensions"`` and
        ``"levels"``; ``"levels"`` maps ``"lvl0"``, ``"lvl1"``, ... to lists
        of node descriptions. If the tree is already trained, its rules are
        purged first.

        Returns ``self`` on success, or ``False`` (after printing a message)
        when one of the three top-level keys is missing.
        """
        if self.trained:
            print(
                "This tree has already been trained. This procedure will purge the trained rules."
            )
            self.__untrain()

        with open(filename, "r") as jsonfile:
            tree_dict = json.load(jsonfile)

        # Load the basics.
        try:
            self.depth_limit = tree_dict["depth"]
            self.dimensions = tree_dict["dimensions"]
            levels = tree_dict["levels"]
        except KeyError:
            print("Invalid JSON format")
            return False

        # First pass: create bare Node instances so that the second pass can
        # reference them as outcomes and parents.
        for i in range(len(levels)):
            level = levels["lvl%d" % i]
            if len(level) > 0:
                self.nodes.append([])
                for _ in level:
                    self.nodes[i].append(Node(level=i))

        # Second pass: fill in each node and wire it to its neighbours.
        for key, level in levels.items():
            if len(level) == 0:
                continue
            # Parse the whole numeric suffix; taking only the last character
            # would map "lvl10" to level 0.
            i = int(key[len("lvl"):])
            for n, node in enumerate(level):
                mynode = self.nodes[i][n]
                mynode.entropy = node["entropy"]

                if node["terminal"]:
                    mynode.outcome = [node["outcome"]]
                else:
                    mynode.feature = node["feature"]
                    mynode.threshold = node["threshold"]
                    # Non-terminal outcomes are indices into the next level.
                    next_level = self.nodes[i + 1]
                    left_outcome = next_level[node["outcome"][0]["index"]]
                    right_outcome = next_level[node["outcome"][1]["index"]]
                    mynode.outcome = [left_outcome, right_outcome]
                    mynode.terminal = False

                if i > 0:
                    mynode.parent = self.nodes[i - 1][node["parent_index"]]

        self.trained = True
        return self
Ejemplo n.º 13
0
class Tree:
    """General tree wrapper that owns a root ``Node``."""

    def __init__(self, rootVal):
        """Create the tree with a root node built from ``rootVal``."""
        self.root = Node(rootVal)

    def addChild(self, newNode):
        """Forward ``newNode`` to the root's ``addChild`` and return its result."""
        return self.root.addChild(newNode)

    def levelPrint(self):
        """Walk the tree level by level, assigning coordinates and printing.

        The root is placed at x = y = z = 0. Each child gets the depth of its
        level as z, and x/y derived from its parent's position and an angle
        obtained from ``getDegrees``. For every level the list of node
        values is printed after its nodes have been processed.
        """
        origin = self.root
        origin.x = 0
        origin.y = 0
        origin.z = 0
        origin.printCoords()

        depth = 0
        frontier = [origin]
        while frontier:
            depth += 1
            level_values = []
            upcoming = []

            for parent in frontier:
                level_values.append(parent.getValue())
                sibling_total = len(parent.getChildren())
                for position, kid in enumerate(parent.getChildren()):
                    angle = getDegrees(position, sibling_total)

                    kid.setZ(depth)
                    kid.setX(parent.x, angle)
                    kid.setY(parent.y, angle)
                    kid.printCoords()
                    pEdeges(parent, kid)
                    upcoming.append(kid)

            print(level_values)
            frontier = upcoming
Ejemplo n.º 14
0
def build(data, attributes, tree, threshold):
    """Recursively grow a decision tree under ``tree`` from ``data``.

    A node becomes a leaf (named after a class) when all records share one
    'Class' value, when no attributes remain, or when no splitting attribute
    is selected. Otherwise the node is named after the chosen attribute and
    gets one child per split.
    """
    if isUniform(d['Class'] for d in data):  # every record has the same class
        tree.setName(data[0]['Class'])
        return

    if len(attributes) == 0:  # nothing left to split on
        tree.setName(mostFrequentCategory(data))
        return

    choice = selectSplittingAttributeN(attributes, data, threshold)
    if not choice:  # no attribute was selected
        tree.setName(mostFrequentCategory(data))
        return

    valToSplit = choice[1]  # -1 if attribute is categorical
    bestAttribute = choice[0]
    tree.setName(bestAttribute)

    if valToSplit > 0:  # attribute is continuous
        splits = splitOnVal(data, bestAttribute, valToSplit)
        # Child 0 takes the records <= split value, child 1 the records above.
        for side, edgeTemplate in enumerate(("<= {}", "> {}")):
            branch = Node(None, edgeTemplate.format(valToSplit))
            tree.addChild(branch)
            build(splits[side], attributes, branch, threshold)
        return

    # Attribute is categorical: one child per non-empty group of records.
    for attributeName, newData in groupByAttribute(data, bestAttribute).items():
        if len(newData) == 0:
            continue
        newAttributes = list(attributes)
        newAttributes.remove(bestAttribute)

        childNode = Node(None, attributeName)
        tree.addChild(childNode)
        build(newData, newAttributes, childNode, threshold)
Ejemplo n.º 15
0
def main():

    shrooms = r"E:\Documents\CSC466\Lab 3\466-lab-3.git\trunk\agaricus-lepiota.data.csv"
    iris = r"E:\Documents\CSC466\Lab 3\466-lab-3.git\trunk\iris.data"
    d = parsing.parseData(shrooms)
    data = d[0]
    attributes = d[1]

    root = Node('Root', None)
    dt.build(data, attributes, root, 0.1)
    print("**", classifyPointCat(root, data[1000]))
    """if not len(sys.argv) == 3:
Ejemplo n.º 16
0
    def add(self, item):
        """Attach ``item`` to the first free child slot of the front queued node.

        An empty tree gets the new node as its root. Otherwise the node at
        the front of ``self.__items`` receives the new node as its left
        child, or as its right child if the left slot is taken. Every node
        that is attached is also appended to ``self.__items``.
        """
        newNode = Node(item)

        if self.__root is None:
            self.__root = newNode
            self.__items.append(self.__root)
            return

        # NOTE(review): nothing visible here removes the front entry once
        # both of its child slots are filled - confirm that happens elsewhere.
        treeNode = self.__items[0]
        if treeNode.getLChild() is None:
            treeNode.setLChild(newNode)
            self.__items.append(treeNode.getLChild())
        elif treeNode.getRChild() is None:
            treeNode.setRChild(newNode)
            # Enqueue the right child that was just attached; this branch
            # previously enqueued the left child a second time.
            self.__items.append(treeNode.getRChild())
Ejemplo n.º 17
0
def test():
    """Build a small five-node tree and print it in four traversal orders."""
    T = Node(17)
    T.left, T.right = Node(13), Node(26)
    T.left.left = Node(14)
    T.left.right = Node(5)

    traversals = (
        ("Preorder : ", PreorderTraverse),
        ("Inorder : ", InorderTraverse),
        ("Postorder : ", PostorderTraverse),
        ("Levelorder : ", LevelorderTraverse),
    )
    for position, (title, walk) in enumerate(traversals):
        # A separator line goes between traversals, not after the last one.
        if position:
            print("-------")
        print(title)
        walk(T)
Ejemplo n.º 18
0
def main():
    """Build a small N-ary tree and print the result of a postorder call."""
    # Nodes are created in the value order 1, 3, 2, 4, 5, 6.
    root, first, second, third, fourth, fifth = (
        Node(value) for value in (1, 3, 2, 4, 5, 6))

    # The root has three children; the first of them has two children.
    root.children = [first, second, third]
    first.children = [fourth, fifth]

    output = Solution().postorder(root)
    print("Root Node: ", output)
    def _Decision_Tree(self, tree_node):
        x_data = tree_node.x_data
        y_data = tree_node.y_data

        feature_names = list(x_data.columns)
        xy_data = pd.concat([x_data, y_data], axis=1)
        label_name = list(xy_data.columns)[-1]
        label_unique = xy_data[label_name].unique()

        if len(label_unique) == 1:
            cate = y_data.loc[0]
            tree_node.category = cate
            return tree_node

        if len(feature_names) == 0:
            cate = y_data.value_counts(ascending=False).keys()[0]
            tree_node.category = cate
            return tree_node

        label_entr = self.label_entr(y_data)

        max_gain = 0
        for feature in feature_names:
            info_gain = self.info_gain(label_entr, feature, label_name,
                                       xy_data)
            if info_gain > max_gain:
                max_gain = info_gain
                f_name = feature

        if max_gain <= self.min_info_gain:
            cate = y_data.value_counts(ascending=False).keys()[0]
            tree_node.category = cate
            return tree_node

        tree_node.feature = f_name
        tree_node.children = dict()

        for sub_attribute in self.feature_item[tree_node.feature]:
            sub_data = xy_data[xy_data[f_name] == sub_attribute]
            sub_data_x = sub_data.drop(list(sub_data.columns)[-1], axis=1)
            sub_data_x.drop(tree_node.feature, axis=1, inplace=True)
            sub_data_y = sub_data[list(sub_data.columns)[-1]]
            child_node = Node(tree_node, None, None, None, sub_data_x,
                              sub_data_y)
            tree_node.children[sub_attribute] = Decision_Tree(child_node)
        return tree_node
Ejemplo n.º 20
0
def build(data, attributes, tree, threshold):
    """Recursively grow a categorical decision tree under ``tree``.

    A node becomes a leaf (named after a category) when all records share
    one 'Category' value, when no attributes remain, or when no splitting
    attribute is selected. Otherwise the node is named after the chosen
    attribute and gets one child per non-empty group of records; the chosen
    attribute is not offered to the children.
    """
    if isUniform(record['Category'] for record in data):
        tree.setName(data[0]['Category'])
    elif len(attributes) == 0:
        # Called with one argument, like every other setName call here; the
        # stray second argument (None) has been dropped.
        tree.setName(mostFrequentCategory(data))
    else:  # Select splitting attribute
        bestAttribute = selectSplittingAttribute(data, attributes, threshold)
        if not bestAttribute:
            tree.setName(mostFrequentCategory(data))
        else:
            tree.setName(bestAttribute)
            attributeDict = groupByAttribute(data, bestAttribute)
            for attributeName in attributeDict.keys():
                newData = attributeDict[attributeName]
                if len(newData) > 0:
                    newAttributes = list(attributes)
                    newAttributes.remove(bestAttribute)
                    childNode = Node(None, attributeName)
                    tree.addChild(childNode)
                    build(newData, newAttributes, childNode, threshold)
Ejemplo n.º 21
0
def main():
    """Build a tree for the letter-recognition data and print it as XML.

    Command-line handling (data file plus optional restrictions file) is
    currently disabled; the data set paths are hard-coded below.
    """
    # This is all just print testing
    datasets = {
        "shrooms": r"E:\Documents\CSC466\Lab 3\466-lab-3.git\trunk\agaricus-lepiota.data.csv",
        "iris": r"E:\Documents\CSC466\Lab 3\466-lab-3.git\trunk\iris.data",
        "letters": r"E:\Documents\CSC466\Lab 3\466-lab-3.git\trunk\letter-recognition.data.csv",
    }
    data, attributes = parsing.parseData(datasets["letters"])

    print(attributes)
    summary = "Number of records : {}\nWith  {}  different attributes"
    print(summary.format(len(data), len(attributes)))

    root = Node('Root', None)
    build(data, attributes, root, 0.3)
    xmlOutput = etree.tostring(
        outputXML(root), pretty_print=True, encoding='unicode')
    print(xmlOutput)
Ejemplo n.º 22
0
def BranchAndBoundAlgorithm():
    """Solve the knapsack instance in ``ITEMS`` / ``KNAPSACK_SIZE`` by branch and bound.

    Items are considered in order of decreasing ``Density``. Nodes are
    processed first-in first-out; a node is expanded only while its bound
    exceeds the best value found so far. Expanding a node yields one child
    that takes the next item and one that skips it.

    Returns a pair: a 0/1 list with one entry per item in ``ITEMS`` (1 means
    selected) and the best value converted to ``int``.
    """
    queue = deque()

    sortedItemList = sorted(
        ((index, Density(item)) for index, item in enumerate(ITEMS)),
        key=lambda pair: pair[1],
        reverse=True)
    itemCount = len(sortedItemList)

    bestNode = Node(0, 0.0, 0.0, 0.0, [])
    root = Node(0, 0.0, 0.0, getUpperBoundValue(bestNode, sortedItemList), [])
    queue.appendleft(root)

    while queue:
        currentNode = queue.pop()
        if currentNode.bound <= bestNode.value:
            continue
        if currentNode.treeLevel >= itemCount:
            # Every item has already been decided on this path; indexing
            # sortedItemList here would raise IndexError.
            continue

        index = sortedItemList[currentNode.treeLevel][0]
        nextCheckedItemValue = ITEMS[index].value
        nextCheckedItemWeight = ITEMS[index].weight

        # Child that takes the item; it inherits the parent's bound.
        nextAddednode = Node(currentNode.treeLevel + 1,
                             currentNode.value + nextCheckedItemValue,
                             currentNode.weight + nextCheckedItemWeight,
                             currentNode.bound,
                             currentNode.selectedItems + [index])

        if nextAddednode.weight <= KNAPSACK_SIZE:
            if nextAddednode.value > bestNode.value:
                bestNode = nextAddednode

            if nextAddednode.bound > bestNode.value:
                queue.appendleft(nextAddednode)

        # Child that skips the item; its bound is recomputed.
        nextNotAddedNode = Node(currentNode.treeLevel + 1,
                                currentNode.value, currentNode.weight,
                                currentNode.bound,
                                currentNode.selectedItems)
        nextNotAddedNode.bound = getUpperBoundValue(
            nextNotAddedNode, sortedItemList)
        if nextNotAddedNode.bound > bestNode.value:
            queue.appendleft(nextNotAddedNode)

    bestSolution = [0] * len(ITEMS)
    for itemBit in bestNode.selectedItems:
        bestSolution[itemBit] = 1
    return bestSolution, int(bestNode.value)
Ejemplo n.º 23
0
    #if leaf node return 1 (Bcz. leaf node is present at level 1)
    if root.left == None and root.right == None:
        return 1

    #recursively compute the levels of left and right subtree
    left_subtree_levels = levels(root.left)
    right_subtree_levels = levels(root.right)

    #compute the overall levels of tree
    total_levels = max(left_subtree_levels, right_subtree_levels) + 1

    return total_levels


# Sample tree: nodes 1..8, with a left-leaning chain a -> b -> d -> g -> h.
a, b, c, d, e, f, g, h = (Node(value) for value in range(1, 9))
a.left, a.right = b, c
b.left, b.right = d, e
c.left = f
d.left = g
g.left = h
Ejemplo n.º 24
0
 def createNode(self, val):
     """Return a new ``Node`` after passing ``val`` to its ``setVal``."""
     fresh = Node()
     fresh.setVal(val)
     return fresh
Ejemplo n.º 25
0
    def __algorithm(self, S, labels, level=0, par_node=None, left=False, terminal_flag=False):
        """Recursively grow the tree for samples ``S`` with ``labels``.

        A terminal node is created when the labels are already pure, when
        ``level`` reaches ``self.depth_limit``, or when ``terminal_flag`` is
        set. Otherwise the feature/threshold pair with the lowest entropy
        is chosen, a node is created for it, and both sides of the split are
        processed recursively one level deeper. Every node that is created
        is also stored in ``self.nodes[level]``.

        ``left`` is accepted but not used in this method.

        Returns the node created for this call.
        """
        # Calculate the initial entropy.
        null_entropy = self.__impurity(labels)
        if null_entropy <= 0. or level >= self.depth_limit or terminal_flag:
            # Terminate: everything is classified, the maximum depth has
            # been reached, or the caller asked for a terminal node.
            final_node = Node(parent=par_node, level=level, entropy=null_entropy)
            final_node.outcome[0] = self.__bestguess(labels)
            self.nodes[level].append(final_node)
            return final_node

        # Normalise a one-feature vector to a column matrix before any
        # column indexing; this check used to come after S.shape[1] and
        # S[:, f] had already been evaluated, so it could never help.
        if len(S.shape) == 1:
            S = S.reshape((len(S), 1))

        # A real list is needed because the chosen feature is removed below.
        features = list(range(S.shape[1]))
        min_entropy = np.inf
        best_split = [0, 0]  # feature number and threshold of the best split
        for f in features:
            # Try all possible splits along this feature and keep the one
            # with the lowest entropy seen so far.
            split, entropy = self.__bestsplit(S[:, f], labels)
            if entropy < min_entropy:
                min_entropy = entropy
                best_split = [f, split]

        new_node = Node(feature=best_split[0], threshold=best_split[1], parent=par_node, level=level, entropy=min_entropy)
        self.nodes[level].append(new_node)

        # Split the dataset.
        leftMask = S[:, best_split[0]] <= best_split[1]
        rightMask = S[:, best_split[0]] > best_split[1]
        features.remove(best_split[0])

        leftLabels = labels[leftMask]
        rightLabels = labels[rightMask]

        # Stop here when the split puts all samples into one branch.
        if leftMask.all():
            new_node.make_terminal(self.__bestguess(leftLabels))
            return new_node
        if rightMask.all():
            new_node.make_terminal(self.__bestguess(rightLabels))
            return new_node

        if len(features) == 0:
            # No feature is left: keep the data as is and force the
            # children to be terminal.
            leftS = S[leftMask, :]
            rightS = S[rightMask, :]
            terminal_flag = True
        else:
            # NOTE(review): the used column is dropped here, so feature
            # numbers chosen deeper down index the reduced matrix - confirm
            # that the code reading node.feature accounts for this.
            leftS = S[leftMask, :][:, features]
            rightS = S[rightMask, :][:, features]

        # Stop here when one side ends up without rows or columns.
        if len(leftS) == 0 or leftS.shape[1] == 0:
            new_node.make_terminal(self.__bestguess(rightLabels))
            return new_node
        if len(rightS) == 0 or rightS.shape[1] == 0:
            new_node.make_terminal(self.__bestguess(leftLabels))
            return new_node

        # Make sure a container for the next level exists.
        try:
            self.nodes[level + 1]
        except IndexError:
            self.nodes.append([])

        # Recurse on the two children.
        new_node.outcome[0] = self.__algorithm(leftS, leftLabels, level=level + 1, par_node=new_node, terminal_flag=terminal_flag)
        new_node.outcome[1] = self.__algorithm(rightS, rightLabels, level=level + 1, par_node=new_node, terminal_flag=terminal_flag)
        return new_node
Ejemplo n.º 26
0
 def insert(self, val):
     """Add ``val`` to the tree and increase ``size`` by one.

     When the tree has no (truthy) root, ``val`` becomes the root;
     otherwise the placement is delegated to ``_insert`` from the root.
     """
     if not self.root:
         self.root = Node(value=val)
     else:
         self._insert(val, self.root)
     self.size += 1
    if root.left==None and root.right==None:
      return 0

    #recursively compute intermediate nodes in left and right subtree 
    left_subtree_non_leaf=non_leaf(root.left)
    right_subtree_non_leaf=non_leaf(root.right)

    #compute the total intermediates node by considering current intermediate node , left_subtree_non_leaf node and right_subtree_non_leaf node
    total_non_leaf=left_subtree_non_leaf+right_subtree_non_leaf+1
    
    return total_non_leaf
 
    
    

# Sample tree with seven nodes: every node on the first two levels has both
# children.
a, b, c, d, e, f, g = (Node(value) for value in range(1, 8))
a.left, a.right = b, c
b.left, b.right = d, e
c.left, c.right = f, g

result = non_leaf(a)
print(result)
 def __init__(self, min_info_gain, x_data, y_data):
     """Store the gain threshold, wrap the data in a root node, index features.

     ``feature_item`` is whatever ``self.get_feature`` returns for
     ``x_data``.
     """
     self.min_info_gain = min_info_gain
     root = Node(x_data=x_data, y_data=y_data)
     self.root_node = root
     self.feature_item = self.get_feature(x_data)
Ejemplo n.º 29
0
        r = self.LCA(a, b, node.right)

        if l and r:
            print node.data
            return node

        if l:
            return l

        if r:
            return r


if __name__ == '__main__':
    # Demo: fill a BinaryTree with eight values and run each traversal,
    # printing a separator line after every one of them.
    t = BinaryTree()
    for value in (5, 2, 1, 3, 4, 7, 6, 8):
        t.add(Node(value))

    # print(...) with a single parenthesised string behaves the same on
    # Python 2 and Python 3; the bare print statement is a syntax error on
    # Python 3.
    separator = '\n======='
    t.preOrder(t.root)
    print(separator)
    t.preOrderIt()
    print(separator)
    t.inOrder(t.root)
    print(separator)
    t.postOrder(t.root)
    print(separator)
Ejemplo n.º 30
0
 def __init__(self, rootVal):
     """Create the tree with a root ``Node`` built from ``rootVal``."""
     root_node = Node(rootVal)
     self.root = root_node
        if root.left.data <= root.data:
            return is_bst(root.left)
        else:
            return False

    #intermidiate node with right children only
    elif root.right != None:
        if root.data <= root.right.data:
            return is_bst(root.right)
        else:
            return False

    # return False


# Sample tree rooted at n4 (value 4): 2 and 6 below it, then 1, 3, 5, 7.
n1, n2, n3, n4, n5, n6, n7 = (Node(value) for value in range(1, 8))

n4.left, n4.right = n2, n6
n2.left, n2.right = n1, n3
n6.left, n6.right = n5, n7

result = is_bst(n4)