def PRINT_TREE_AT_LEVEL_2():  # A5
    """Print the level-1 split attribute (a5) and, for each of its
    subsets, the best level-2 attribute and the majority class of every
    branch under it.

    Reads the monk1 training set and the attribute list from module `m`.
    """
    print(" ")
    print("LEVEL 1:")
    print(m.attributes[4])
    # Partition monk1 on a5 (attributes[4], values 1..4).
    Att = [None] * 4
    for value in range(1, 5):
        Att[value - 1] = select(m.monk1, m.attributes[4], value)
    print("LEVEL 2:")
    for A in Att:
        tmp = bestAttribute(A, m.attributes)
        print(tmp)
        # Iterate the attribute's own value set; the original six
        # hard-coded if/range branches merely duplicated tmp.values
        # (3, 3, 2, 3, 4, 2 values for a1..a6 respectively).
        for value in tmp.values:
            print(mostCommon(select(A, tmp, value)))
        print(" ")

# Build and draw the full reference tree for comparison.
t = buildTree(m.monk1, m.attributes)
drawTree(t)
def informationGainCalculation():
    """Print the average information gain of attributes a1..a6 for each
    MONK training set, followed by the best splitting attribute.

    Output is identical to the original three copy-pasted loops; the
    datasets are simply iterated in one pass.
    """
    print("Information gain results ", "\n")
    datasets = (("Monk1", m.monk1), ("Monk2", m.monk2), ("Monk3", m.monk3))
    for name, dataset in datasets:
        for attributeIndex in range(0, 6):
            result = d.averageGain(dataset, m.attributes[attributeIndex])
            print(name + "| ", attributeIndex + 1, ": ", result, " ")
        print("Best attribute: ", d.bestAttribute(dataset, m.attributes), "\n")
def caspersky(dataset):
    """Hand-build a depth-2 decision tree over `dataset` and draw it.

    Splits on the best attribute; branches whose majority class is True
    become leaves, the others are split once more on their own best
    attribute with majority-class leaves.
    """
    print("Assignment 3")
    a = d.bestAttribute(dataset, m.attributes)
    branches = []
    for v in a.values:
        s = d.select(dataset, a, v)
        tf = d.mostCommon(s)
        if tf:
            # BUG FIX: the leaf must carry the classification value
            # (tf, i.e. True here), not the subset itself as the
            # original d.TreeLeaf(s) did.
            branches.append((v, d.TreeLeaf(tf)))
        else:
            # Majority-False branch: split one level further.
            a2 = d.bestAttribute(s, m.attributes)
            branches2 = []
            for v2 in a2.values:
                s2 = d.select(s, a2, v2)
                branches2.append((v2, d.TreeLeaf(d.mostCommon(s2))))
            branches.append((v, d.TreeNode(a2, dict(branches2), d.mostCommon(s))))
    drawtree.drawTree(d.TreeNode(a, dict(branches), d.mostCommon(dataset)))
def buildTree(subset, attrs):
    """Append a textual rendering of the decision tree for `subset` to
    the module-level string `tree`: '+'/'-' at leaves, "attr(...)" at
    inner nodes, children in attribute-value order.
    """
    global tree
    if isLeaf(subset):
        # Leaf: one character for the class of the (pure) subset.
        tree += '+' if d.allPositive(subset) else '-'
        return
    best = d.bestAttribute(subset, attrs)
    tree += str(best) + "("
    # The chosen attribute is removed for every child alike.
    remaining = attrs - {best}
    for v in best.values:
        buildTree(d.select(subset, best, v), remaining)
    tree += ")"
def makeTree(set, level, attributes):
    """Recursively build a decision tree of at most `depth` levels
    (module-level bound); pure subsets become leaves immediately,
    depth-limited nodes become majority-class leaves.
    """
    if level >= depth:
        return dtree.TreeLeaf(dtree.mostCommon(set))
    attr = dtree.bestAttribute(set, attributes)
    # The split attribute is excluded from every child's candidates.
    remaining = [a for a in attributes if a != attr]
    branches = {}
    for val in attr.values:
        subset = dtree.select(set, attr, val)
        if dtree.allPositive(subset):
            child = dtree.TreeLeaf(True)
        elif dtree.allNegative(subset):
            child = dtree.TreeLeaf(False)
        else:
            child = makeTree(subset, level + 1, remaining)
        branches[val] = child
    return dtree.TreeNode(attr, branches, dtree.mostCommon(set))
def Tree(dataset, attributes, maxdepth=3):
    """ID3-style decision-tree builder with a depth bound `maxdepth`."""

    def Branch(subset, fallback, attrs):
        # Empty subset -> parent's majority class; pure subset -> class
        # leaf; otherwise recurse one level deeper.
        if not subset:
            return dtree.TreeLeaf(fallback)
        if dtree.allPositive(subset):
            return dtree.TreeLeaf(True)
        if dtree.allNegative(subset):
            return dtree.TreeLeaf(False)
        return Tree(subset, attrs, maxdepth - 1)

    default = dtree.mostCommon(dataset)
    if maxdepth < 1:
        return dtree.TreeLeaf(default)
    a = dtree.bestAttribute(dataset, attributes)
    attributesLeft = [x for x in attributes if x != a]
    branches = {}
    for v in a.values:
        branches[v] = Branch(dtree.select(dataset, a, v), default, attributesLeft)
    return dtree.TreeNode(a, branches, default)
def buildTreeCustom(dataset, depth):
    """Print a compact textual tree: best-attribute names at inner
    nodes, '+'/'-' at leaves, descending at most `depth` levels.
    """
    if depth <= 0:
        # Depth exhausted: emit the majority class of what remains.
        print('+' if dt.mostCommon(dataset) else '-', end='')
        return
    bestAttr = dt.bestAttribute(dataset, m.attributes)
    print(str(bestAttr), end='')
    # One child per value of the chosen attribute, in value order.
    for value in bestAttr.values:
        split = dt.select(dataset, bestAttr, value)
        if dt.entropy(split) > 0:
            # Impure split: keep splitting with one less level left.
            buildTreeCustom(split, depth - 1)
        else:
            # Pure split: print its class directly.
            print('+' if dt.mostCommon(split) else '-', end='')
def calc_next_level():
    """Print the per-value average gains one level below an a5 root of
    monk1, assemble a textual depth-2 tree string, and compare it with
    the library's buildTree output.

    Converted from Python 2 `print` statements to the print() function
    so the block is valid in this otherwise Python 3 file; the printed
    output is unchanged.
    """
    print("\nA5\t a1\t\t a2\t\t a3\t\t a4\t\t a5\t\t a6")
    s = "A5("
    for val in data.attributes[4].values:
        subset = dt.select(data.monk1, data.attributes[4], val)
        t = "\t"
        for attr in data.attributes:
            t = t + "%.6f\t" % (dt.averageGain(subset, attr))
        print(val, t)
        best = dt.bestAttribute(subset, data.attributes)
        s = s + best.name + "("
        # One '+'/'-' per value of the second-level best attribute.
        for value in best.values:
            if dt.mostCommon(dt.select(subset, best, value)):
                s = s + "+"
            else:
                s = s + "-"
        s = s + ")"
    s = s + ")"
    print("\nOur tree:\t", s)
    print("Build tree:\t", dt.buildTree(data.monk1, data.attributes, 2))
# NOTE(review): the commented-out experiment that printed per-attribute
# average gains for monk1/2/3 was removed; its recorded results were
# best attribute a5 for monk1 and monk2, a2/a5 for monk3.

# **********************************
# Assignment 5
a = dtree.bestAttribute(mdata.monk1, mdata.attributes)
attributesLeft = [x for x in mdata.attributes if x != a]  # a5 removed

# Partition monk1 on each value of the chosen root attribute.
subsets = [dtree.select(mdata.monk1, a, v) for v in a.values]

# Second-level average gains, flattened: subsets in value order, each
# contributing one gain per remaining attribute.
ag_in2level = [dtree.averageGain(subset, attr)
               for subset in subsets
               for attr in attributesLeft]
subsets_ag = []
# Entropy of monk3, then per-attribute information gains for monk1 and
# monk2 (the twelve copy-pasted print lines are replaced by loops with
# byte-identical output).
print("Monk3 has entropy: ", dt.entropy(m.monk3))

for i in range(6):
    print("Monk1, attribute a" + str(i + 1) + " has information gain: ",
          dt.averageGain(m.monk1, m.attributes[i]))
print("Monk1's best attribute is: ", dt.bestAttribute(m.monk1, m.attributes))
print("\n")

for i in range(6):
    print("Monk2, attribute a" + str(i + 1) + " has information gain: ",
          dt.averageGain(m.monk2, m.attributes[i]))
def getAverageInformationGain(dataset, name):
    """Print every attribute's information gain for `dataset` and the
    best splitting attribute; `name` labels the output lines.
    """
    print("Information Gain of:" + name + ":")
    for attribute in m.attributes:
        gain = d.averageGain(dataset, attribute)
        print("InformationGain of " + name + " " + attribute.name + " : " + str(gain))
    best = d.bestAttribute(dataset, m.attributes)
    print("The Best Attribute for splitting the result " + name + ":" + str(best))
def get_gain(monk):
    """Return the average information gain of attributes a1..a6 for the
    given MONK dataset, as a list in attribute order.
    """
    return [dtree.averageGain(monk, m.attributes[i]) for i in range(6)]


gain_monk1 = get_gain(monk1)
gain_monk2 = get_gain(monk2)
gain_monk3 = get_gain(monk3)
print(gain_monk1)
print(gain_monk2)
print(gain_monk3)

# Attribute with the highest gain for monk3.
BestAttribute = dtree.bestAttribute(monk3, m.attributes)
print(BestAttribute)

#monk1_A5_1 = dtree.select(monk1,m.attributes[5],1)
#print(monk1_A5_1)

monk1_tree = dtree.buildTree(monk1, m.attributes)
#graf1 = dt.drawTree(monk1_tree)
monk2_tree = dtree.buildTree(monk2, m.attributes)
#graf2 = dt.drawTree(monk2_tree)
monk3_tree = dtree.buildTree(monk3, m.attributes)
#graf3 = dt.drawTree(monk3_tree)

# Fraction of m.monk1 classified correctly by the monk1 tree.
print(dtree.check(monk1_tree, m.monk1))
def printInformationGainOfDataset(dataset, name):
    """Print the information gain of every attribute for `dataset`
    (labelled `name`) followed by the best attribute to split on.
    """
    print("\nInformation gain of " + name + ":")
    for attribute in m.attributes:
        print(attribute.name + ": " + str(d.averageGain(dataset, attribute)))
    print("Best attribute is: " + str(d.bestAttribute(dataset, m.attributes)))
# Entropy of the monk3 training set.
print("MONK 3:", dt.entropy(m.monk3))
print("")

# ASSIGNMENT 3: per-attribute information gains for each dataset.
monks = [m.monk1, m.monk2, m.monk3]
for monk_id, monk in enumerate(monks):
    print("Monk:", monk_id+1)
    for i in range(6):
        gain = dt.averageGain(monk, m.attributes[i])
        print("A" + str(i+1) + ": " + str(gain))
    print("")

# ASSIGNMENT 4: best attribute per dataset and the entropy of each
# subset obtained by splitting on it.
for monk_id, monk in enumerate(monks):
    print("Monk:", monk_id+1)
    best_atribute = dt.bestAttribute(monk, m.attributes)
    print("Best attribute: " + str(best_atribute))
    for value in best_atribute.values:
        subset = dt.select(monk, best_atribute, value)
        entropy = dt.entropy(subset)
        print("Entropy " + str(value) + ": " + str(entropy))
    # print("Next level information gains:")
    # for i in range(6):
    #     gain = dt.averageGain(monk, m.attributes[i])
    #     print("A" + str(i+1) + ": " + str(gain))
    print("")

# Split monk1 on its best attribute.
# NOTE(review): this loop body may continue beyond this chunk of the
# file — confirm against the full source before editing it.
best_atribute = dt.bestAttribute(m.monk1, m.attributes)
for value in best_atribute.values:
    subset = dt.select(m.monk1, best_atribute, value)