Exemple #1
0
def PRINT_TREE_AT_LEVEL_2():
    """Print the first two levels of a decision tree for monk1 by hand,
    then build and draw the full tree for comparison.

    Level 1 is the best attribute (hard-coded here as a5 = attributes[4]);
    level 2 prints, for each a5-subset, its best attribute and the majority
    class of every branch under it.
    """
    # A5
    print(" ")
    print("LEVEL 1:")
    print(m.attributes[4])
    # One subset of monk1 per value (1..4) of attribute a5.
    Att = [select(m.monk1, m.attributes[4], value) for value in range(1, 5)]

    print("LEVEL 2:")
    # Values probed per attribute index — mirrors the original hard-coded
    # ranges for a1..a6: 3, 3, 2, 3, 4, 2 values respectively.
    value_counts = [3, 3, 2, 3, 4, 2]
    for A in Att:
        tmp = bestAttribute(A, m.attributes)
        print(tmp)
        for idx, count in enumerate(value_counts):
            if tmp == m.attributes[idx]:
                # Majority class of each branch under the level-2 split.
                for value in range(1, count + 1):
                    print(mostCommon(select(A, tmp, value)))
    print(" ")
    t = buildTree(m.monk1, m.attributes)
    drawTree(t)
Exemple #2
0
def informationGainCalculation():
    """Print the average information gain of each of the six attributes for
    monk1, monk2 and monk3, followed by the best splitting attribute of each.

    The original body repeated the same loop three times verbatim; this
    version iterates over (label, dataset) pairs with identical output.
    """
    print("Information gain results ", "\n")
    for name, dataset in (("Monk1", m.monk1), ("Monk2", m.monk2),
                          ("Monk3", m.monk3)):
        for attributeIndex in range(0, 6):
            result = d.averageGain(dataset, m.attributes[attributeIndex])
            print(name + "|   ", attributeIndex + 1, ": ", result, "    ")
        print("Best attribute: ", d.bestAttribute(dataset, m.attributes), "\n")
Exemple #3
0
def caspersky(dataset):
    """Manually build a depth-2 decision tree over `dataset` and draw it.

    The root splits on the best attribute; each branch whose majority class
    is True becomes a leaf, otherwise a second-level split on that subset's
    best attribute is added with majority-vote leaves.
    """
    print("Assignment 3")
    a = d.bestAttribute(dataset, m.attributes)
    branches = []
    for v in a.values:
        s = d.select(dataset, a, v)
        tf = d.mostCommon(s)
        if tf == True:
            # BUG FIX: TreeLeaf holds a classification, not a dataset
            # (cf. d.TreeLeaf(d.mostCommon(s2)) below) — pass tf, not s.
            branches.append((v, d.TreeLeaf(tf)))
        else:
            a2 = d.bestAttribute(s, m.attributes)
            branches2 = []
            for v2 in a2.values:
                s2 = d.select(s, a2, v2)
                branches2.append((v2, d.TreeLeaf(d.mostCommon(s2))))
            branches.append((v, d.TreeNode(a2, dict(branches2), d.mostCommon(s))))

    drawtree.drawTree(d.TreeNode(a, dict(branches), d.mostCommon(dataset)))
Exemple #4
0
def caspersky(dataset):
    """Manually build a depth-2 decision tree over `dataset` and draw it.

    The root splits on the best attribute; a branch whose majority class is
    True becomes a leaf, otherwise a second-level split is built with
    majority-vote leaves.
    """
    print("Assignment 3")
    a = d.bestAttribute(dataset, m.attributes)
    branches = []
    for v in a.values:
        s = d.select(dataset, a, v)
        tf = d.mostCommon(s)
        if tf == True:
            # BUG FIX: TreeLeaf holds a classification, not a dataset
            # (cf. d.TreeLeaf(d.mostCommon(s2)) below) — pass tf, not s.
            branches.append((v, d.TreeLeaf(tf)))
        else:
            a2 = d.bestAttribute(s, m.attributes)
            branches2 = []
            for v2 in a2.values:
                s2 = d.select(s, a2, v2)
                branches2.append((v2, d.TreeLeaf(d.mostCommon(s2))))
            branches.append((v, d.TreeNode(a2, dict(branches2),
                                           d.mostCommon(s))))

    drawtree.drawTree(d.TreeNode(a, dict(branches), d.mostCommon(dataset)))
Exemple #5
0
def buildTree(subset, attrs):
    """Append a textual rendering of the decision tree for `subset` to the
    module-level string `tree` ("attr(children)" notation, '+'/'-' leaves).

    Mutates the global `tree`; returns None.
    """
    global tree
    # Pure subset: emit a single leaf symbol and stop recursing.
    if isLeaf(subset):
        tree += '+' if d.allPositive(subset) else '-'
        return
    root = d.bestAttribute(subset, attrs)
    tree += str(root) + "("
    for value in root.values:
        # Recurse on each branch, removing the chosen attribute.
        buildTree(d.select(subset, root, value), attrs - {root})
    tree += ")"
Exemple #6
0
def makeTree(set, level, attributes):
    """Recursively build a decision tree for `set`, capped at the
    module-level `depth`; at the cap, classify by majority vote.

    NOTE: the parameter `set` shadows the builtin — kept for interface
    compatibility with existing callers.
    """
    if level >= depth:
        return dtree.TreeLeaf(dtree.mostCommon(set))
    attr = dtree.bestAttribute(set, attributes)
    # Loop-invariant: the remaining attributes do not change per value.
    attributes_left = [a for a in attributes if a != attr]
    branches = []
    for val in attr.values:
        subset = dtree.select(set, attr, val)
        if dtree.allPositive(subset):
            node = dtree.TreeLeaf(True)
        elif dtree.allNegative(subset):
            node = dtree.TreeLeaf(False)
        else:
            node = makeTree(subset, level + 1, attributes_left)
        branches.append((val, node))
    return dtree.TreeNode(attr, dict(branches), dtree.mostCommon(set))
def Tree(dataset, attributes, maxdepth=3):
    """Build a decision tree over `dataset` of at most `maxdepth` levels,
    falling back to the majority class when the depth budget is exhausted
    or a branch's subset is empty.
    """
    def Branch(subset, fallback, remaining):
        # Empty branch: inherit the parent's majority class.
        if not subset:
            return dtree.TreeLeaf(fallback)
        if dtree.allPositive(subset):
            return dtree.TreeLeaf(True)
        if dtree.allNegative(subset):
            return dtree.TreeLeaf(False)
        # Impure branch: spend one level of depth on a sub-tree.
        return Tree(subset, remaining, maxdepth - 1)

    default = dtree.mostCommon(dataset)
    if maxdepth < 1:
        return dtree.TreeLeaf(default)
    best = dtree.bestAttribute(dataset, attributes)
    attributesLeft = [x for x in attributes if x != best]
    children = {}
    for v in best.values:
        children[v] = Branch(dtree.select(dataset, best, v), default,
                             attributesLeft)
    return dtree.TreeNode(best, children, default)
Exemple #8
0
def buildTreeCustom(dataset, depth):
    """Print a compact textual decision tree for `dataset` to stdout,
    recursing at most `depth` levels; '+'/'-' mark majority-vote leaves.
    """
    # Depth budget exhausted: majority-vote leaf.
    if depth <= 0:
        print('+' if dt.mostCommon(dataset) else '-', end='')
        return

    bestAttr = dt.bestAttribute(dataset, m.attributes)
    print(str(bestAttr), end='')

    for value in bestAttr.values:
        split = dt.select(dataset, bestAttr, value)
        if dt.entropy(split) > 0:
            # Impure split: keep splitting with one less level available.
            buildTreeCustom(split, depth - 1)
        else:
            # Pure split: print its class directly.
            print('+' if dt.mostCommon(split) else '-', end='')
Exemple #9
0
def calc_next_level():
    """Print the per-attribute average gain inside each a5-subset of monk1,
    assemble a hand-built two-level tree string, and compare it with
    dt.buildTree's depth-2 result.

    BUG FIX: the original used Python 2 print statements, a SyntaxError in
    the Python 3 context of this file; converted to print() calls with
    identical output.
    """
    print("\nA5\t  a1\t\t  a2\t\t  a3\t\t  a4\t\t  a5\t\t  a6")
    s = "A5("
    for val in data.attributes[4].values:
        subset = dt.select(data.monk1, data.attributes[4], val)
        # One gain column per attribute for this a5 value.
        t = "\t"
        for attr in data.attributes:
            t = t + "%.6f\t" % (dt.averageGain(subset, attr))
        print(val, t)
        best = dt.bestAttribute(subset, data.attributes)
        s = s + best.name + "("
        for value in best.values:
            # '+'/'-' is the majority class of each second-level branch.
            if dt.mostCommon(dt.select(subset, best, value)):
                s = s + "+"
            else:
                s = s + "-"
        s = s + ")"
    s = s + ")"
    print("\nOur tree:\t", s)
    print("Build tree:\t", dt.buildTree(data.monk1, data.attributes, 2))
#     gain1=dtree.averageGain(mdata.monk1 , mdata.attributes[index])
#     gain2=dtree.averageGain(mdata.monk2 , mdata.attributes[index])
#     gain3=dtree.averageGain(mdata.monk3 , mdata.attributes[index])

#     ag1.append(gain1)
#     ag2.append(gain2)
#     ag3.append(gain3)

# print(ag1) #a5
# print(ag2) #a5
# print(ag3) #a2 a5

#**********************************
# Assignment 5

# Assignment 5: split monk1 on its best attribute, then compute the
# information gain of every remaining attribute inside each subset.
a = dtree.bestAttribute(mdata.monk1, mdata.attributes)
attributesLeft = [x for x in mdata.attributes if x != a]

# One subset of monk1 per value of the chosen attribute.
subsets = [dtree.select(mdata.monk1, a, v) for v in a.values]

subsets_ag = []
# Flattened: subset-major, attribute-minor — same order as the original
# nested append loop.
ag_in2level = [dtree.averageGain(subset, attr)
               for subset in subsets
               for attr in attributesLeft]
Exemple #11
0
print("Monk3 has entropy: ", dt.entropy(m.monk3))

# Information gain of each attribute a1..a6 on monk1; the original spelled
# out twelve near-identical print calls — these loops emit identical output.
for i in range(6):
    print("Monk1, attribute a" + str(i + 1) + " has information gain: ",
          dt.averageGain(m.monk1, m.attributes[i]))

print("Monk1's best attribute is: ", dt.bestAttribute(m.monk1, m.attributes))

print("\n")

for i in range(6):
    print("Monk2, attribute a" + str(i + 1) + " has information gain: ",
          dt.averageGain(m.monk2, m.attributes[i]))
def getAverageInformationGain(dataset, name):
    """Print the average information gain of every attribute for `dataset`
    (labelled `name`), then its best splitting attribute.
    """
    print("Information Gain of:" + name + ":")
    # Iterate the attributes directly rather than by index.
    for attribute in m.attributes:
        print("InformationGain of " + name + " " + attribute.name + " : " + str(d.averageGain(dataset, attribute)))
    print("The Best Attribute for splitting the result " + name + ":" + str(d.bestAttribute(dataset, m.attributes)))
Exemple #13
0
def get_gain(monk):
    """Return the average information gain of `monk` for each of the six
    attributes, in attribute order.
    """
    return [dtree.averageGain(monk, m.attributes[idx]) for idx in range(6)]


# Information gain per attribute for each MONK dataset.
gain_monk1 = get_gain(monk1)
gain_monk2 = get_gain(monk2)
gain_monk3 = get_gain(monk3)

for gains in (gain_monk1, gain_monk2, gain_monk3):
    print(gains)

BestAttribute = dtree.bestAttribute(monk3, m.attributes)
print(BestAttribute)

# Full trees for all three datasets (drawing left disabled, as before).
monk1_tree = dtree.buildTree(monk1, m.attributes)
monk2_tree = dtree.buildTree(monk2, m.attributes)
monk3_tree = dtree.buildTree(monk3, m.attributes)

# Fraction of m.monk1 classified correctly by the monk1 tree.
print(dtree.check(monk1_tree, m.monk1))
Exemple #14
0
def printInformationGainOfDataset(dataset, name):
    """Print the average information gain of every attribute for `dataset`
    (labelled `name`), then the best splitting attribute.
    """
    print("\nInformation gain of " + name + ":")
    # Iterate the attributes directly rather than by index.
    for attribute in m.attributes:
        print(attribute.name + ": " +
              str(d.averageGain(dataset, attribute)))
    print("Best attribute is: " + str(d.bestAttribute(dataset, m.attributes)))
Exemple #15
0
    print("MONK 3:", dt.entropy(m.monk3))
    print("")

    # ASSIGNMENT 3
    monks = [m.monk1, m.monk2, m.monk3]
    for monk_id, monk in enumerate(monks):
        print("Monk:", monk_id+1)
        for i in range(6):
            gain = dt.averageGain(monk, m.attributes[i])
            print("A" + str(i+1) + ": " + str(gain))
    print("")

    # ASSIGNMENT 4
    for monk_id, monk in enumerate(monks):
        print("Monk:", monk_id+1)
        best_atribute = dt.bestAttribute(monk, m.attributes)
        print("Best attribute: " + str(best_atribute))
        for value in best_atribute.values:
            subset = dt.select(monk, best_atribute, value)
            entropy = dt.entropy(subset)
            print("Entropy " + str(value) + ": " + str(entropy))
            # print("Next level information gains:")
            # for i in range(6):
            #     gain = dt.averageGain(monk, m.attributes[i])
            #     print("A" + str(i+1) + ": " + str(gain))
    print("")


    best_atribute = dt.bestAttribute(m.monk1, m.attributes)
    for value in best_atribute.values:
        subset = dt.select(m.monk1, best_atribute, value)