def Main(filename, ParentNode, MapParentNode):
    """Recursively grow the decision tree below ``ParentNode``.

    For every category (branch value) of the parent's attribute, either a
    leaf or a further split node is attached to ``ParentNode.children``:

    * if ``OneClass`` reports a single class for the category, a leaf named
      ``-1`` carrying that class in ``Class`` is attached;
    * otherwise the unused attribute with the highest gain becomes a new
      node and the function recurses into it;
    * if no unused attribute is left, a leaf named ``-1`` without a
      ``Class`` attribute is attached.

    Args:
        filename: path of the comma-separated data file.  Column 14 holds
            the label; the value ">50K" is counted as the first class and
            anything else as the second.
        ParentNode: node whose children are being built; ``name`` is the
            CSV column index of its attribute.
        MapParentNode: iterable of the categories of ``ParentNode``'s
            attribute.
    """
    for category in MapParentNode:
        # Evaluate OneClass once: its result serves both as the "is this a
        # single class?" test and as the (flag, class) pair for the leaf.
        outcome = OneClass(filename, ParentNode, category)

        # `== False` is kept on purpose: the helper's other return values
        # (apparently a tuple) must not be treated as falsy.
        if outcome == False:
            categoryEntropy = CategoryEntropy(filename, ParentNode.name,
                                              category)

            # Gain of every attribute that is not already on the path.
            gains = {}
            for attribute in attributes:
                if isParent(ParentNode, attribute) == False:
                    counts = _class_counts(filename, ParentNode, category,
                                           attribute)
                    split_entropy = _split_entropy(counts, attribute,
                                                   ParentNode, category)
                    gains[attribute] = -categoryEntropy + split_entropy

            if gains:
                # Pick the attribute with the highest gain; on ties the
                # last one in iteration order wins, as before.
                best_gain = max(gains.values())
                att = -1
                for candidate, value in gains.items():
                    if value == best_gain:
                        att = candidate

                NewNode = _attach_child(ParentNode, category, att)
                # Repeat for the children of the freshly created node.
                Main(filename, NewNode, attributes.get(NewNode.name))
            else:
                # No attribute left, but the rows are still not one class.
                _attach_child(ParentNode, category, -1)
        else:
            # The category is a single class: attach a labelled leaf.
            _, leaf_class = outcome
            NewNode = _attach_child(ParentNode, category, -1)
            NewNode.Class = leaf_class


def _attach_child(ParentNode, category, name):
    """Create a node called ``name`` and attach it under ``category``."""
    child = Node()
    child.name = name
    # The child's path is the parent's path plus the branch just taken.
    child.parents = ParentNode.parents.copy()
    child.parents[ParentNode.name] = category
    child.parents[child.name] = 'None'
    ParentNode.children[category] = child
    return child


def _class_counts(filename, ParentNode, category, attribute):
    """Count [">50K", other] rows per value of ``attribute`` in ``category``."""
    counts = {}
    for key in attributes[attribute]:
        counts[key] = [0, 0]

    # NOTE(review): rows are filtered on the direct parent's value only,
    # not on the whole path stored in ParentNode.parents -- confirm that
    # this is intended.
    with open(filename, 'rb') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if row[ParentNode.name] != category:
                continue
            value = row[attribute]
            if value in counts:
                if row[14] == ">50K":
                    counts[value][0] += 1
                else:
                    counts[value][1] += 1
    return counts


def _split_entropy(counts, attribute, ParentNode, category):
    """Sum the weighted ``p * log2(p)`` terms over the attribute's values."""
    total = 0
    for key in attributes[attribute]:
        high, low = counts[key]
        if high == 0 or low == 0:
            # A pure (or empty) value contributes nothing.  The previous
            # code reset the running total to 0 here, which threw away the
            # terms already accumulated for other values.
            continue
        nb = high + low
        p_high = high / float(nb)
        p_low = low / float(nb)
        # NOTE(review): the weight divides by
        # len(attributes[ParentNode.name][category]); presumably that is the
        # number of rows in the category -- verify against `attributes`.
        total = total + ((p_high * math.log(p_high, 2)
                          + p_low * math.log(p_low, 2))
                         * float(nb)
                         / len(attributes[ParentNode.name][category]))
    return total
# Remaining attribute gains.  Each entry's position in `gain` is used below
# as the attribute's column index, so ignored columns get a placeholder 0.
# The row count and whole-data entropy are computed once and reused for
# every attribute instead of re-evaluating them for each Gain call.
# NOTE(review): assumes nbRows and EntropyAllData depend only on the file
# contents -- confirm they have no other side effects.
_data_file = "adult1.data"
_row_count = nbRows(_data_file)
_data_entropy = EntropyAllData(_data_file, _row_count)

gain.append(0)
gain.append(Gain(_data_file, education, 3, _row_count, _data_entropy))
# ignore Education Number attribute
gain.append(0)
_remaining_attributes = (
    maritalStatus,
    occupation,
    relationship,
    race,
    sex,
    capitalGain,
    capitalLoss,
    hours,
    nativeCountry,
)
# These attributes occupy CSV columns 5 through 13, in this order.
for _column, _values in enumerate(_remaining_attributes, 5):
    gain.append(Gain(_data_file, _values, _column, _row_count,
                     _data_entropy))

# Initialisation: the root is the attribute with the highest gain.
Tree = Node()
Tree.name = gain.index(max(gain))
Tree.parents[Tree.name] = 'None'

# Build decision Tree
Main(_data_file, Tree, attributes[Tree.name])

# Apply Decision Tree
DecisionTree("Test", "Test1", Tree)

# output
output("Test1", "Test2")