Beispiel #1
0
def _attach_child(ParentNode, category, name):
    """Create a Node called *name* and hang it under ParentNode.children[category].

    The new node's ``parents`` mapping is a copy of the parent's mapping,
    extended with ``{ParentNode.name: category}`` and with an entry for the
    new node itself whose value is the string 'None'.

    Returns the newly created node.
    """
    child = Node()
    child.name = name
    child.parents = ParentNode.parents.copy()
    child.parents[ParentNode.name] = category
    child.parents[child.name] = 'None'
    ParentNode.children[category] = child
    return child


def _read_category_rows(filename, column, category):
    """Return the rows of CSV file *filename* whose field *column* equals *category*."""
    # 'rb' is the mode the Python 2 csv module expects.
    # NOTE(review): under Python 3 this would have to be text mode with
    # newline='' -- confirm the interpreter version before changing it.
    with open(filename, 'rb') as csvfile:
        return [row for row in csv.reader(csvfile, delimiter=',')
                if row[column] == category]


def _attribute_gain(rows, attribute, ParentNode, category, categoryEntropy):
    """Return the gain score of splitting *rows* on column *attribute*.

    *rows* are the CSV rows already restricted to the current category.
    For every value listed in ``attributes[attribute]`` the rows are tallied
    as [count with row[14] == ">50K", count with any other row[14]].
    """
    tallies = dict((value, [0, 0]) for value in attributes[attribute])
    for row in rows:
        tally = tallies.get(row[attribute])
        if tally is None:
            # Value not listed for this attribute: ignored, as before.
            continue
        if row[14] == ">50K":
            tally[0] += 1
        else:
            tally[1] += 1

    Entropy = 0
    for value in attributes[attribute]:
        high, low = tallies[value]
        # A value whose rows all share one label (or that has no rows at all)
        # contributes nothing.  The previous version reset the running total
        # to 0 in that case, throwing away what had been accumulated and
        # making the result depend on the iteration order of the values.
        if high != 0 and low != 0:
            nb = float(high + low)
            # NOTE(review): the weight denominator is taken from the
            # `attributes` table rather than from the number of rows in the
            # category -- kept as in the original, confirm it is intended.
            Entropy = Entropy + (
                (high / nb) * math.log(high / nb, 2)
                + (low / nb) * math.log(low / nb, 2)
            ) * nb / len(attributes[ParentNode.name][category])

    # Entropy is a sum of p*log2(p) terms, i.e. it is <= 0 here.
    return -categoryEntropy + Entropy


def Main(filename, ParentNode, MapParentNode):
    """Recursively grow the decision tree below *ParentNode*.

    For every category in *MapParentNode* one child node is attached to
    ``ParentNode.children[category]``:

    * if ``OneClass(filename, ParentNode, category)`` does not compare equal
      to False, a leaf (name -1) is created and its ``Class`` attribute is
      set from the second element of the OneClass result;
    * otherwise a gain score is computed for every attribute for which
      ``isParent(ParentNode, attribute)`` is False; the attribute with the
      highest score becomes the child's name and Main recurses into it with
      ``attributes.get(<that attribute>)``;
    * if no attribute is left, a leaf (name -1) without ``Class`` is created.

    Parameters:
        filename      -- path of the CSV data file.
        ParentNode    -- node whose ``name`` is used as the CSV column index
                         that is compared against each category.
        MapParentNode -- iterable of the categories of ParentNode's attribute.

    Relies on module-level names defined elsewhere in this file:
    ``attributes``, ``OneClass``, ``CategoryEntropy``, ``isParent``, ``Node``,
    and the ``csv`` and ``math`` modules.  Returns None; the tree is built by
    mutating ParentNode.
    """
    for category in MapParentNode:
        # Evaluated once and reused below (the original called it twice with
        # identical arguments).
        # NOTE(review): the result is unpacked as a 2-tuple in the leaf
        # branch; a tuple never compares equal to False, so confirm that
        # OneClass really returns a bare False for mixed categories.
        one_class = OneClass(filename, ParentNode, category)

        if one_class == False:  # noqa: E712 -- keep the original comparison
            categoryEntropy = CategoryEntropy(filename, ParentNode.name, category)

            # Only attributes not already used on the path to this node.
            candidates = [attribute for attribute in attributes
                          if isParent(ParentNode, attribute) == False]  # noqa: E712

            gains = {}
            if candidates:
                # Read the file once per category instead of once per attribute.
                rows = _read_category_rows(filename, ParentNode.name, category)
                for attribute in candidates:
                    gains[attribute] = _attribute_gain(
                        rows, attribute, ParentNode, category, categoryEntropy)

            if gains:
                # Pick the attribute with the highest gain; on ties the last
                # one in iteration order wins, as in the original loop.
                att = -1
                max_val = max(gains.values())
                for name, value in gains.items():
                    if value == max_val:
                        att = name

                NewNode = _attach_child(ParentNode, category, att)
                # Repeat for the children of the new node.
                Main(filename, NewNode, attributes.get(NewNode.name))
            else:
                # No attribute left, but the category is still not one class.
                _attach_child(ParentNode, category, -1)
        else:
            # The category is a single class: create a leaf carrying it.
            leaf = _attach_child(ParentNode, category, -1)
            _, leaf.Class = one_class
Beispiel #2
0
# The row count and the entropy of the whole data set do not depend on the
# attribute being scored, so compute them once instead of once per Gain call
# (assumes nbRows and EntropyAllData are deterministic for a given file).
data_file = "adult1.data"
total_rows = nbRows(data_file)
base_entropy = EntropyAllData(data_file, total_rows)

# Placeholder entry so that positions in `gain` keep lining up with the column
# numbers passed to Gain (presumably for a column skipped above this excerpt).
gain.append(0)
gain.append(Gain(data_file, education, 3, total_rows, base_entropy))
# ignore Education Number attribute
gain.append(0)
# Remaining attributes, appended in column order 5..13.
for values, column in (
    (maritalStatus, 5),
    (occupation, 6),
    (relationship, 7),
    (race, 8),
    (sex, 9),
    (capitalGain, 10),
    (capitalLoss, 11),
    (hours, 12),
    (nativeCountry, 13),
):
    gain.append(Gain(data_file, values, column, total_rows, base_entropy))

# Initialisation: the root node is named after the position of the highest
# gain in the list.
Tree = Node()
Tree.name = gain.index(max(gain))
Tree.parents[Tree.name] = 'None'

# Build decision Tree
Main(data_file, Tree, attributes[Tree.name])


# Apply Decision Tree
DecisionTree("Test", "Test1", Tree)

# output
output("Test1", "Test2")