Exemple #1
0
def getEntropy(attribute, examples):
    """Return the weighted information content of splitting on ``attribute``.

    For every value listed in ``Attribute.getAttributes()[attribute]`` the
    examples carrying that value are counted as positive (``label == True``)
    or negative.  Each non-empty group contributes
    ``(group size / len(examples)) * getInfoConent([posFraction, negFraction])``
    to the returned sum.  Values with no matching example contribute nothing,
    so the result is ``0`` when no example matches any listed value.

    The examples are tallied in a single pass instead of being rescanned for
    every attribute value; this requires the attribute values to be hashable.
    """
    # One pass over the examples: attribute value -> [positive, negative].
    tallies = {}
    for example in examples:
        counts = tallies.setdefault(example.attributes[attribute], [0, 0])
        # Deliberately '== True' (not truthiness): any other label value is
        # counted as negative, exactly as before.
        if example.label == True:
            counts[0] += 1
        else:
            counts[1] += 1

    entropy = 0
    # Walk the declared value domain in its own order so the floating point
    # summation order is unchanged.
    for attribVal in Attribute.getAttributes()[attribute]:
        posEx, negEx = tallies.get(attribVal, (0, 0))
        totalAttribEx = float(posEx + negEx)
        if totalAttribEx != 0:
            aInfo = float(posEx) / totalAttribEx
            bInfo = float(negEx) / totalAttribEx
            # totalAttribEx is a float, so this is true division on Python 2.
            entropy += (totalAttribEx / len(examples)) \
                * getInfoConent([aInfo, bInfo])
    return entropy
Exemple #2
0
def decisionListLearning(examples):
    """Learn a decision list from ``examples`` and return its first ListNode.

    With no examples the result is the trivial node
    ``ListNode({}, {}, {}, False)``.  Otherwise ``findTest`` supplies the
    attributes, inclusion flags, pure label and operator for one node, and
    that node's successor is the list learned recursively from the output of
    ``filterExamples``.

    Raises DListCreationErr when ``findTest`` returns no attributes.
    """
    # base case: empty input -> trivial node with predicted label 'No'/False
    if not len(examples):
        return ListNode({}, {}, {}, False)

    # ask for a test S.T. the covered subset is either all +ve or all -ve
    testResult = findTest(examples, Attribute.getAttributes())
    testAttribs, testIncl, pureLabel, testOp = testResult
    if len(testAttribs) == 0:
        raise DListCreationErr('couldn\'t find attributes for even split.')

    head = ListNode(testAttribs, testIncl, testOp, pureLabel)
    # the rest of the list is learned from the examples filterExamples returns
    remaining = filterExamples(examples, testAttribs, testIncl, testOp)
    head.setNextNode(decisionListLearning(remaining))
    return head
Exemple #3
0
def decisionListLearning(examples):
    """Recursively build a decision list for ``examples``; return the head node.

    An empty ``examples`` produces ``ListNode({}, {}, {}, False)``.  For a
    non-empty input, one node is created from the test chosen by ``findTest``
    and linked (via ``setNextNode``) to the list learned from the examples
    returned by ``filterExamples``.

    Raises DListCreationErr if ``findTest`` yields an empty attribute
    selection.
    """
    exampleCount = len(examples)
    if exampleCount == 0:
        # nothing to learn from: trivial node, predicted label 'No'/False
        return ListNode({}, {}, {}, False)

    # find a test S.T. the subset of examples is either all +ve or all -ve
    chosen = findTest(examples, Attribute.getAttributes())
    (attribsForTest, inclFlags, labelForTest, opForTest) = chosen
    if not len(attribsForTest):
        raise DListCreationErr('couldn\'t find attributes for even split.')

    node = ListNode(attribsForTest, inclFlags, opForTest, labelForTest)
    # next node comes from the list learned on the filtered examples
    leftover = filterExamples(examples, attribsForTest, inclFlags, opForTest)
    tail = decisionListLearning(leftover)
    node.setNextNode(tail)
    return node
Exemple #4
0
def decisionTreeLearning(examples, attribs, default):
    """Recursively build a decision tree and return its root TreeNode.

    examples -- training examples for this subtree
    attribs  -- attributes still available for splitting
    default  -- value used for the leaf when ``examples`` is empty

    Leaves are ``TreeNode(None, value)``; inner nodes are
    ``TreeNode(attribute)`` with one subtree added per value listed in
    ``Attribute.getAttributes()[attribute]``.
    """
    # no examples: use the majority value computed by the caller
    if len(examples) == 0:
        return TreeNode(None, default)

    # checkIfSameClass holds: the first example's label stands for all
    if checkIfSameClass(examples):
        return TreeNode(None, examples[0].label)

    # no attributes left to split on: fall back to the majority value
    if len(attribs) == 0:
        return TreeNode(None, majorityValue(examples))

    splitAttrib = chooseAttribute(attribs, examples)
    root = TreeNode(splitAttrib)
    fallback = majorityValue(examples)
    # one subtree per possible value of the chosen attribute
    for value in Attribute.getAttributes()[splitAttrib]:
        subset = filterExamples(splitAttrib, value, examples)
        remainingAttribs = filterAttributes(attribs, splitAttrib)
        child = decisionTreeLearning(subset, remainingAttribs, fallback)
        root.addSubtree(value, child)
    return root
Exemple #5
0
def main():
    """Train a decision tree on twelve hard-coded examples and print results.

    Builds the ``Example`` objects, learns a tree with
    ``decisionTreeLearning`` (default value ``False``), prints it with
    ``printDecisionTree``, prints the prediction for one attribute set, and
    prints the training set error as ``100 - getAccuracy(examples, rootNode)``.

    Output uses the single-argument parenthesised ``print(...)`` form, which
    prints the same text under the Python 2 print statement and is also valid
    Python 3 syntax.
    """
    # Key order matches the original dict literals.
    attribNames = ('alt', 'bar', 'fri', 'hun', 'pat', 'price', 'rain', 'res',
                   'type', 'est')
    # One entry per example: (values in attribNames order, label).
    rows = [
        ((True, False, False, True, 'some', '$$$', False, True,
          'french', '0-10'), True),
        ((True, False, False, True, 'full', '$', False, False,
          'thai', '30-60'), False),
        ((False, True, False, False, 'some', '$', False, False,
          'burger', '0-10'), True),
        ((True, False, True, True, 'full', '$', True, False,
          'thai', '10-30'), True),
        ((True, False, True, False, 'full', '$$$', False, True,
          'french', '>60'), False),
        ((False, True, False, True, 'some', '$$', True, True,
          'italian', '0-10'), True),
        ((False, True, False, False, 'none', '$', True, False,
          'burger', '0-10'), False),
        ((False, False, False, True, 'some', '$$', True, True,
          'thai', '0-10'), True),
        ((False, True, True, False, 'full', '$', True, False,
          'burger', '>60'), False),
        ((True, True, True, True, 'full', '$$$', False, True,
          'italian', '10-30'), False),
        ((False, False, False, False, 'none', '$', False, False,
          'thai', '0-10'), False),
        ((True, True, True, True, 'full', '$', False, False,
          'burger', '30-60'), True),
    ]
    examples = [Example(dict(zip(attribNames, values)), label)
                for values, label in rows]

    rootNode = decisionTreeLearning(examples, Attribute.getAttributes().keys(),
                                    False)
    printDecisionTree(rootNode)

    # Same attribute values as the last training example above.
    query = dict(zip(attribNames,
                     (True, True, True, True, 'full', '$', False, False,
                      'burger', '30-60')))
    print(predictFromDecisionTree(rootNode, query))

    print('**** prediction accuracy ****')
    trainAccuracy = getAccuracy(examples, rootNode)
    print('training set error: ' + str(100-trainAccuracy) + '%')