if __name__ == '__main__':
    # Script entry point: parse one ARFF data set, then run the decision-tree
    # evaluation followed by the ZeroR evaluation on it.

    # To reuse a previously pickled data set instead of parsing an ARFF file:
    # pickledata = open('pickledata','rb')
    # attrs = pickle.load(pickledata)
    # data = pickle.load(pickledata)

    # Data set to evaluate; uncomment one of the alternatives to switch.
    # dataset = 'tennis.arff'
    # dataset = 'restaurant.arff'
    dataset = 'breast-cancer.arff'
    # dataset = 'nursery.arff'
    # dataset = 'lymphography.arff'

    # Close the file as soon as parsing is finished instead of leaking the
    # handle for the rest of the run.
    with open(dataset) as arff_file:
        attrs, data = readARFF.readArff(arff_file)
    # Single-argument parenthesised print: same output on Python 2 and 3.
    print('#####   %s dataset   #####' % dataset)

    # Debugging aid: build one tree over the whole data set and dump it.
    # print len(data)
    # attrslist = readARFF.getAttrList(attrs)
    # root = makeTree(data, attrslist, attrs, readARFF.computeZeroR(data))
    # root.printTree()

    # NOTE(review): the leading 5 is presumably the number of evaluation
    # rounds -- confirm against evaluation() / evaluation_zeroR().
    evaluation(5, attrs, data)
    evaluation_zeroR(5, data)





        p = computePrecision(TPCount,FPCount,TNCount,FNCount)
        r = computeRecall(TPCount,FPCount,TNCount,FNCount)
        a = computeAccuracy(TPCount,FPCount,TNCount,FNCount)
        evalResult[c] = (p,r,a)

    drawChart(evalResult)
    return evalResult



if __name__ == '__main__':
    # Script entry point: load the ARFF file named on the command line and
    # run five rounds of random 80/20 train/test evaluation.
    if len(sys.argv) < 2:
        print("Usage: decisionTree.py #datasetName")
        sys.exit(-1)
    fname = sys.argv[-1]
    # Close the file once parsing is done instead of leaking the handle.
    with open(fname) as arffFile:
        (attrs, data, classification) = readARFF.readArff(arffFile)

    # Per-round result accumulators (filled in further down the script).
    resultTest = {}
    resultTrain = {}
    resultZeroR = {}
    for time in range(5):
        # Same text the Python 2 statement `print "Round ",time+1,":"` emits.
        print("Round  %d :" % (time + 1))
        index = range(len(data))
        # 80% of the row indices, drawn without replacement, form the
        # training split; every remaining index goes to the test split.
        trainSample = random.sample(index, int(len(data) * 0.8))
        # Set membership keeps the complement computation linear; testing
        # against the list directly rescans it for every index.
        trainIndexSet = set(trainSample)
        testSample = [i for i in index if i not in trainIndexSet]

        trainDataset = [data[i] for i in trainSample]
        testDataset = [data[i] for i in testSample]

        print("\nUsing ZeroR:")
        rz = evalZeroR(trainDataset, testDataset, classification, attrs)
Beispiel #3
0
    print '     training_precision:'
    print_pr_re(train_precision_average)
    print '     training_recall:'
    print_pr_re(train_recall_average)
    print '     training_accuracy: %f%%' % (train_accuracy_average * 100)


if __name__ == '__main__':
    # Script entry point: parse one ARFF data set, then run the decision-tree
    # evaluation followed by the ZeroR evaluation on it.

    # To reuse a previously pickled data set instead of parsing an ARFF file:
    # pickledata = open('pickledata','rb')
    # attrs = pickle.load(pickledata)
    # data = pickle.load(pickledata)

    # Data set to evaluate; uncomment one of the alternatives to switch.
    # dataset = 'tennis.arff'
    # dataset = 'restaurant.arff'
    dataset = 'breast-cancer.arff'
    # dataset = 'nursery.arff'
    # dataset = 'lymphography.arff'

    # Close the file as soon as parsing is finished instead of leaking the
    # handle for the rest of the run.
    with open(dataset) as arff_file:
        attrs, data = readARFF.readArff(arff_file)
    # Single-argument parenthesised print: same output on Python 2 and 3.
    print('#####   %s dataset   #####' % dataset)

    # Debugging aid: build one tree over the whole data set and dump it.
    # print len(data)
    # attrslist = readARFF.getAttrList(attrs)
    # root = makeTree(data, attrslist, attrs, readARFF.computeZeroR(data))
    # root.printTree()

    # NOTE(review): the leading 5 is presumably the number of evaluation
    # rounds -- confirm against evaluation() / evaluation_zeroR().
    evaluation(5, attrs, data)
    evaluation_zeroR(5, data)
Beispiel #4
0
        if correctPredicted[item] == 0:
            Precision[item] = 0
            Recall[item] = 0
        else:
            Precision[item] = float(correctPredicted[item]) / float(timesPredicted[item])
            Recall[item] = float(correctPredicted[item]) / float(exampleLabeled[item])
        allCorrect += correctPredicted[item]

    accuracy = float(allCorrect) / len(testingData)

    print "For multi-class -----------"
    print "Precision: ", Precision
    print "Recall: ", Recall
    print "Accuracy: ", accuracy

### This part needs to be modified for each different dataset
def isPositive(result):
    """Return True when the positive-class label occurs in `result`.

    `result` may be any object supporting the `in` operator; for a string
    this is a substring test, for a container a membership test.
    """
    positive_label = "no-recurrence-events"
    return positive_label in result

if __name__ == '__main__':
    # Script entry point: load a fixed ARFF data set, split it into training
    # and testing portions, and report the multi-class metrics.
    filename = 'nursery.arff'

    # Parse the file once and close it promptly; opening it separately for
    # the attributes and for the data parsed it twice and leaked both handles.
    with open(filename) as arffFile:
        parsed = readARFF.readArff(arffFile)
    attributes = parsed[0]
    data = parsed[1]
    # Not used below in this block; kept because it is a module-level name
    # that other code in the file may read.
    alist = readARFF.getAttrList(attributes)
    trainingData, testingData = createTrainAndTestData(data)

    #runEvaluation5times(data, attributes, None)
    #runZeroREvaluation5times(data, attributes, None)

    calculateForMultiClass(trainingData, testingData, attributes, None)
Beispiel #5
0
            else:
                node.children[value] = makeTree(subSet, aList, attributes, readARFF.computeZeroR(attributes, subSet))
        return node


def printNode(root):
    """Print `root`, then recursively print every node beneath it.

    Nodes are visited depth-first, parent before children, following the
    iteration order of each node's `children` mapping.

    Args:
        root: a node exposing a `children` mapping whose values are nodes.
    """
    # Single-argument parenthesised print: same output on Python 2 and 3.
    print(root)
    if not root.children:
        # Leaf node: nothing further to print.
        return
    for key in root.children:
        printNode(root.children[key])


if __name__ == '__main__':
    # Script entry point: load the ARFF file named on the command line and
    # run `times` rounds of random 80/20 train/test evaluation.
    if len(sys.argv) < 2:
        # Without an argument sys.argv[-1] is the script itself, which would
        # then be handed to the ARFF parser.
        print("Usage: python %s ARFF_FILE" % sys.argv[0])
        sys.exit(-1)
    fileName = sys.argv[-1]
    # Close the file once parsing is done instead of leaking the handle.
    with open(fileName) as arffFile:
        attributes, data = readARFF.readArff(arffFile)
    listAttributes = readARFF.getAttrList(attributes)
    times = 5
    # Running totals across rounds.
    total = zero = 0
    precision = recall = precisionZero = recallZero = 0
    for i in range(times):
        # Split by row index, not by row value: testing `d not in trainData`
        # drops every test row that merely equals some training row, so data
        # sets with duplicate rows lose part of their test split (and each
        # check rescans the training list).
        trainIndices = random.sample(range(len(data)), int(len(data) * 0.8))
        trainIndexSet = set(trainIndices)
        trainData = [data[j] for j in trainIndices]
        # NOTE(review): the tree's default value is computed from the full
        # data set (test rows included), while the ZeroR baseline uses only
        # the training rows -- confirm this asymmetry is intended.
        defaultValue = readARFF.computeZeroR(attributes, data)
        zeroRValue = readARFF.computeZeroR(attributes, trainData)
        root = makeTree(trainData, listAttributes, attributes, defaultValue)
        #printNode(root)
        TP = tp = 0
        # Every row whose index was not sampled forms the test split.
        testData = []
        for j, d in enumerate(data):
            if j not in trainIndexSet:
                testData.append(d)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="""
    Simply use to test:
    `python decisionTree.py ARFF_FILE`

    Wanzhang Sheng, Copyright 2013, GPL
    """)
    parser.add_argument('arff_file', help='The source ARFF file.')
    parser.add_argument('--verbose', dest='verbose', action='store_true', help="Be verbose to debug.")
    args = parser.parse_args()
    VERBOSE = args.verbose

    (attrs, data, classify_attr) = readARFF.readArff(open(args.arff_file))
    domain = classify_attr.values()[0]
    random.seed()

    print ('=' + args.arff_file).ljust(25,'=')
    total = init_statistics(domain)
    total_noise = 0
    for time in range(0,5):
        print " \033[93m-%d time-\033[0m" % (time+1)
        random.shuffle(data)
        sp = int(len(data)*4/5)
        train_data = data[:sp]
        test_data = data[sp+1:]
        root = makeTree(train_data, attrs)

        # noise