Esempio n. 1
0
def cross_Valid(egs, numCross, B):
    crossData = ut.dataCrossSplit(egs, numCross, False)
    fold = 0
    errors = {}
    for x in B:
        errors[x] = [0] * numCross
    for i in range(numCross):
        data = crossData[i]
        print "# Fold", fold
        for b in B:
            print "  ", b, "Bases:",
            err = calcError(data[0], data[1], b)
            print err
            errors[b][fold] = err
        fold = fold + 1
    return errors
Esempio n. 2
0
def cross_vad(examples, T, num_folds=10):
    """Cross-validate LogitBoost and print per-fold and summary error rates.

    ``examples`` is split with ``ut.dataCrossSplit(examples, num_folds,
    False)``. For every fold a classifier is built with
    ``LogitBoost(fold[0], T)`` and its misclassification rate is measured on
    ``fold[0]`` and ``fold[1]`` (reported as training and test data). An
    example ``x`` counts as an error when ``classifier(x) != x[0]``.

    After all folds, the mean and standard deviation of both rates are
    printed. Nothing is returned.
    """
    data = ut.dataCrossSplit(examples, num_folds, False)
    errorRates = []
    for i in range(num_folds):
        egs = data[i]
        classifier = LogitBoost(egs[0], T)
        # Error rate on the fitting subset first, then on the held-out one.
        error = []
        for subset in (egs[0], egs[1]):
            wrong = sum(1 for x in subset if classifier(x) != x[0])
            # float() keeps the division from truncating under Python 2.
            error.append(float(wrong) / len(subset))
        print "Fold ", i, " trainingData errorRate: ", error[0], " testData errorRate:", error[1]
        errorRates.append(error)
    # Column 0 holds the training rates, column 1 the test rates.
    arr = np.array(errorRates)
    print "Train Mean ErrorRate:", np.mean(arr[:, 0]), " Test Mean ErrorRate:", np.mean(arr[:, 1])
    # The second label used to read "Test Mean ErrorRate" although the value
    # printed is the standard deviation of the test error rates.
    print "Train StdVar ErrorRate:", np.std(arr[:, 0]), " Test StdVar ErrorRate:", np.std(arr[:, 1])
Esempio n. 3
0
def cross_vad(examples, num_folds = 10):
    """Cross-validate a decision tree and print per-fold and summary error rates.

    ``examples`` is split with ``ut.dataCrossSplit(examples, num_folds,
    False)``. For every fold a ``DTree(SelectAtt)`` is trained with
    ``training(fold[0], 1)`` and its misclassification rate is measured on
    ``fold[0]`` and ``fold[1]`` (reported as training and test data). An
    example ``x`` counts as an error when ``dt.predict(x) != x[0]``.

    After all folds, the mean and standard deviation of both rates are
    printed. Nothing is returned.
    """
    data = ut.dataCrossSplit(examples, num_folds, False)
    errorRates = []
    for i in range(num_folds):
        egs = data[i]
        dt = DTree(SelectAtt)
        # NOTE(review): the meaning of the second argument (1) is defined by
        # DTree.training, which is not visible here.
        dt.training(egs[0], 1)
        # Error rate on the fitting subset first, then on the held-out one.
        error = []
        for subset in (egs[0], egs[1]):
            wrong = sum(1 for x in subset if dt.predict(x) != x[0])
            # float() keeps the division from truncating under Python 2.
            error.append(float(wrong) / len(subset))
        print "Fold ", i, " trainingData errorRate: ", error[0], " testData errorRate:", error[1]
        errorRates.append(error)
    # Column 0 holds the training rates, column 1 the test rates.
    arr = np.array(errorRates)
    print "Train Mean ErrorRate:", np.mean(arr[:,0]), " Test Mean ErrorRate:", np.mean(arr[:,1])
    # The second label used to read "Test Mean ErrorRate" although the value
    # printed is the standard deviation of the test error rates.
    print "Train StdVar ErrorRate:", np.std(arr[:,0]), " Test StdVar ErrorRate:", np.std(arr[:, 1])
Esempio n. 4
0
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 20 22:55:53 2013

@author: jiecaoc
"""

import utilities as ut
import classes as cls

egs = ut.importRawData('Ionosphere.csv')


raw = ut.dataCrossSplit(egs, 2, False)
dt = cls.DTree(cls.SelectAtt)
dt.training(raw[0][0])
c = 0
for eg in raw[0][1]:
    if eg[0] != dt.predict(eg):
        # print eg[0], dt.predict(eg)
        c = c + 1
print (c + .0) / len(egs)
Esempio n. 5
0
        for k in range(1, len(crossData[i][1]) + 1):
            for x in crossData[i][1][k]:
                totNum = totNum + 1
                if cl.classify(x) != k:
                    totError = totError + 1
    return (totError + 0.) / totNum
            

# check input: expects <data file> <number of folds>
if len(sys.argv) != 3:
    print "input error"
    # Exit with a non-zero status so callers can detect the failure; the bare
    # exit() helper is injected by the site module for interactive use.
    sys.exit(1)
fileName = sys.argv[1]
# Second argument handed to cross_validation: 3 for Iris.csv, 9 otherwise.
if fileName == "Iris.csv":
    subDim = 3
else:
    subDim = 9
num_cross = int(sys.argv[2])
# import data
# The rows in the file are stored in order, so they have to be randomized
# before running cross validation. The randomized data used to be
# overwritten by a second, unshuffled import; that reload is removed.
rawData = ut.makeDataRandom(ut.importRawData(fileName))
trainData = ut.dataCrossSplit(rawData, num_cross)


print "Data: ", fileName
print "Error rate for cross_validation:", cross_validation(trainData, subDim)
Esempio n. 6
0
def leastSquare(trainData, K = 3):
    """
        Fit a linear least-squares model to the training data.

        Each element of trainData is indexed as x[0] for the class label
        and x[1:] for the feature values. The label is converted to the
        regression target by classEncode.

        K is not used; it is kept so existing callers keep working.

        Returns f(x), defined as f(x) = W'x, where W is the least-squares
        solution of XW = Y.
    """
    # construct Y: one encoded target per example
    Y = np.array([classEncode(x[0]) for x in trainData])
    # construct X: the feature values without the label
    X = np.array([x[1:] for x in trainData])
    # The pseudo-inverse equals inv(X'X)X' whenever X'X is invertible, and
    # still gives the minimum-norm least-squares solution when X'X is
    # singular (e.g. a constant or collinear feature column), where the
    # explicit inverse raised LinAlgError.
    W_T = np.dot(np.linalg.pinv(X), Y).transpose()
    return lambda x: np.dot(W_T, x)
    

# check input: expects <data file> <number of folds>
if len(sys.argv) != 3:
    print "input error"
    # Exit with a non-zero status so callers can detect the failure; the bare
    # exit() helper is injected by the site module for interactive use.
    sys.exit(1)
fileName = sys.argv[1]
num_cross = int(sys.argv[2])

# NOTE(review): makeDataRandom presumably shuffles the examples before they
# are split into folds - confirm against utilities.
rawData = ut.makeDataRandom(ut.importRawData(fileName))


crossData = ut.dataCrossSplit(rawData, num_cross, False)
print "Data: ", fileName
print "Error rate for cross_validation:", cross_validation(crossData)