import copy
#argmin is assumed here to come from numpy; the original import block is not shown.
from numpy import argmin

import Regression

def kFoldErrorChoose(x,y,maxOrder,k):
    #Choose the polynomial order (up to maxOrder) with the lowest k-fold cross-validation error.
    e = [0 for i in range(0,maxOrder)]
    d = kSplit([x,y],k)
    for order in range(1,maxOrder+1):
        sumError = 0
        for i in range(0,k):
            #The ith partition is held out as test data; the remaining k-1 partitions form the training set.
            Dcopy = copy.copy(d)
            dtest = Dcopy.pop(i)
            dtrain = [[xi for part in Dcopy for xi in part[0]],
                      [yi for part in Dcopy for yi in part[1]]]
            f = Regression.polyTrain(dtrain[0],dtrain[1],order)
            sumError += meanSquaredError(dtest[0],dtest[1],f)
        #Average the held-out error over the k folds.
        e[order-1] = sumError/(k * 1.0)
    return min(e),(argmin(e)+1)
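# kFoldErrorChoose relies on two helpers defined elsewhere in this module: kSplit and
# meanSquaredError. The versions below are minimal sketches, not the original
# implementations; they assume kSplit takes D = [x, y] and returns a list of k
# partitions of the same [x_i, y_i] shape, and that meanSquaredError scores a fitted
# callable f on held-out points.

def kSplit(D, k):
    #Split D = [x, y] into k contiguous partitions, each of the form [x_i, y_i].
    n = len(D[0])
    size = n // k
    parts = []
    for i in range(0, k):
        lo = i * size
        #The last partition absorbs any leftover points when n is not divisible by k.
        hi = n if i == k - 1 else (i + 1) * size
        parts.append([D[0][lo:hi], D[1][lo:hi]])
    return parts

def meanSquaredError(x, y, f):
    #Average squared residual of the fitted function f over the points (x, y).
    return sum((f(xi) - yi) ** 2 for xi, yi in zip(x, y)) / float(len(x))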
raise Exception("Invalid command line argument")

#In the following, D is the data set: its first entry is the list of x values and its second is the list of y values.
error,order = CV.kFoldErrorChoose(D[0],D[1],10,5)
#Graph the base polynomial in red
Graph.lineColor(D[0],D[1],'red')
#Add Gaussian noise to the data outputs
D[1] = Data.addGaussianNoise(D[1],1.0/2000)
#Graph the noisy points in blue
Graph.pointsSimple(D[0],D[1])
#Estimate the coefficients of the polynomial of the chosen order
fit = Regression.polyTrain(D[0],D[1],order)
#Get the fitted function's estimates for the training x values
z = [fit(i) for i in D[0]]
#Graph the fitted polynomial in green
Graph.lineColor(D[0],z,'g')
#Show the plot
Graph.show()

if len(sys.argv) == 1:
    print "True function was an order " + str(trueOrder) + " polynomial, fit with order " + str(order)
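# The driver above (and both order selectors) call Regression.polyTrain(x, y, order)
# and expect a callable back, since the fit is later evaluated as fit(i). The original
# Regression module is not shown here; a minimal stand-in, assuming an ordinary
# least-squares polynomial fit via numpy, could look like this:

import numpy as np

def polyTrain(x, y, order):
    #Fit a degree-`order` polynomial to (x, y) by least squares and return it as a callable.
    coeffs = np.polyfit(x, y, order)  #coefficients, highest degree first
    return np.poly1d(coeffs)          #poly1d objects can be called like f(xi)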
def squaredErrorChoose(x,y,maxOrder):
    #Choose the polynomial order (up to maxOrder) with the lowest training-set mean squared error.
    e = [0 for i in range(0,maxOrder)]
    for order in range(1,maxOrder+1):
        f = Regression.polyTrain(x,y,order)
        e[order-1] = meanSquaredError(x,y,f)
    return min(e),(argmin(e)+1)
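# Hypothetical side-by-side use of the two selectors on the same noisy data set D
# (the first call mirrors the driver above; the variable names are illustrative):
#
#   cvError, cvOrder = kFoldErrorChoose(D[0], D[1], 10, 5)
#   trainError, trainOrder = squaredErrorChoose(D[0], D[1], 10)
#
# Because squaredErrorChoose scores each order on the same points it was trained on,
# its error can only shrink as the order grows, so argmin tends to return maxOrder;
# the held-out error in kFoldErrorChoose is what identifies the order that generalizes.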