def five_fold():
    """Run repeated 5-fold cross-validation and print the mean and variance of the MSE.

    Corresponds to the parts marked 7c-7e in the original comments.

    For each of 10 trials a fresh data set of 500 samples is drawn with
    ``data_generator2(samples_count, gen_coefficients())``, the sample order is
    shuffled, and the samples are cut into 5 consecutive folds of 100.  Each
    fold is used once as the test set while the remaining 400 samples form the
    training set; the 5 values returned by ``calculate_MSE`` are averaged into
    one error per trial.

    Takes no arguments and returns nothing; the results are printed.

    NOTE(review): assumes ``x`` holds one sample per column and ``y`` one
    target per sample, with ``samples_count`` samples in each -- confirm
    against ``data_generator2``.
    """
    iteration_count = 10
    sub_iteration_count = 5
    samples_count = 500
    testing_samples_count = 100
    # Passed straight to calculate_MSE; presumably a regularisation
    # strength -- confirm against calculate_MSE.
    lmda = np.exp(-2)

    errors = []
    for _ in range(iteration_count):
        # 7c: new data set for every trial; the third return value is unused.
        x, y, _unused = data_generator2(samples_count, gen_coefficients())
        samples = np.asarray(x).T   # one row per sample
        targets = np.asarray(y)

        # Shuffle indices rather than zipped (sample, target) pairs: this
        # avoids building a ragged object array (rejected by recent NumPy)
        # and does not rely on zip() returning a list.
        order = np.random.permutation(samples_count)

        mse = 0
        for j in range(sub_iteration_count):
            start = j * testing_samples_count
            stop = start + testing_samples_count
            test_idx = order[start:stop]
            # Everything outside the current fold is training data.
            train_idx = np.concatenate((order[:start], order[stop:]))

            # Transpose back so samples are columns again, as in x.
            testX = samples[test_idx].T
            testY = targets[test_idx]
            trainX = samples[train_idx].T
            trainY = targets[train_idx]

            mse += calculate_MSE(trainX, trainY, testX, testY, lmda) / sub_iteration_count
        # 7d: one cross-validated error per trial
        errors.append(mse)

    # A single pre-formatted argument prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print("Five fold average MSE:  %s" % (sum(errors) / len(errors),))
    # 7e
    print("Five fold variance:  %s" % (np.var(errors),))
def holdout():
    """Run repeated hold-out validation and print the mean and variance of the MSE.

    Corresponds to the parts marked 7a-7b in the original comments.

    For each of 10 trials a fresh data set of 500 samples is drawn with
    ``data_generator2(samples_count, gen_coefficients())`` and the sample order
    is shuffled.  The first 100 shuffled samples become the test set and the
    remaining 400 the training set; ``calculate_MSE`` is called once per trial
    and its result recorded.

    Takes no arguments and returns nothing; the results are printed.

    NOTE(review): assumes ``x`` holds one sample per column and ``y`` one
    target per sample, with ``samples_count`` samples in each -- confirm
    against ``data_generator2``.
    """
    iteration_count = 10
    samples_count = 500
    testing_samples_count = 100
    # Passed straight to calculate_MSE; presumably a regularisation
    # strength -- confirm against calculate_MSE.
    lmda = np.exp(-2)

    errors = []
    # 7a
    for _ in range(iteration_count):
        # New data set for every trial; the third return value is unused.
        x, y, _unused = data_generator2(samples_count, gen_coefficients())
        samples = np.asarray(x).T   # one row per sample
        targets = np.asarray(y)

        # Shuffle indices rather than zipped (sample, target) pairs: this
        # avoids building a ragged object array (rejected by recent NumPy)
        # and does not rely on zip() returning a list.
        order = np.random.permutation(samples_count)
        test_idx = order[:testing_samples_count]
        train_idx = order[testing_samples_count:]

        # Transpose back so samples are columns again, as in x.
        testX = samples[test_idx].T
        testY = targets[test_idx]
        trainX = samples[train_idx].T
        trainY = targets[train_idx]

        errors.append(calculate_MSE(trainX, trainY, testX, testY, lmda))

    # A single pre-formatted argument prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print("Holdout average MSE:  %s" % (sum(errors) / len(errors),))
    # 7b
    print("Holdout variance:  %s" % (np.var(errors),))