def linearRegression(inputFiles, i=1, quarters=4, dataReduction=False):
    """Plot a degree-``i`` single-variable fit for every file in ``inputFiles``.

    For each file the rows are sorted by column 0 (x), a design matrix is
    built with ``tools.createZ``, coefficients are obtained with ``regress``
    and the prediction from ``YHat`` is drawn in red over a green scatter of
    the raw points. Each file is drawn in its own cell of a 2x2 subplot
    grid, so the function is meant for up to four files.

    Parameters:
        inputFiles: iterable of data-set identifiers accepted by
            ``tools.readData``.
        i: value forwarded to ``tools.createZ`` and shown in the figure
            title as the degree.
        quarters: number of quarters of the sorted rows used for the
            reduced fit; only read when ``dataReduction`` is true.
        dataReduction: when true, additionally fit on the first
            ``quarters * (len(data) // 4)`` sorted rows and overlay that
            fit, evaluated on all x values, in blue.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    regr = linear_model.LinearRegression(fit_intercept=False)

    for k, File in enumerate(inputFiles, 1):
        data = tools.readData(File)
        # Sort rows by x. The original evaluated this expression without
        # assigning it, so the line plot and the reduced subset silently
        # worked on unsorted rows.
        data = data[np.argsort(data[:, 0])]
        X = data[:, 0]
        Y = data[:, 1]

        # Floor division keeps the slice bound an integer on Python 2 and 3.
        limit = quarters * (len(data) // 4)

        Z = tools.createZ(X, i)
        theta = regress(Z, Y)
        Y_hat = YHat(theta, X)

        plt.subplot(2, 2, k)
        plt.scatter(X, Y, color="green")
        plt.plot(X, Y_hat, color="red", lw=3, label="Original Method")

        if not dataReduction:
            # Reference fit with scikit-learn; its prediction is currently
            # not plotted.
            regr.fit(Z, Y)
        else:
            # Fit on the leading rows only, then predict over the full x
            # range so both curves can be compared on the same axes.
            Z = tools.createZ(data[0:limit, 0], i)
            theta = regress(Z, data[0:limit, 1])
            Y_hat_small = YHat(theta, X)
            plt.plot(X, Y_hat_small, color="blue", lw=1,
                     label="Reduced Data Set")
            plt.title("Reduced Data %sn/4" % quarters)

    plt.suptitle("Single Variable Degree: %s" % i)
    plt.show()
def linearRegressionKFold(inputFiles, i=1):
    """Report 10-fold cross-validated errors for a degree-``i`` fit.

    For every file, column 0 is used as x and column 1 as y. The rows are
    split into 10 shuffled folds; on each fold the model is fitted on the
    training rows with ``tools.createZ`` and ``regress``, predictions are
    produced with ``YHat`` and scored with ``tools.findError``. The mean
    train and test errors over the folds are printed, after which
    ``py_linearRegression(X, Y)`` is called for the same data set.

    Parameters:
        inputFiles: iterable of data-set identifiers accepted by
            ``tools.readData``.
        i: value forwarded to ``tools.createZ`` and printed as the degree.

    Returns:
        The mean test error of the last file processed, or ``None`` when
        ``inputFiles`` is empty.
    """
    print("\nSingle Variable, Degree: %s" % i)
    print("###########################")

    # Defined up front so an empty ``inputFiles`` returns None instead of
    # raising NameError at the final return.
    TestError = None

    for File in inputFiles:
        print("===========================")
        print("Data Set %s" % File)
        data = tools.readData(File)
        X = data[:, 0]
        Y = data[:, 1]

        # The KFold object is iterated directly and supports len(), i.e. the
        # iterable-style API that takes the sample count as first argument.
        kf = KFold(len(data), n_folds=10, shuffle=True)
        TrainError = 0
        TestError = 0
        for train, test in kf:
            Z = tools.createZ(X[train], i)
            theta = regress(Z, Y[train])
            Y_hat = YHat(theta, X[train])
            Y_hat_test = YHat(theta, X[test])
            # Score the predictions against the targets. The original passed
            # the coefficient vector ``theta`` here and left both prediction
            # arrays unused.
            # NOTE(review): assumes tools.findError(predicted, actual) -
            # confirm against tools.py.
            TrainError = TrainError + tools.findError(Y_hat, Y[train])
            TestError = TestError + tools.findError(Y_hat_test, Y[test])

        # Float divisor avoids truncation if the summed errors are integers.
        folds = float(len(kf))
        TestError = TestError / folds
        TrainError = TrainError / folds

        print("---------------------------")
        print("Test Error: %s" % TestError)
        print("Train Error: %s" % TrainError)
        py_linearRegression(X, Y)

    return TestError