def BivariateMiniBatchTool(noBatches):
    """Fit scikit-learn's SGDRegressor on GDP & Freedom, then report and plot.

    The function reads its data from names defined outside this block
    (``trainInputs``, ``trainOutputs``, ``testInputs``, ``testOutputs``,
    ``gdpData``, ``freedomData``, ``outputs``, ``trainGdp``, ``trainFreedom``,
    ``testGdp``, ``testFreedom``). It prints the learnt weights and two
    mean-squared-error figures, then draws three plots. Nothing is returned.

    Args:
        noBatches: Divisor used to derive the batch size from the number of
            training samples (integer division).

    Raises:
        ZeroDivisionError: If ``noBatches`` is 0.
    """
    samples_per_batch = len(trainInputs) // noBatches

    # NOTE(review): the batch size is handed to ``average``; per the
    # scikit-learn docs that argument controls averaged SGD, not the size of
    # a mini-batch -- confirm this is the intended way to emulate batching.
    model = linear_model.SGDRegressor(
        alpha=0.01,
        max_iter=1000,
        average=samples_per_batch,
        shuffle=False,
    )
    model.fit(trainInputs, trainOutputs)

    # Weight vector in the order [intercept, GDP weight, Freedom weight].
    w0 = model.intercept_[0]
    w1 = model.coef_[0]
    w2 = model.coef_[1]
    w = [w0, w1, w2]

    print("-----with tool-----")
    print("Regression for attributes: GDP & Freedom")
    print(f"\tThe learnt model: f(X,w) = {w0!s} + {w1!s} * X1 + {w2!s} * X2")

    computedTestOutputs = model.predict(testInputs)
    library_mse = mean_squared_error(testOutputs, computedTestOutputs)
    print("\tPrediction error (tool): ", str(library_mse))
    handwritten_mse = meanSquareError(testOutputs, computedTestOutputs)
    print("\tPrediction error (manual): ", str(handwritten_mse))

    plotDataForBi(gdpData, freedomData, outputs, w, "Train & test data")
    plotDataForBi(
        trainGdp,
        trainFreedom,
        trainOutputs,
        w,
        "Train data and the learnt model",
    )
    plotData2ForBi(
        testGdp,
        testFreedom,
        testOutputs,
        computedTestOutputs,
        "Computed(green) vs real(red) test data",
    )
def UnivariateMiniBatchTool(noBatches):
    """Fit scikit-learn's SGDRegressor on GDP alone, then report and plot.

    The function reads its data from names defined outside this block
    (``trainGdp``, ``trainOutputs``, ``testGdp``, ``testOutputs``,
    ``gdpData``, ``outputs``). It prints the learnt weights and two
    mean-squared-error figures, then draws three plots. Nothing is returned.

    Args:
        noBatches: Divisor used to derive the batch size from the number of
            training samples (integer division).

    Raises:
        ZeroDivisionError: If ``noBatches`` is 0.
    """
    samples_per_batch = len(trainGdp) // noBatches

    # NOTE(review): the batch size is handed to ``average``; per the
    # scikit-learn docs that argument controls averaged SGD, not the size of
    # a mini-batch -- confirm this is the intended way to emulate batching.
    model = linear_model.SGDRegressor(
        alpha=0.01,
        max_iter=1000,
        average=samples_per_batch,
        shuffle=False,
    )

    # The estimator is fed one single-element row per GDP value.
    gdp_train_column = [[value] for value in trainGdp]
    model.fit(gdp_train_column, trainOutputs)

    # Weight vector in the order [intercept, GDP weight].
    w0 = model.intercept_[0]
    w1 = model.coef_[0]
    w = [w0, w1]

    print("-----with tool-----")
    print("Regression for attribute: GDP")
    print(f"\tThe learnt model: f(X,w) = {w0!s} + {w1!s} * X")

    gdp_test_column = [[value] for value in testGdp]
    computedOutputs = model.predict(gdp_test_column)
    library_mse = mean_squared_error(testOutputs, computedOutputs)
    print("\tPrediction error (tool): ", str(library_mse))
    handwritten_mse = meanSquareError(testOutputs, computedOutputs)
    print("\tPrediction error (manual): ", str(handwritten_mse))

    plotDataForUni(gdpData, outputs, w, "Train & test data")
    plotDataForUni(trainGdp, trainOutputs, w, "Train data and the learnt model")
    plotData2ForUni(
        testGdp,
        testOutputs,
        computedOutputs,
        "Computed vs real test data",
    )
def main():
    """Compare the library and hand-written regressors on ``2017.csv``.

    Steps performed:
      1. Read the GDP and Freedom features and the happiness score from
         ``2017.csv`` in the current working directory via ``readData``.
      2. Split the rows 80/20 into train and test sets with a fixed seed.
      3. Train one model with ``tool_regression`` and one with
         ``manual_regression``.
      4. Print the mean squared error of both models on the test set.
      5. Plot histograms of the training data and the learnt model over the
         train and test sets.

    Nothing is returned; all results are printed or plotted.
    """
    data_path = os.path.join(os.getcwd(), '2017.csv')
    # Named ``inputs``/``outputs`` so the ``input`` builtin is not shadowed.
    inputs, outputs = readData(
        data_path, 'Economy..GDP.per.Capita.', 'Freedom', 'Happiness.Score')

    # split in 80/20 percent (fixed seed keeps the split reproducible)
    np.random.seed(5)
    indexes = list(range(len(inputs)))
    trainSample = np.random.choice(
        indexes, int(0.8 * len(inputs)), replace=False)
    # Set membership instead of scanning the NumPy array for every index.
    train_index_set = set(trainSample.tolist())
    testSample = [i for i in indexes if i not in train_index_set]

    trainInputs = [inputs[i] for i in trainSample]
    trainOutputs = [outputs[i] for i in trainSample]
    testInputs = [inputs[i] for i in testSample]
    testOutputs = [outputs[i] for i in testSample]

    print('=== SKLEARN MODEL ===')
    tool_regressor = tool_regression(trainInputs, trainOutputs)

    print('\n\n=== MY MODEL ===')
    manual_regressor = manual_regression(trainInputs, trainOutputs)

    print('\n\n===Performance===')
    print('Tool prediction error: ',
          mean_squared_error(testOutputs, tool_regressor.predict(testInputs)))
    print('Manual prediction error: ',
          meanSquareError(manual_regressor, testInputs, testOutputs))

    # Each sample is indexed as [GDP, Freedom]; pull the columns out once.
    train_gdp = [row[0] for row in trainInputs]
    train_freedom = [row[1] for row in trainInputs]
    test_gdp = [row[0] for row in testInputs]
    test_freedom = [row[1] for row in testInputs]

    # Fix: the histograms used to index the raw, unsplit data with
    # range(len(trainInputs)), which showed the first 80% of the file rather
    # than the sampled training rows that the output histogram shows.
    plotDataHistogram(train_gdp, 'capita GDP')
    plotDataHistogram(train_freedom, 'freedom')
    plotDataHistogram(trainOutputs, 'Happiness score')

    # Weights in the order [intercept, GDP weight, Freedom weight].
    learnt_weights = [
        manual_regressor.intercept_,
        manual_regressor.coef_[0],
        manual_regressor.coef_[1],
    ]
    plotData(train_gdp, train_freedom, trainOutputs, learnt_weights,
             'TRAIN BASED ON LEARNT MODEL')
    plotData(test_gdp, test_freedom, testOutputs, learnt_weights,
             'TEST BASED ON LEARNT MODEL')
    predictedPlot(test_gdp, test_freedom, testOutputs,
                  manual_regressor.predict(testInputs),
                  'PREDICTED BASED ON LEARNT MODEL')
def BivariateStochastic():
    """Fit the hand-written SGD regressor on GDP & Freedom, report and plot.

    The function reads its data from names defined outside this block
    (``trainInputs``, ``trainOutputs``, ``testInputs``, ``testOutputs``,
    ``gdpData``, ``freedomData``, ``outputs``, ``trainGdp``, ``trainFreedom``,
    ``testGdp``, ``testFreedom``). It prints the learnt weights and two
    mean-squared-error figures, then draws three plots. Nothing is returned.
    """
    model = MySGDRegression()
    model.fit(trainInputs, trainOutputs)

    # Weight vector in the order [intercept, GDP weight, Freedom weight].
    w0 = model.intercept_
    w1 = model.coef_[0]
    w2 = model.coef_[1]
    w = [w0, w1, w2]

    print("-----manual stochastic-----")
    print("Regression for attributes: GDP & Freedom")
    print(f"\tThe learnt model: f(X,w) = {w0!s} + {w1!s} * X1 + {w2!s} * X2")

    computedTestOutputs = model.predict(testInputs)
    library_mse = mean_squared_error(testOutputs, computedTestOutputs)
    print("\tPrediction error (tool): ", str(library_mse))
    handwritten_mse = meanSquareError(testOutputs, computedTestOutputs)
    print("\tPrediction error (manual): ", str(handwritten_mse))

    plotDataForBi(gdpData, freedomData, outputs, w, "Train & test data")
    plotDataForBi(
        trainGdp,
        trainFreedom,
        trainOutputs,
        w,
        "Train data and the learnt model",
    )
    plotData2ForBi(
        testGdp,
        testFreedom,
        testOutputs,
        computedTestOutputs,
        "Computed(green) vs real(red) test data",
    )
def UnivariateStochastic():
    """Fit the hand-written SGD regressor on GDP alone, report and plot.

    The function reads its data from names defined outside this block
    (``trainGdp``, ``trainOutputs``, ``testGdp``, ``testOutputs``,
    ``gdpData``, ``outputs``). It prints the learnt weights and two
    mean-squared-error figures, then draws three plots. Nothing is returned.
    """
    model = MySGDRegression()

    # The regressor is fed one single-element row per GDP value.
    gdp_train_column = [[value] for value in trainGdp]
    model.fit(gdp_train_column, trainOutputs)

    # Weight vector in the order [intercept, GDP weight].
    w0 = model.intercept_
    w1 = model.coef_[0]
    w = [w0, w1]

    print("-----manual stochastic-----")
    print("Regression for attribute: GDP")
    print(f"\tThe learnt model: f(X,w) = {w0!s} + {w1!s} * X")

    gdp_test_column = [[value] for value in testGdp]
    computedOutputs = model.predict(gdp_test_column)
    library_mse = mean_squared_error(testOutputs, computedOutputs)
    print("\tPrediction error (tool): ", str(library_mse))
    handwritten_mse = meanSquareError(testOutputs, computedOutputs)
    print("\tPrediction error (manual): ", str(handwritten_mse))

    plotDataForUni(gdpData, outputs, w, "Train & test data")
    plotDataForUni(trainGdp, trainOutputs, w, "Train data and the learnt model")
    plotData2ForUni(
        testGdp,
        testOutputs,
        computedOutputs,
        "Computed vs real test data",
    )
# manual regressor2 = MyLinearBivariateRegression() regressor2.fit(trainInputs, trainOutputs) wPrim = [regressor2.intercept_, regressor2.coef_[0], regressor2.coef_[1]] print("-----manual-----") print("The learnt model: f(X,w) = " + str(wPrim[0]) + " + " + str(wPrim[1]) + " * X1 + " + str(wPrim[2]) + " * X2") # print("Real: " + str(testOutputs)) # print("Computed: " + str(list(regressor2.predict(testInputs)))) print("-----performance-----") print("Prediction error (tool): ", str(mean_squared_error(testOutputs, regressor2.predict(testInputs)))) print("Prediction error (manual): ", str(meanSquareError(testOutputs, regressor2.predict(testInputs)))) plotDataHistogram(gdpData, 'GDP') plotDataHistogram(freedomData, 'Freedom') plotDataHistogram(outputs, 'Happiness score') # for train and test data plotData(gdpData, freedomData, outputs, w, "Train & test data") # for train data plotData(trainGdp, trainFreedom, trainOutputs, w, "Train data and the learnt model") # for test data computedTestOutputs = regressor2.predict(testInputs) plotData2(testGdp, testFreedom, testOutputs, computedTestOutputs,
def main():
    """Train univariate and bivariate regressors on ``data.csv`` and compare.

    Steps performed:
      1. Read the GDP and Freedom features and the happiness score from
         ``data.csv`` in the current working directory via ``readData``.
      2. Split the rows 80/20 into train and test sets with a fixed seed and
         pass all four parts through ``executeNormalization``.
      3. Univariate (GDP only): fit scikit-learn's ``SGDRegressor`` and
         ``MySGDRegression`` and print both learnt models.
      4. Bivariate (GDP & Freedom): fit via ``tool_regression`` and
         ``manual_regression``.
      5. Print the test-set mean squared error of all four models.

    Nothing is returned; all results are printed.
    """
    data_path = os.path.join(os.getcwd(), 'data.csv')
    # Named ``inputs``/``outputs`` so the ``input`` builtin is not shadowed.
    inputs, outputs = readData(
        data_path, 'Economy..GDP.per.Capita.', 'Freedom', 'Happiness.Score')

    # split in 80/20 percent (fixed seed keeps the split reproducible)
    np.random.seed(5)
    indexes = list(range(len(inputs)))
    trainSample = np.random.choice(
        indexes, int(0.8 * len(inputs)), replace=False)
    # Set membership instead of scanning the NumPy array for every index.
    train_index_set = set(trainSample.tolist())
    testSample = [i for i in indexes if i not in train_index_set]

    trainInputs = [inputs[i] for i in trainSample]
    trainOutputs = [outputs[i] for i in trainSample]
    testInputs = [inputs[i] for i in testSample]
    testOutputs = [outputs[i] for i in testSample]

    # data normalization
    trainInputs, testInputs, trainOutputs, testOutputs = executeNormalization(
        trainInputs, testInputs, trainOutputs, testOutputs)

    # Univariate inputs: a one-column matrix holding each sample's first
    # feature (GDP). Fix: the previous code repeated the *first sample*
    # len(inputs) times, so every row was identical and still had two
    # features.
    # NOTE(review): assumes executeNormalization keeps one indexable
    # [GDP, Freedom] row per sample -- confirm against its implementation.
    GDPTrainInputs = [[row[0]] for row in trainInputs]
    GDPTestInputs = [[row[0]] for row in testInputs]

    print(" UNIVARIATE")
    print("SKLEARN REGRESSION")
    regressorSklearnUni = linear_model.SGDRegressor(
        alpha=0.005, max_iter=1000, average=len(trainInputs))
    regressorSklearnUni.fit(GDPTrainInputs, trainOutputs)
    w = [regressorSklearnUni.intercept_[0], regressorSklearnUni.coef_[0]]
    print("Learnt model is: f(x) = " + str(w[0]) + " + " + str(w[1]) + " * x")

    print("MANUAL REGRESSION")
    regressorMySGDRegression = MySGDRegression()
    regressorMySGDRegression.fit(GDPTrainInputs, trainOutputs)
    w = [
        regressorMySGDRegression.intercept_,
        regressorMySGDRegression.coef_[0],
    ]
    print("Learnt model is: f(x) = " + str(w[0]) + " + " + str(w[1]) + " * x")

    print("\n\n BIVARIATE")
    print("SKLEARN REGRESSION")
    toolRegression = tool_regression(trainInputs, trainOutputs)
    print("MANUAL REGRESSION")
    manual_regressor = manual_regression(trainInputs, trainOutputs)

    print("\n\n ERRORS")
    # Fix: the univariate errors are now computed with the univariate models;
    # previously the bivariate models were evaluated on the GDP-only inputs.
    print("1.TOOL UNIVARIATE ERROR: ",
          mean_squared_error(testOutputs,
                             regressorSklearnUni.predict(GDPTestInputs)))
    print("2.MANUAL UNIVARIATE ERROR: ",
          meanSquareError(regressorMySGDRegression, GDPTestInputs,
                          testOutputs))
    print("3.TOOL BIVARIATE ERROR: ",
          mean_squared_error(testOutputs, toolRegression.predict(testInputs)))
    print("4.MANUAL BIVARIATE ERROR: ",
          meanSquareError(manual_regressor, testInputs, testOutputs))