Example #1
0
def BivariateMiniBatchTool(noBatches):
    """Fit scikit-learn's SGDRegressor on two features and report the result.

    The function works on names resolved outside its body (trainInputs,
    trainOutputs, testInputs, testOutputs and the GDP/Freedom series used
    for plotting). It prints the learnt coefficients, prints the test error
    computed by both ``mean_squared_error`` and ``meanSquareError``, and
    then draws three plots.

    Args:
        noBatches: divisor applied to ``len(trainInputs)``; the integer
            quotient is forwarded as the regressor's ``average`` argument.
            A value of 0 raises ZeroDivisionError.
    """
    samplesPerBatch = len(trainInputs) // noBatches
    # NOTE(review): in scikit-learn an integer ``average`` is the number of
    # samples seen before weight averaging starts, not a mini-batch size --
    # confirm this is the intended use.
    sgdModel = linear_model.SGDRegressor(
        alpha=0.01,
        max_iter=1000,
        average=samplesPerBatch,
        shuffle=False,
    )
    sgdModel.fit(trainInputs, trainOutputs)

    # Collect the intercept and the two slopes in the order the plot
    # helpers receive them.
    w0 = sgdModel.intercept_[0]
    w1 = sgdModel.coef_[0]
    w2 = sgdModel.coef_[1]
    w = [w0, w1, w2]

    print("-----with tool-----")
    print("Regression for attributes: GDP & Freedom")
    modelParts = [
        "\tThe learnt model: f(X,w) = ", str(w0), " + ", str(w1),
        " * X1 + ", str(w2), " * X2"
    ]
    print("".join(modelParts))

    computedTestOutputs = sgdModel.predict(testInputs)
    toolError = mean_squared_error(testOutputs, computedTestOutputs)
    print("\tPrediction error (tool): ", str(toolError))
    manualError = meanSquareError(testOutputs, computedTestOutputs)
    print("\tPrediction error (manual): ", str(manualError))

    # Whole data set, training subset, then predicted vs. real test values.
    plotDataForBi(gdpData, freedomData, outputs, w, "Train & test data")
    plotDataForBi(trainGdp, trainFreedom, trainOutputs, w,
                  "Train data and the learnt model")
    plotData2ForBi(testGdp, testFreedom, testOutputs, computedTestOutputs,
                   "Computed(green) vs real(red) test data")
Example #2
0
def UnivariateMiniBatchTool(noBatches):
    """Fit scikit-learn's SGDRegressor on the GDP feature alone and report it.

    The function works on names resolved outside its body (trainGdp,
    trainOutputs, testGdp, testOutputs, gdpData, outputs). It prints the
    learnt intercept and slope, prints the test error computed by both
    ``mean_squared_error`` and ``meanSquareError``, and draws three plots.

    Args:
        noBatches: divisor applied to ``len(trainGdp)``; the integer
            quotient is forwarded as the regressor's ``average`` argument.
            A value of 0 raises ZeroDivisionError.
    """
    samplesPerBatch = len(trainGdp) // noBatches
    # NOTE(review): in scikit-learn an integer ``average`` is the number of
    # samples seen before weight averaging starts, not a mini-batch size --
    # confirm this is the intended use.
    sgdModel = linear_model.SGDRegressor(
        alpha=0.01,
        max_iter=1000,
        average=samplesPerBatch,
        shuffle=False,
    )

    # The estimator expects one row per sample, so wrap each scalar.
    trainFeatures = [[value] for value in trainGdp]
    sgdModel.fit(trainFeatures, trainOutputs)

    w0 = sgdModel.intercept_[0]
    w1 = sgdModel.coef_[0]
    w = [w0, w1]

    print("-----with tool-----")
    print("Regression for attribute: GDP")
    modelParts = ["\tThe learnt model: f(X,w) = ", str(w0), " + ", str(w1),
                  " * X"]
    print("".join(modelParts))

    testFeatures = [[value] for value in testGdp]
    computedOutputs = sgdModel.predict(testFeatures)
    toolError = mean_squared_error(testOutputs, computedOutputs)
    print("\tPrediction error (tool): ", str(toolError))
    manualError = meanSquareError(testOutputs, computedOutputs)
    print("\tPrediction error (manual): ", str(manualError))

    # Whole data set, training subset, then predicted vs. real test values.
    plotDataForUni(gdpData, outputs, w, "Train & test data")
    plotDataForUni(trainGdp, trainOutputs, w,
                   "Train data and the learnt model")
    plotData2ForUni(testGdp, testOutputs, computedOutputs,
                    "Computed vs real test data")
Example #3
0
def main():
    """Train and compare a library and a hand-written bivariate regressor.

    Reads '2017.csv' from the current working directory through
    ``readData``, splits the rows 80/20 with a fixed NumPy seed, fits one
    model via ``tool_regression`` and one via ``manual_regression``, prints
    both test errors and draws histograms and plots of the data against the
    manually learnt model.

    Both returned models are used through ``predict``; the manual one is
    also read through ``intercept_`` and ``coef_``.
    """
    currDir = os.getcwd()
    path = os.path.join(currDir, '2017.csv')

    # Named ``inputs``/``outputs`` so the builtin ``input`` is not shadowed.
    inputs, outputs = readData(path, 'Economy..GDP.per.Capita.', 'Freedom',
                               'Happiness.Score')

    # Split in 80/20 percent; the fixed seed keeps the split reproducible.
    np.random.seed(5)
    indexes = list(range(len(inputs)))
    trainSample = np.random.choice(indexes,
                                   int(0.8 * len(inputs)),
                                   replace=False)
    # Membership is tested against a set so the split is linear rather than
    # scanning the sampled array once per index.
    trainIndexSet = set(trainSample.tolist())
    testSample = [i for i in indexes if i not in trainIndexSet]

    trainInputs = [inputs[i] for i in trainSample]
    trainOutputs = [outputs[i] for i in trainSample]

    testInputs = [inputs[i] for i in testSample]
    testOutputs = [outputs[i] for i in testSample]

    print('=== SKLEARN MODEL ===')
    tool_regressor = tool_regression(trainInputs, trainOutputs)
    print('\n\n=== MY MODEL ===')
    manual_regressor = manual_regression(trainInputs, trainOutputs)

    print('\n\n===Performance===')
    print('Tool prediction error:   ',
          mean_squared_error(testOutputs, tool_regressor.predict(testInputs)))
    print('Manual prediction error: ',
          meanSquareError(manual_regressor, testInputs, testOutputs))

    # Per-feature columns, extracted once and reused by every plot below.
    trainGdp = [sample[0] for sample in trainInputs]
    trainFreedom = [sample[1] for sample in trainInputs]
    testGdp = [sample[0] for sample in testInputs]
    testFreedom = [sample[1] for sample in testInputs]

    # The histograms describe the training subset. Indexing the raw data
    # with range(len(trainInputs)) would show the first 80% of the rows
    # instead of the randomly sampled ones.
    plotDataHistogram(trainGdp, 'capita GDP')
    plotDataHistogram(trainFreedom, 'freedom')
    plotDataHistogram(trainOutputs, 'Happiness score')

    learntWeights = [
        manual_regressor.intercept_, manual_regressor.coef_[0],
        manual_regressor.coef_[1]
    ]

    plotData(trainGdp, trainFreedom, trainOutputs, learntWeights,
             'TRAIN BASED ON LEARNT MODEL')

    plotData(testGdp, testFreedom, testOutputs, learntWeights,
             'TEST BASED ON LEARNT MODEL')

    predictedPlot(testGdp, testFreedom, testOutputs,
                  manual_regressor.predict(testInputs),
                  'PREDICTED BASED ON LEARNT MODEL')
Example #4
0
def BivariateStochastic():
    """Fit the hand-written MySGDRegression on two features and report it.

    The function works on names resolved outside its body (trainInputs,
    trainOutputs, testInputs, testOutputs and the GDP/Freedom series used
    for plotting). It prints the learnt coefficients, prints the test error
    computed by both ``mean_squared_error`` and ``meanSquareError``, and
    then draws three plots.
    """
    sgdModel = MySGDRegression()
    sgdModel.fit(trainInputs, trainOutputs)

    # Intercept first, then the two slopes, matching the plot helpers'
    # weight order.
    w0 = sgdModel.intercept_
    w1 = sgdModel.coef_[0]
    w2 = sgdModel.coef_[1]
    w = [w0, w1, w2]

    print("-----manual stochastic-----")
    print("Regression for attributes: GDP & Freedom")
    modelParts = [
        "\tThe learnt model: f(X,w) = ", str(w0), " + ", str(w1),
        " * X1 + ", str(w2), " * X2"
    ]
    print("".join(modelParts))

    computedTestOutputs = sgdModel.predict(testInputs)
    toolError = mean_squared_error(testOutputs, computedTestOutputs)
    print("\tPrediction error (tool): ", str(toolError))
    manualError = meanSquareError(testOutputs, computedTestOutputs)
    print("\tPrediction error (manual): ", str(manualError))

    # Whole data set, training subset, then predicted vs. real test values.
    plotDataForBi(gdpData, freedomData, outputs, w, "Train & test data")
    plotDataForBi(trainGdp, trainFreedom, trainOutputs, w,
                  "Train data and the learnt model")
    plotData2ForBi(testGdp, testFreedom, testOutputs, computedTestOutputs,
                   "Computed(green) vs real(red) test data")
Example #5
0
def UnivariateStochastic():
    """Fit the hand-written MySGDRegression on the GDP feature and report it.

    The function works on names resolved outside its body (trainGdp,
    trainOutputs, testGdp, testOutputs, gdpData, outputs). It prints the
    learnt intercept and slope, prints the test error computed by both
    ``mean_squared_error`` and ``meanSquareError``, and draws three plots.
    """
    sgdModel = MySGDRegression()

    # One single-element row per sample, the same shape used for prediction
    # below.
    trainFeatures = [[value] for value in trainGdp]
    sgdModel.fit(trainFeatures, trainOutputs)

    w0 = sgdModel.intercept_
    w1 = sgdModel.coef_[0]
    w = [w0, w1]

    print("-----manual stochastic-----")
    print("Regression for attribute: GDP")
    modelParts = ["\tThe learnt model: f(X,w) = ", str(w0), " + ", str(w1),
                  " * X"]
    print("".join(modelParts))

    testFeatures = [[value] for value in testGdp]
    computedOutputs = sgdModel.predict(testFeatures)
    toolError = mean_squared_error(testOutputs, computedOutputs)
    print("\tPrediction error (tool): ", str(toolError))
    manualError = meanSquareError(testOutputs, computedOutputs)
    print("\tPrediction error (manual): ", str(manualError))

    # Whole data set, training subset, then predicted vs. real test values.
    plotDataForUni(gdpData, outputs, w, "Train & test data")
    plotDataForUni(trainGdp, trainOutputs, w,
                   "Train data and the learnt model")
    plotData2ForUni(testGdp, testOutputs, computedOutputs,
                    "Computed vs real test data")
Example #6
0
    # manual
    regressor2 = MyLinearBivariateRegression()
    regressor2.fit(trainInputs, trainOutputs)
    wPrim = [regressor2.intercept_, regressor2.coef_[0], regressor2.coef_[1]]
    print("-----manual-----")
    print("The learnt model: f(X,w) = " + str(wPrim[0]) + " + " +
          str(wPrim[1]) + " * X1 + " + str(wPrim[2]) + " * X2")

    # print("Real: " + str(testOutputs))
    # print("Computed: " + str(list(regressor2.predict(testInputs))))

    print("-----performance-----")
    print("Prediction error (tool): ",
          str(mean_squared_error(testOutputs, regressor2.predict(testInputs))))
    print("Prediction error (manual): ",
          str(meanSquareError(testOutputs, regressor2.predict(testInputs))))

    plotDataHistogram(gdpData, 'GDP')
    plotDataHistogram(freedomData, 'Freedom')
    plotDataHistogram(outputs, 'Happiness score')

    # for train and test data
    plotData(gdpData, freedomData, outputs, w, "Train & test data")

    # for train data
    plotData(trainGdp, trainFreedom, trainOutputs, w,
             "Train data and the learnt model")

    # for test data
    computedTestOutputs = regressor2.predict(testInputs)
    plotData2(testGdp, testFreedom, testOutputs, computedTestOutputs,
Example #7
0
def main():
    """Compare library and hand-written SGD regressors, uni- and bivariate.

    Reads 'data.csv' from the current working directory through
    ``readData``, splits the rows 80/20 with a fixed NumPy seed, normalizes
    the split with ``executeNormalization``, then:

    * fits a scikit-learn ``SGDRegressor`` and a ``MySGDRegression`` on the
      first feature only (GDP) and prints both learnt models;
    * fits the bivariate models via ``tool_regression`` and
      ``manual_regression``;
    * prints the test error of all four models.
    """
    currDir = os.getcwd()
    path = os.path.join(currDir, 'data.csv')

    # Named ``inputs``/``outputs`` so the builtin ``input`` is not shadowed.
    inputs, outputs = readData(path, 'Economy..GDP.per.Capita.', 'Freedom',
                               'Happiness.Score')

    # Split in 80/20 percent; the fixed seed keeps the split reproducible.
    np.random.seed(5)
    indexes = list(range(len(inputs)))
    trainSample = np.random.choice(indexes,
                                   int(0.8 * len(inputs)),
                                   replace=False)
    # Membership is tested against a set so the split is linear rather than
    # scanning the sampled array once per index.
    trainIndexSet = set(trainSample.tolist())
    testSample = [i for i in indexes if i not in trainIndexSet]

    trainInputs = [inputs[i] for i in trainSample]
    trainOutputs = [outputs[i] for i in trainSample]

    testInputs = [inputs[i] for i in testSample]
    testOutputs = [outputs[i] for i in testSample]

    # Data normalization.
    trainInputs, testInputs, trainOutputs, testOutputs = executeNormalization(
        trainInputs, testInputs, trainOutputs, testOutputs)

    # Univariate inputs: the first feature of every sample, one
    # single-element row per sample. Repeating ``trainInputs[0]`` would feed
    # the models N copies of the first sample instead.
    # NOTE(review): assumes executeNormalization keeps one row per sample
    # with GDP in column 0 -- confirm against its implementation.
    GDPTrainInputs = [[sample[0]] for sample in trainInputs]
    GDPTestInputs = [[sample[0]] for sample in testInputs]

    print("         UNIVARIATE")
    print("SKLEARN REGRESSION")
    regressorSklearnUni = linear_model.SGDRegressor(alpha=0.005,
                                                    max_iter=1000,
                                                    average=len(trainInputs))
    regressorSklearnUni.fit(GDPTrainInputs, trainOutputs)
    toolWeights = [
        regressorSklearnUni.intercept_[0], regressorSklearnUni.coef_[0]
    ]
    print("Learnt model is: f(x) = " + str(toolWeights[0]) + " + " +
          str(toolWeights[1]) + " * x")

    print("MANUAL REGRESSION")
    regressorMySGDRegression = MySGDRegression()
    regressorMySGDRegression.fit(GDPTrainInputs, trainOutputs)
    manualWeights = [
        regressorMySGDRegression.intercept_, regressorMySGDRegression.coef_[0]
    ]
    print("Learnt model is: f(x) = " + str(manualWeights[0]) + " + " +
          str(manualWeights[1]) + " * x")

    print("\n\n         BIVARIATE")
    print("SKLEARN REGRESSION")
    toolRegression = tool_regression(trainInputs, trainOutputs)
    print("MANUAL REGRESSION")
    manual_regressor = manual_regression(trainInputs, trainOutputs)

    print("\n\n ERRORS")
    # The univariate errors are scored with the univariate models; the
    # bivariate models were trained on two features and cannot score
    # single-feature rows.
    print(
        "1.TOOL UNIVARIATE ERROR:   ",
        mean_squared_error(testOutputs,
                           regressorSklearnUni.predict(GDPTestInputs)))
    print("2.MANUAL UNIVARIATE ERROR: ",
          meanSquareError(regressorMySGDRegression, GDPTestInputs,
                          testOutputs))
    print("3.TOOL BIVARIATE ERROR:    ",
          mean_squared_error(testOutputs, toolRegression.predict(testInputs)))
    print("4.MANUAL BIVARIATE ERROR:  ",
          meanSquareError(manual_regressor, testInputs, testOutputs))