Beispiel #1
0
    def test_sgd(self):
        """Check the lines that ``regression.sgd`` prints to stdout.

        Loads the dataset named by ``self.BODYFAT_FILE``, runs
        ``regression.sgd`` with ``cols=[2, 3]``, ``betas=[0, 0, 0]``, ``T=5``
        and ``eta=1e-6`` while stdout is captured, then compares the first
        five captured lines (trailing whitespace stripped) with the expected
        text.
        """
        # Local import so this method does not rely on a file-level import
        # that may not exist.
        from contextlib import redirect_stdout

        dataset = regression.get_dataset(self.BODYFAT_FILE)

        # redirect_stdout puts back whatever stream was active before, even if
        # sgd raises, so a failure here cannot leave stdout hijacked and a
        # test runner's own capture is not replaced by sys.__stdout__.
        captured = io.StringIO()
        with redirect_stdout(captured):
            regression.sgd(dataset, cols=[2, 3], betas=[0, 0, 0], T=5, eta=1e-6)

        output_lines = captured.getvalue().split('\n')

        expected_lines = [
            "1 387.33 0.00 0.00 0.00",
            "2 379.60 0.00 0.00 0.01",
            "3 335.99 0.00 0.00 0.01",
            "4 285.89 0.00 0.00 0.02",
            "5 245.75 0.00 0.01 0.03",
        ]

        # zip() stops at the shorter sequence, so without this check the test
        # would pass vacuously when sgd prints too few lines (or nothing).
        self.assertGreaterEqual(
            len(output_lines),
            len(expected_lines),
            "regression.sgd printed fewer lines than expected",
        )

        for out_line, exp_line in zip(output_lines, expected_lines):
            self.assertEqual(out_line.rstrip(), exp_line)
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

if __name__ == "__main__":
    # Load the samples, then prepend a column of ones to X
    # (presumably the intercept term -- confirm against regression.sgd).
    X, y = regression.loadDataSet('data/ex1.txt')

    m, n = X.shape
    onesColumn = np.ones((m, 1))
    X = np.concatenate((onesColumn, X), axis=1)

    # Settings handed to regression.sgd and echoed in the plot title.
    rate, maxLoop, epsilon = 0.01, 100, 0.01

    result, timeConsumed = regression.sgd(rate, maxLoop, epsilon, X, y)
    theta, errors, thetas = result

    # Plot the fitted curve
    fittingFig = plt.figure()
    title = 'sgd: rate=%.2f, maxLoop=%d, epsilon=%.3f \n time: %ds' % (
        rate,
        maxLoop,
        epsilon,
        timeConsumed,
    )
    ax = fittingFig.add_subplot(111, title=title)

    # Scatter the samples: second column of X against the first column of y.
    # The `.A` access implies X and y are np.matrix objects.
    sampleX = X[:, 1].flatten().A[0]
    sampleY = y[:, 0].flatten().A[0]
    trainingSet = ax.scatter(sampleX, sampleY)

    # Sort a copy of X in place along axis 0 and draw the model's prediction
    # over it.
    xCopy = X.copy()
    xCopy.sort(0)
    yHat = xCopy * theta
    (fittingLine,) = ax.plot(xCopy[:, 1], yHat, color='g')

    ax.set_xlabel('Population of City in 10,000s')
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat May 12 06:19:52 2018

@author: kukuLife
"""
import numpy as np
import regression

# Load the samples from 'ex1.txt'.
X, y = regression.loadDataSet('ex1.txt')
m, n = X.shape

# Prepend a column of ones to X
# (presumably the intercept term -- confirm against regression.sgd).
onesColumn = np.ones((m, 1))
X = np.concatenate((onesColumn, X), axis=1)

# Settings passed to regression.sgd below.
maxLoop, epsilon, rate = 1500, 0.01, 0.02

theta, thetas, errors = regression.sgd(maxLoop, rate, X, y, epsilon)
# Linear regression: fit with batch gradient descent, stochastic gradient
# descent and the analytical solution, printing the cost of each on the
# training and test data.
print('Regression:')
testData = re.read_csv("regression/test.csv")
trainData = re.read_csv("regression/train.csv")

# Batch gradient descent algorithm
BGD1 = re.bgd(0.25, trainData['x'], trainData['y'])
costTest = re.cost(BGD1['weight'], testData['x'], testData['y'])
print('Batch gradient descent algorithm:')
for label, value in (
    ('Final cost function value of the training data:', BGD1['cost'][-1]),
    ('Final cost function value of the test data:', costTest),
    ('r:', BGD1['r']),
    ('Final weight:', BGD1['weight']),
):
    print(label, value)

# Stochastic gradient descent (SGD) algorithm
SGD1 = re.sgd(0.25, trainData['x'], trainData['y'])
costTest = re.cost(SGD1['weight'], testData['x'], testData['y'])
print('Stochastic gradient descent algorithm:')
for label, value in (
    ('Final cost function value of the training data:', SGD1['cost'][-1]),
    ('Final cost function value of the test data:', costTest),
    ('r:', SGD1['r']),
    ('Final weight:', SGD1['weight']),
):
    print(label, value)

# Optimal weight vector from the analytical form
w = re.analytical(trainData['x'], trainData['y'])
cost_train = re.cost(w, trainData['x'], trainData['y'])
cost_test = re.cost(w, testData['x'], testData['y'])
for label, value in (
    ('The optimal weight vector with an analytical form:', w),
    ('The cost function value of the training data:', cost_train),
    ('The cost function value of the test data:', cost_test),
):
    print(label, value)