def test_sgd(self):
    dataset = regression.get_dataset(self.BODYFAT_FILE)
    # Capture stdout so the per-iteration log printed by sgd() can be checked.
    capturedOutput = io.StringIO()
    sys.stdout = capturedOutput
    regression.sgd(dataset, cols=[2, 3], betas=[0, 0, 0], T=5, eta=1e-6)
    output = capturedOutput.getvalue()
    output_lines = output.split('\n')
    sys.stdout = sys.__stdout__  # restore stdout
    expected_lines = [
        "1 387.33 0.00 0.00 0.00",
        "2 379.60 0.00 0.00 0.01",
        "3 335.99 0.00 0.00 0.01",
        "4 285.89 0.00 0.00 0.02",
        "5 245.75 0.00 0.01 0.03"
    ]
    for out_line, exp_line in zip(output_lines, expected_lines):
        self.assertEqual(out_line.rstrip(), exp_line)
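# The test above presupposes a regression.sgd that logs one line per
# iteration in the form "t mse beta_0 beta_1 beta_2", each value to two
# decimals. A minimal sketch under those assumptions (dataset column 0
# holding the response and `cols` selecting the feature columns are
# guesses from the call; the deterministic expected output suggests the
# grading harness seeds the RNG):
import random

import numpy as np

def sgd(dataset, cols, betas, T, eta):
    y = dataset[:, 0]          # assumed: response in column 0
    X = dataset[:, cols]       # assumed: features selected by `cols`
    n = len(y)
    betas = list(betas)
    for t in range(1, T + 1):
        i = random.randrange(n)                    # one random sample
        pred = betas[0] + np.dot(betas[1:], X[i])  # current prediction
        grad = 2 * (pred - y[i])                   # d(err^2)/d(pred)
        betas[0] -= eta * grad                     # intercept step
        for j in range(len(cols)):
            betas[j + 1] -= eta * grad * X[i, j]   # slope steps
        preds = betas[0] + X @ np.array(betas[1:])
        mse = np.mean((preds - y) ** 2)            # full-data MSE
        print(f"{t} {mse:.2f} " + " ".join(f"{b:.2f}" for b in betas))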
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

import regression

if __name__ == "__main__":
    X, y = regression.loadDataSet('data/ex1.txt')

    m, n = X.shape
    X = np.concatenate((np.ones((m, 1)), X), axis=1)

    rate = 0.01
    maxLoop = 100
    epsilon = 0.01

    result, timeConsumed = regression.sgd(rate, maxLoop, epsilon, X, y)
    theta, errors, thetas = result

    # Plot the fitted line
    fittingFig = plt.figure()
    title = 'sgd: rate=%.2f, maxLoop=%d, epsilon=%.3f \n time: %ds' % (rate, maxLoop, epsilon, timeConsumed)
    ax = fittingFig.add_subplot(111, title=title)
    trainingSet = ax.scatter(X[:, 1].flatten().A[0], y[:, 0].flatten().A[0])

    xCopy = X.copy()
    xCopy.sort(0)
    yHat = xCopy * theta
    fittingLine, = ax.plot(xCopy[:, 1], yHat, color='g')

    ax.set_xlabel('Population of City in 10,000s')
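# The driver above calls regression.sgd(rate, maxLoop, epsilon, X, y) and
# unpacks ((theta, errors, thetas), timeConsumed), with X and y as
# np.matrix objects (the plotting code uses the .A attribute). A hedged
# sketch of such a function: the per-sample update is the standard LMS
# step; the epoch-level cost history and the epsilon stopping rule are
# assumptions inferred from the parameter names.
import time

import numpy as np

def sgd(rate, maxLoop, epsilon, X, y):
    start = time.time()
    m, n = X.shape
    theta = np.matrix(np.zeros((n, 1)))
    errors, thetas = [], []
    for _ in range(maxLoop):
        for i in range(m):
            diff = X[i] * theta - y[i]            # 1x1 residual for sample i
            theta = theta - rate * X[i].T * diff  # LMS update
            thetas.append(theta.copy())
        error = float((X * theta - y).T * (X * theta - y)) / (2 * m)
        errors.append(error)                      # cost after each epoch
        if error < epsilon:                       # assumed stopping rule
            break
    return (theta, errors, thetas), time.time() - start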
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat May 12 06:19:52 2018

@author: kukuLife
"""
import numpy as np

import regression

X, y = regression.loadDataSet('ex1.txt')
m, n = X.shape
X = np.concatenate((np.ones((m, 1)), X), axis=1)

maxLoop = 1500
epsilon = 0.01
rate = 0.02

theta, thetas, errors = regression.sgd(maxLoop, rate, X, y, epsilon)
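# Both driver scripts above rely on regression.loadDataSet. A minimal
# sketch, assuming ex1.txt is a comma-separated file with the target in
# the last column (the file layout is a guess), returning np.matrix
# objects so the matrix arithmetic and the .A accesses above work:
import numpy as np

def loadDataSet(filename):
    data = np.loadtxt(filename, delimiter=',')  # one sample per row
    X = np.matrix(data[:, :-1])                 # feature columns
    y = np.matrix(data[:, -1]).T                # target as a column vector
    return X, y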
# Linear regression
import regression as re  # assumed alias: the calls below use re.read_csv/bgd/sgd
                         # (note this shadows the stdlib re module)

print('Regression:')
testData = re.read_csv("regression/test.csv")
trainData = re.read_csv("regression/train.csv")

# Batch gradient descent algorithm
BGD1 = re.bgd(0.25, trainData['x'], trainData['y'])
costTest = re.cost(BGD1['weight'], testData['x'], testData['y'])
print('Batch gradient descent algorithm:')
print('Final cost function value of the training data:', BGD1['cost'][-1])
print('Final cost function value of the test data:', costTest)
print('r:', BGD1['r'])
print('Final weight:', BGD1['weight'])

# Stochastic gradient descent (SGD) algorithm
SGD1 = re.sgd(0.25, trainData['x'], trainData['y'])
costTest = re.cost(SGD1['weight'], testData['x'], testData['y'])
print('Stochastic gradient descent algorithm:')
print('Final cost function value of the training data:', SGD1['cost'][-1])
print('Final cost function value of the test data:', costTest)
print('r:', SGD1['r'])
print('Final weight:', SGD1['weight'])

# Optimal weight vector in closed (analytical) form
w = re.analytical(trainData['x'], trainData['y'])
cost_train = re.cost(w, trainData['x'], trainData['y'])
cost_test = re.cost(w, testData['x'], testData['y'])
print('The optimal weight vector in analytical form:', w)
print('The cost function value of the training data:', cost_train)
print('The cost function value of the test data:', cost_test)
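# The script assumes a regression module exposing read_csv, bgd, sgd,
# cost, and analytical. The closed-form solution is standard least
# squares; a sketch of the assumed analytical() and cost() helpers (the
# prepended-bias-column convention is a guess inferred from the calls):
import numpy as np

def analytical(x, y):
    # Normal equation: w = (X^T X)^{-1} X^T y, with a prepended bias column.
    X = np.column_stack([np.ones(len(x)), np.asarray(x)])
    return np.linalg.solve(X.T @ X, X.T @ np.asarray(y))

def cost(w, x, y):
    # Mean squared error J(w) = (1 / 2m) * ||Xw - y||^2.
    X = np.column_stack([np.ones(len(x)), np.asarray(x)])
    r = X @ w - np.asarray(y)
    return float(r @ r) / (2 * len(y))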