def test_grad_SEE(self):
    """Compare the analytic SEE gradient with its numerical approximation.

    For each order in 0..9 the data in 'curvefitting.txt' is loaded, the
    weights are obtained from hw1.regressionFit, and the flattened output
    of hw1.computeSEEGrad is compared (assertAlmostEqual) against the
    flattened output of gd.gradient_approx_SEE for the step sizes
    1, .1, .01 and .001.
    """
    data_file = 'curvefitting.txt'
    step_sizes = (1, .1, .01, .001)

    for order in range(10):
        X, Y = hw1.getData(data_file)
        phi = hw1.designMatrix(X, order)
        weights = hw1.regressionFit(X, Y, phi)

        for delta in step_sizes:
            analytic = hw1.computeSEEGrad(X, Y, weights, order).flatten()
            approx = gd.gradient_approx_SEE(
                X, Y, weights, order, delta).flatten()

            # NOTE(review): only the first `order` components are compared,
            # so nothing is checked when order == 0 -- confirm whether the
            # gradient has order + 1 entries and the bound should follow.
            for idx in range(order):
                self.assertAlmostEqual(analytic[idx], approx[idx])
def __init__(self, x0, step_size, eps, verbose=False,
             data_file='curvefitting.txt', order=0):
    """Store the optimiser settings and load the data set.

    x0 is the initial guess, step_size the step size (eta) and eps the
    convergence criterion.  verbose is kept as a flag intended for debug
    messages when checking functions.  The contents of data_file are read
    with hw1.getData and the design matrix for `order` is built with
    hw1.designMatrix.
    """
    self.first, self.step_size, self.eps = x0, step_size, eps
    self.verbose = verbose
    # May not be necessary
    self.X, self.Y = hw1.getData(data_file)
    self.phi = hw1.designMatrix(self.X, order)
    self.order = order
def test_compare_derivative(self):
    """Print the numerical and the analytic SEE gradient for comparison.

    Part of question 2.2.  Uses an order-4 fit of 'curvefitting.txt': the
    weights come from hw1.regressionFit, the numerical gradient from
    gd.gradient_approx_SEE (step 1e-10) and the analytic gradient from
    hw1.computeSEEGrad.  Nothing is asserted; the two results are only
    printed for visual inspection.
    """
    order = 4
    X, Y = hw1.getData('curvefitting.txt')
    phi_matrix = hw1.designMatrix(X, order)
    weight_vector = hw1.regressionFit(X, Y, phi_matrix)

    # NOTE(review): a step of 1e-10 may be small enough for floating-point
    # cancellation to dominate the finite difference -- confirm the choice.
    approx = gd.gradient_approx_SEE(X, Y, weight_vector, order, 1e-10)
    # The analytic gradient is computed once; the earlier duplicate call
    # whose result was never used has been removed.
    true_deriv = hw1.computeSEEGrad(X, Y, weight_vector, order)

    # Single-argument print calls give the same output as the former
    # Python 2 print statements and also run on Python 3.
    print("%s %s" % ("COMPARE approx", approx))
    print("%s %s" % ("TRUE ", true_deriv))
def test_computeSEE(self):
    """Smoke-test hw1.computeSEE on an order-2 fit of 'curvefitting.txt'.

    The return value is discarded and nothing is asserted, so the test
    only fails if one of the hw1 calls raises.
    """
    degree = 2
    inputs, targets = hw1.getData('curvefitting.txt')
    fitted = hw1.regressionFit(
        inputs, targets, hw1.designMatrix(inputs, degree))
    hw1.computeSEE(inputs, targets, fitted, degree)
nIter += 1 fcall += 2 if df == centdiff: fcall += 2 * init.size # Tracks successive number of times that difference is below the convergence criterion if diff < crit: count += 1 else: count = 0 print "nIter: %d" % (nIter) print "Fcall: %d" % (fcall) return init X, Y = homework1.getData( '/Users/mfzhao/Downloads/6867_hw1_data/curvefitting.txt') t0 = OLS(X, Y, 0) t1 = OLS(X, Y, 1) t3 = OLS(X, Y, 3) t9 = OLS(X, Y, 9) t0 = pd.Series(t0.flatten()) t1 = pd.Series(t1.flatten()) t3 = pd.Series(t3.flatten()) t9 = pd.Series(t9.flatten()) DF = pd.DataFrame({'0': t0, '1': t1, '3': t3, '9': t9}) print DF # 2.1
def scaleFeatures(X, mean=None, sigma=None):
    """Standardise X column-wise: subtract the mean, divide by the std.

    Args:
        X: array whose first axis indexes samples.
        mean: per-column means to subtract; computed from X with
            np.mean(X, axis=0) when None.
        sigma: per-column standard deviations to divide by; computed from
            X with np.std(X, axis=0) when None.

    Returns:
        A tuple (scaledFeatures, mean, sigma) so that the statistics used
        here can be passed back in to scale another data set identically.
    """
    # `is None` instead of `== None`: comparing an array with `==` is
    # element-wise, so the old combined check failed when statistics were
    # actually supplied.  Checking each argument on its own also covers the
    # case where only one of the two is given.
    if mean is None:
        mean = np.mean(X, axis=0)
    if sigma is None:
        sigma = np.std(X, axis=0)

    n_rows = X.shape[0]
    meanArray = np.repeat(mean.reshape(1, -1), n_rows, axis=0)
    sigmaArray = np.repeat(sigma.reshape(1, -1), n_rows, axis=0)

    # NOTE(review): a column with zero standard deviation is divided by
    # zero here -- confirm whether that can occur for the inputs used.
    scaledFeatures = (X - meanArray) / sigmaArray
    return scaledFeatures, mean, sigma


##########
# Script section: load the data sets and run the ridge-regression helpers.
# NOTE(review): the data paths are absolute and machine-specific.
X, Y = homework1.getData(
    '/Users/dholtz/Downloads/6867_hw1_data/curvefitting.txt')
theta = ridgeRegression(X, Y, l=0, M=10)

lambdas = np.array([0, .01, .1, 1, 10, 100])
Ms = np.array([1, 2, 3, 4, 5])

X_train, Y_train = homework1.getData(
    '/Users/dholtz/Downloads/6867_hw1_data/regress_train.txt')
X_test, Y_test = homework1.getData(
    '/Users/dholtz/Downloads/6867_hw1_data/regress_test.txt')
X_validate, Y_validate = homework1.getData(
    '/Users/dholtz/Downloads/6867_hw1_data/regress_validate.txt')

# Fit on the training set with the default `l` and report the training MSE.
theta = ridgeRegression(X_train, Y_train, M=100)
predictions = predictRidge(X_train, theta)
print(MSE(predictions, Y_train))

# Second fit with l=0; its training-set predictions are computed but not
# printed within this section.
thetaOLS = ridgeRegression(X_train, Y_train, l=0, M=10)
predictions = predictRidge(X_train, thetaOLS)
def scaleFeatures(X, mean=None, sigma=None):
    """Standardise X column-wise: subtract the mean, divide by the std.

    Args:
        X: array whose first axis indexes samples.
        mean: per-column means to subtract; computed from X with
            np.mean(X, axis=0) when None.
        sigma: per-column standard deviations to divide by; computed from
            X with np.std(X, axis=0) when None.

    Returns:
        A tuple (scaledFeatures, mean, sigma) so that the statistics used
        here can be passed back in to scale another data set identically.
    """
    # `is None` instead of `== None`: comparing an array with `==` is
    # element-wise, so the old combined check failed when statistics were
    # actually supplied.  Checking each argument on its own also covers the
    # case where only one of the two is given.
    if mean is None:
        mean = np.mean(X, axis=0)
    if sigma is None:
        sigma = np.std(X, axis=0)

    n_rows = X.shape[0]
    meanArray = np.repeat(mean.reshape(1, -1), n_rows, axis=0)
    sigmaArray = np.repeat(sigma.reshape(1, -1), n_rows, axis=0)

    # NOTE(review): a column with zero standard deviation is divided by
    # zero here -- confirm whether that can occur for the inputs used.
    scaledFeatures = (X - meanArray) / sigmaArray
    return scaledFeatures, mean, sigma


##########
# Script section: load the data sets and run the ridge-regression helpers.
# NOTE(review): the data paths are absolute and machine-specific.
X, Y = homework1.getData(
    '/Users/dholtz/Downloads/6867_hw1_data/curvefitting.txt')
theta = ridgeRegression(X, Y, l=0, M=10)

lambdas = np.array([0, .01, .1, 1, 10, 100])
Ms = np.array([1, 2, 3, 4, 5])

X_train, Y_train = homework1.getData(
    '/Users/dholtz/Downloads/6867_hw1_data/regress_train.txt')
X_test, Y_test = homework1.getData(
    '/Users/dholtz/Downloads/6867_hw1_data/regress_test.txt')
X_validate, Y_validate = homework1.getData(
    '/Users/dholtz/Downloads/6867_hw1_data/regress_validate.txt')

# Fit on the training set with the default `l` and report the training MSE.
theta = ridgeRegression(X_train, Y_train, M=100)
predictions = predictRidge(X_train, theta)
print(MSE(predictions, Y_train))