def train(self, X, y, reg=1e-5, num_iters=400, norm=True):
    """
    Train a linear model using scipy's function minimization.

    Inputs:
    - X: N x D array of training data. Each training point is a D-dimensional row.
    - y: 1-dimensional array of length N with values in the reals.
    - reg: (float) regularization strength.
    - num_iters: (integer) number of steps to take when optimizing.
    - norm: a boolean which indicates whether the X matrix is standardized
      before solving the optimization problem.

    Outputs:
    - optimal value for theta
    """
    num_train, dim = X.shape

    # standardize features if norm is True
    if norm:
        # take out the column of ones and standardize the remaining features
        X_without_1s = X[:, 1:]
        X_norm, mu, sigma = utils.std_features(X_without_1s)
        # add the column of ones back
        XX = np.vstack([np.ones((X_norm.shape[0],)), X_norm.T]).T
    else:
        XX = X

    # initialize theta
    theta = np.zeros((dim,))

    # run scipy's BFGS optimizer (fmin_bfgs) to minimize the regularized loss
    theta_opt_norm = scipy.optimize.fmin_bfgs(self.loss, theta,
                                              fprime=self.grad_loss,
                                              args=(XX, y, reg),
                                              maxiter=num_iters)

    if norm:
        # convert theta back to the scale of the original (unstandardized) X
        theta_opt = np.zeros(theta_opt_norm.shape)
        theta_opt[1:] = theta_opt_norm[1:] / sigma
        theta_opt[0] = theta_opt_norm[0] - np.dot(theta_opt_norm[1:], mu / sigma)
    else:
        theta_opt = theta_opt_norm

    return theta_opt
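# Note: utils.std_features is called above but defined elsewhere (in utils.py).
# A minimal sketch of what it might look like, assuming it returns the
# standardized matrix along with the per-column mean and standard deviation,
# as the call sites such as (Xtest - mu) / sigma imply:
def std_features(X):
    """Standardize each column of X to zero mean and unit variance.

    Returns the standardized matrix together with the column means (mu) and
    standard deviations (sigma) so that held-out data can be transformed
    with the same training-set statistics.
    """
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)
    return (X - mu) / sigma, mu, sigma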
def train(self, X, y, num_iters=400):
    """
    Train a linear model using scipy's function minimization.

    Inputs:
    - X: N x D array of training data. Each training point is a D-dimensional row.
    - y: 1-dimensional array of length N with values in the reals.
    - num_iters: (integer) number of steps to take when optimizing.

    Outputs:
    - optimal value for theta
    """
    num_train, dim = X.shape

    # standardize X so that each column has zero mean and unit variance;
    # take out the column of ones first and standardize the remaining features
    X_without_1s = X[:, 1:]
    X_norm, mu, sigma = utils.std_features(X_without_1s)

    # add the column of ones back and assemble the XX matrix for training
    XX = np.vstack([np.ones((X_norm.shape[0],)), X_norm.T]).T

    theta = np.zeros((dim,))

    # run scipy's BFGS optimizer (fmin_bfgs) to minimize the loss
    theta_opt_norm = scipy.optimize.fmin_bfgs(self.loss, theta,
                                              fprime=self.grad_loss,
                                              args=(XX, y),
                                              maxiter=num_iters)

    # convert theta back to the scale of the original (unstandardized) X
    theta_opt = np.zeros(theta_opt_norm.shape)
    theta_opt[1:] = theta_opt_norm[1:] / sigma
    theta_opt[0] = theta_opt_norm[0] - np.dot(theta_opt_norm[1:], mu / sigma)

    return theta_opt
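# The self.loss and self.grad_loss methods handed to fmin_bfgs are defined
# elsewhere in the class. A minimal sketch, assuming a mean-squared-error
# objective J(theta) = 1/(2N) * ||X theta - y||^2, which matches the
# args=(XX, y) call signature used above; the variant that passes
# args=(XX, y, reg) would additionally include a regularization term.
def loss(self, theta, X, y):
    # average squared error over the N training points
    m = X.shape[0]
    residual = X.dot(theta) - y
    return np.sum(residual ** 2) / (2.0 * m)

def grad_loss(self, theta, X, y):
    # gradient of the average squared error with respect to theta
    m = X.shape[0]
    residual = X.dot(theta) - y
    return X.T.dot(residual) / m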
    X_train.append(x)
    if i % 5000 == 0:
        print("Reading data for index = ", i)
X_train = np.asarray(X_train)

X_val = []
for i in range(10001, 11001):
    file = "../train/" + str(i) + ".png"
    x = si.imread(file)
    x = x.reshape(3072)
    X_val.append(x)
    if i % 5000 == 0:
        print("Reading data for index = ", i)
X_val = np.asarray(X_val)

# standardize the features and prepend a column of ones for the bias term
X_train, _, _ = utils.std_features(X_train)
X_train = np.vstack((np.ones(X_train.shape[0]), X_train.T)).T
X_val, _, _ = utils.std_features(X_val)
X_val = np.vstack((np.ones(X_val.shape[0]), X_val.T)).T

theta = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_naive(theta, X_train, y_train, 0.0)

# with near-zero theta and no regularization, the loss should be close to -log(0.1)
print('loss:', loss, ' should be close to ', -np.log(0.1))

tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(theta, X_train, y_train, 0.00001)
toc = time.time()
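# softmax_loss_naive is imported from elsewhere in the assignment. A minimal
# loop-based sketch of a softmax (cross-entropy) loss and gradient, assuming an
# L2 penalty of the form reg * sum(theta**2); the regularization convention in
# the original code may differ. numpy is assumed to be imported as np, as above.
def softmax_loss_naive(theta, X, y, reg):
    loss = 0.0
    grad = np.zeros_like(theta)
    num_train = X.shape[0]
    for i in range(num_train):
        scores = X[i].dot(theta)          # unnormalized class scores, shape (K,)
        scores -= np.max(scores)          # shift for numerical stability
        probs = np.exp(scores) / np.sum(np.exp(scores))
        loss += -np.log(probs[y[i]])      # cross-entropy for the true class
        for k in range(theta.shape[1]):
            grad[:, k] += (probs[k] - (k == y[i])) * X[i]
    loss = loss / num_train + reg * np.sum(theta * theta)
    grad = grad / num_train + 2 * reg * theta
    return loss, grad
# Sanity check: with theta near zero and reg = 0, every class gets probability
# ~1/10, so the loss is close to -log(0.1), as checked above.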
import scipy.io
import utils
import numpy as np
from sklearn import linear_model

# No modifications needed in this script.
# Complete the functions in utils.py, then run the script.

# load the spam data
Xtrain, Xtest, ytrain, ytest = utils.load_spam_data()

# Preprocess the data: standardized, log-transformed, and binarized versions
Xtrain_std, mu, sigma = utils.std_features(Xtrain)
Xtrain_logt = utils.log_features(Xtrain)
Xtrain_bin = utils.bin_features(Xtrain)

Xtest_std = (Xtest - mu) / sigma
Xtest_logt = utils.log_features(Xtest)
Xtest_bin = utils.bin_features(Xtest)

# find a good lambda by cross validation for each of these three feature sets
def run_dataset(X, ytrain, Xt, ytest, type, penalty):
    best_lambda = utils.select_lambda_crossval(X, ytrain, 0.1, 5.1, 0.5, penalty)
    print("best_lambda = ", best_lambda)

    # train a classifier on best_lambda and run it
    if penalty == "l2":
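# select_lambda_crossval lives in utils.py and is one of the functions to be
# completed there. A minimal sketch, assuming it sweeps lambda from lambda_low
# up to lambda_high in steps of lambda_step and scores each value with k-fold
# cross validation on an sklearn linear model; the exact estimator, fold count,
# and scoring used in the assignment may differ.
from sklearn.model_selection import cross_val_score

def select_lambda_crossval(X, y, lambda_low, lambda_high, lambda_step, penalty):
    best_lambda, best_score = lambda_low, -np.inf
    for lam in np.arange(lambda_low, lambda_high, lambda_step):
        if penalty == "l2":
            clf = linear_model.LogisticRegression(penalty="l2", C=1.0 / lam)
        else:
            clf = linear_model.LogisticRegression(penalty="l1", C=1.0 / lam,
                                                  solver="liblinear")
        # mean cross-validated accuracy for this regularization strength
        score = cross_val_score(clf, X, y, cv=5).mean()
        if score > best_score:
            best_score, best_lambda = score, lam
    return best_lambda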