Example no. 1
    def train(self,X,y,reg=1e-5,num_iters=400,norm=True):

        """
        Train a linear model using scipy's function minimization.
        
        Inputs:
        - X: N X D array of training data. Each training point is a D-dimensional
         row.
        - y: 1-dimensional array of length N with values in the reals.
        - reg: (float) regularization strength.
        - num_iters: (integer) number of steps to take when optimizing
        - norm: a boolean which indicates whether the X matrix is standardized before
                solving the optimization problem

        Outputs:
        - optimal value for theta
        """

        num_train,dim = X.shape

        # standardize features if norm=True

        if norm:
            # remove the first column of ones and normalize the remaining features
            X_without_1s = X[:,1:]
            X_norm, mu, sigma = utils.std_features(X_without_1s)
            # add the ones back
            XX = np.vstack([np.ones((X_norm.shape[0],)),X_norm.T]).T
        else:
            XX = X

        # initialize theta
        theta = np.zeros((dim,))

        # Run scipy's BFGS optimizer to minimize the regularized loss
        theta_opt_norm = scipy.optimize.fmin_bfgs(self.loss,
                                                  theta,
                                                  fprime=self.grad_loss,
                                                  args=(XX, y, reg),
                                                  maxiter=num_iters)


        if norm:
            # convert theta back to work with original X
            theta_opt = np.zeros(theta_opt_norm.shape)
            theta_opt[1:] = theta_opt_norm[1:]/sigma
            theta_opt[0] = theta_opt_norm[0] - np.dot(theta_opt_norm[1:],mu/sigma)
        else:
            theta_opt = theta_opt_norm


        return theta_opt
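
The final block of this method maps the coefficients learned on standardized features back to the scale of the original X. The algebra behind that conversion can be checked numerically with a short standalone sketch; the normalization X_norm = (X - mu) / sigma is assumed to match what utils.std_features produces, and is not the original utils code:

import numpy as np

# Standalone check of the theta back-conversion used in train() above.
# Assumes std_features-style normalization: X_norm = (X - mu) / sigma.
rng = np.random.default_rng(0)
X_raw = rng.normal(size=(5, 3))                    # 5 points, 3 raw features
mu, sigma = X_raw.mean(axis=0), X_raw.std(axis=0)
X_norm = (X_raw - mu) / sigma

theta_norm = rng.normal(size=4)                    # [bias, w1, w2, w3] fit on normalized data

# Same conversion as in train(): rescale the weights, shift the bias.
theta = np.zeros_like(theta_norm)
theta[1:] = theta_norm[1:] / sigma
theta[0] = theta_norm[0] - np.dot(theta_norm[1:], mu / sigma)

# Predictions agree on standardized and original features.
assert np.allclose(theta_norm[0] + X_norm @ theta_norm[1:],
                   theta[0] + X_raw @ theta[1:])
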
Example no. 2
    def train(self, X, y, num_iters=400):
        """
        Train a linear model using scipy's function minimization.
        
        Inputs:
        - X: N X D array of training data. Each training point is a D-dimensional
         row.
        - y: 1-dimensional array of length N with values in the reals.
        - num_iters: (integer) number of steps to take when optimizing

        Outputs:
        - optimal value for theta
        """

        num_train, dim = X.shape

        # standardize X so that each column has zero mean and unit variance
        # remember to remove the first column of ones before normalizing the features

        X_without_1s = X[:, 1:]
        X_norm, mu, sigma = utils.std_features(X_without_1s)

        # add the ones back and assemble the XX matrix for training

        XX = np.vstack([np.ones((X_norm.shape[0], )), X_norm.T]).T
        theta = np.zeros((dim, ))

        # Run scipy's BFGS optimizer to minimize the loss

        theta_opt_norm = scipy.optimize.fmin_bfgs(self.loss,
                                                  theta,
                                                  fprime=self.grad_loss,
                                                  args=(XX, y),
                                                  maxiter=num_iters)

        # convert theta back to work with original X
        theta_opt = np.zeros(theta_opt_norm.shape)
        theta_opt[1:] = theta_opt_norm[1:] / sigma
        theta_opt[0] = theta_opt_norm[0] - np.dot(theta_opt_norm[1:],
                                                  mu / sigma)

        return theta_opt
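
utils.std_features itself is not shown in these examples. Judging from how its return values (X_norm, mu, sigma) are used, it presumably standardizes each column to zero mean and unit variance; a minimal stand-in under that assumption (not the original utils implementation) could be:

import numpy as np

def std_features(X):
    # Hypothetical stand-in for utils.std_features: column-wise standardization.
    # Returns the standardized matrix plus the per-column mean and standard
    # deviation so callers can undo the transform, as train() does above.
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)
    sigma = np.where(sigma == 0, 1.0, sigma)   # guard against constant columns
    return (X - mu) / sigma, mu, sigma
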
Example no. 3
    def train(self,X,y,num_iters=400):

        """
        Train a linear model using scipy's function minimization.
        
        Inputs:
        - X: N X D array of training data. Each training point is a D-dimensional
         row.
        - y: 1-dimensional array of length N with values in the reals.
        - num_iters: (integer) number of steps to take when optimizing

        Outputs:
        - optimal value for theta
        """

        num_train,dim = X.shape
        
        # standardize X so that each column has zero mean and unit variance
        # remember to remove the first column of ones before normalizing the features

        X_without_1s = X[:,1:]
        X_norm, mu, sigma = utils.std_features(X_without_1s)

        # add the ones back and assemble the XX matrix for training

        XX = np.vstack([np.ones((X_norm.shape[0],)),X_norm.T]).T
        theta = np.zeros((dim,))

        # Run scipy's BFGS optimizer to minimize the loss

        theta_opt_norm = scipy.optimize.fmin_bfgs(self.loss,
                                                  theta,
                                                  fprime=self.grad_loss,
                                                  args=(XX, y),
                                                  maxiter=num_iters)

        # convert theta back to work with original X
        theta_opt = np.zeros(theta_opt_norm.shape)
        theta_opt[1:] = theta_opt_norm[1:]/sigma
        theta_opt[0] = theta_opt_norm[0] - np.dot(theta_opt_norm[1:],mu/sigma)


        return theta_opt
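
self.loss and self.grad_loss are defined elsewhere in the class. scipy.optimize.fmin_bfgs only requires that f(theta, *args) return a scalar and fprime(theta, *args) return the gradient with respect to theta, so a least-squares pair consistent with the args=(XX, y) call above would look roughly like this (an illustration, not the original implementation):

import numpy as np

def loss(theta, X, y):
    # Mean squared error (halved); scipy passes theta first, then the extra args.
    residual = X.dot(theta) - y
    return residual.dot(residual) / (2.0 * y.size)

def grad_loss(theta, X, y):
    # Gradient of the loss above with respect to theta.
    return X.T.dot(X.dot(theta) - y) / y.size
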
Example no. 4
    X_train.append(x)
    if i % 5000 == 0:
        print("Reading data for index = ", i)
X_train = np.asarray(X_train)

X_val = []
for i in range(10001, 11001):
    file = "../train/" + str(i) + ".png"
    x = si.imread(file)
    x = x.reshape(3072)
    X_val.append(x)
    if i % 5000 == 0:
        print("Reading data for index = ", i)
X_val = np.asarray(X_val)

X_train, _, _ = utils.std_features(X_train)
X_train = np.vstack((np.ones(X_train.shape[0]), X_train.T)).T

X_val, _, _ = utils.std_features(X_val)
X_val = np.vstack((np.ones(X_val.shape[0]), X_val.T)).T

theta = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_naive(theta, X_train, y_train, 0.0)

# Loss should be something close to - log(0.1)

print('loss:', loss, 'should be close to', -np.log(0.1))

tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(theta, X_train, y_train, 0.00001)
toc = time.time()
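
The sanity check above works because theta is initialized with tiny random values: all 10 class scores are nearly equal, the softmax assigns roughly probability 1/10 to every class, and the cross-entropy loss is therefore approximately -log(0.1) ≈ 2.3026 regardless of the labels. A small illustration:

import numpy as np

scores = np.zeros(10)                           # near-zero theta -> near-equal scores
probs = np.exp(scores) / np.exp(scores).sum()   # uniform distribution, each entry 0.1
print(-np.log(probs[0]))                        # ~2.3026, i.e. -log(0.1)
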
Example no. 5
import scipy.io
import utils
import numpy as np
from sklearn import linear_model

# No modifications in this script
# complete the functions in util.py; then run the script

# load the spam data in

Xtrain,Xtest,ytrain,ytest = utils.load_spam_data()

# Preprocess the data 

Xtrain_std,mu,sigma = utils.std_features(Xtrain)
Xtrain_logt = utils.log_features(Xtrain)
Xtrain_bin = utils.bin_features(Xtrain)

Xtest_std = (Xtest - mu)/sigma
Xtest_logt = utils.log_features(Xtest)
Xtest_bin = utils.bin_features(Xtest)

# find good lambda by cross validation for these three sets

def run_dataset(X,ytrain,Xt,ytest,type,penalty):

    best_lambda = utils.select_lambda_crossval(X,ytrain,0.1,5.1,0.5,penalty)
    print "best_lambda = ", best_lambda

    # train a classifier on best_lambda and run it
    if penalty == "l2":
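
The run_dataset helper above is cut off after the penalty check. A common way to finish such a helper with scikit-learn's LogisticRegression is sketched below; the C = 1/best_lambda mapping, the solver choice, and the accuracy printout are assumptions for illustration, not the original code, and the sketch reuses the utils and linear_model imports from the script above:

def run_dataset(X, ytrain, Xt, ytest, type, penalty):
    # Hypothetical completion of the truncated helper above.
    best_lambda = utils.select_lambda_crossval(X, ytrain, 0.1, 5.1, 0.5, penalty)
    print("best_lambda =", best_lambda)

    # scikit-learn parameterizes regularization strength as C = 1 / lambda.
    if penalty == "l2":
        clf = linear_model.LogisticRegression(penalty="l2", C=1.0 / best_lambda)
    else:
        clf = linear_model.LogisticRegression(penalty="l1", C=1.0 / best_lambda,
                                              solver="liblinear")
    clf.fit(X, ytrain)
    print("Accuracy on %s data (%s penalty): %.4f" % (type, penalty, clf.score(Xt, ytest)))
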