import numpy as np
# SquaredLoss is assumed to be provided elsewhere in the project.


class GradientDescent():
    def __init__(self, regularization=None, learning_rate=0.0000012):
        self.learning_rate = learning_rate
        self.model = None
        self.loss = SquaredLoss(None)

    def fit(self, features, targets, max_iter=100000):
        # Initialize parameters (weight and bias)
        param = np.array([74.0, 1.0]).reshape(1, -1)
        # Include w_0 by appending a column of 1s
        f = np.concatenate((features, np.ones([len(features), 1])), axis=1)
        # Tracking variables
        num_iters = 0
        prev_loss = 0
        not_converged = True
        # Training loop
        while not_converged and num_iters < max_iter:
            current_loss = self.loss.loss(f, targets, param)
            if abs(prev_loss - current_loss) < 0.0001:
                not_converged = False
            prev_loss = current_loss
            grad = self.loss.gradient(f, targets, param)
            param -= self.learning_rate * grad
            num_iters += 1
        # Store the learned model
        self.model = param

    def predict_quantized(self, features):
        # Threshold the raw predictions at 0.5 to produce 0/1 labels
        unquantized_prediction = self.predict(features)
        quantized_prediction = np.zeros(len(unquantized_prediction))
        for i, val in enumerate(unquantized_prediction):
            if val >= 0.5:
                quantized_prediction[i] = 1
            else:
                quantized_prediction[i] = 0
        return quantized_prediction

    def predict(self, features):
        f = np.concatenate((features, np.ones([len(features), 1])), axis=1)
        # ravel() so the output is a flat length-N array
        return np.dot(self.model, np.transpose(f)).ravel()

    def score(self, features, targets):
        # Debug: show the learned parameters
        print(self.model)
        prediction = self.predict(features)
        u = np.sum((targets - prediction) ** 2)
        v = np.sum((targets - np.mean(targets)) ** 2)
        return 1 - (u / v)
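# --- Usage sketch (illustrative only, not part of the original code) ---
# A minimal example of driving the GradientDescent regressor above on made-up
# single-feature data. It assumes numpy and the project's SquaredLoss class
# are importable; the data and hyperparameters below are invented.
import numpy as np

rng = np.random.default_rng(0)
X = rng.uniform(0.0, 10.0, size=(100, 1))                  # one feature per example
y = 3.0 * X[:, 0] + 2.0 + rng.normal(scale=0.1, size=100)  # noisy linear targets

regressor = GradientDescent(learning_rate=0.0000012)
regressor.fit(X, y, max_iter=100000)
print('R^2-style score on training data:', regressor.score(X, y))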
    def test_CheckMinibatchTrainerEqualsSimpleTrainer(self):
        # Training on minibatches of size 1 without shuffling should produce
        # exactly the same model as plain online (one-example-at-a-time) training.
        train_set = [(np.random.rand(2), i) for i in range(3)]
        loss = SquaredLoss()
        epochs = 1
        optimizer = SGD(learning_rate=0.01)

        minibatch_model = Seq([Linear(2, 5, initialize='ones')])
        minibatch_trainer = MinibatchTrainer()
        minibatch_trainer.train_minibatches(
            minibatch_model, train_set,
            batch_size=1, loss=loss, epochs=epochs,
            optimizer=optimizer, shuffle=False)

        simple_model = Seq([Linear(2, 5, initialize='ones')])
        simple_trainer = OnlineTrainer()
        simple_trainer.train(simple_model, train_set, loss, epochs, optimizer)

        # Both models should now map the same input to the same output
        x = np.random.rand(2)
        simple_y = simple_model.forward(x)
        minibatch_y = minibatch_model.forward(x)

        assert_array_equal(simple_y, minibatch_y)
    def test_Perceptron(self):
        train_set, test_set = gen_data()

        model = Seq([
            Linear(2, 5, initialize='random'),
            Sigmoid(),
            Linear(5, 1, initialize='random'),
            Sigmoid(),
        ])

        OnlineTrainer().train(
            model,
            train_set=train_set,
            loss=SquaredLoss(),
            # optimizer=SGD(learning_rate=0.1),
            optimizer=MomentumSGD(learning_rate=0.1, momentum=0.9),
            # optimizer=AdaGrad(learning_rate=0.9),
            # optimizer=RMSProp(learning_rate=0.1, decay_rate=0.9),
            epochs=200,
            save_progress=False)

        # model.learn_minibatch(
        #     input_data=train_data,
        #     target_data=train_targets,
        #     loss=SquaredLoss(),
        #     batch_size=5,
        #     # optimizer=SGD(learning_rate=0.1),
        #     # optimizer=MomentumSGD(learning_rate=0.1, momentum=0.9),
        #     optimizer=AdaGrad(learning_rate=0.9),
        #     # optimizer=RMSProp(learning_rate=0.1, decay_rate=0.9),
        #     # epochs=100,
        #     save_progress=True)

        model.save_to_file('perceptron.pkl')

        scatter_test_data(test_set, model)

        # model.plot_errors_history()
        # model.plot_loss_gradient_history()

        plt.show()
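# --- Hypothetical helper sketch (illustrative only) ---
# The test above depends on gen_data() and scatter_test_data(), which are
# defined elsewhere in the project. The stand-in below is NOT the project's
# implementation; it only illustrates the data format the test expects:
# lists of (features, target) pairs, as in the other tests in this file.
import numpy as np

def gen_data(n_train=200, n_test=50):
    """Hypothetical: two Gaussian blobs in 2-D with 0/1 targets."""
    def make(n):
        half = n // 2
        xs = np.vstack([np.random.randn(half, 2) + 2.0,
                        np.random.randn(half, 2) - 2.0])
        ys = np.concatenate([np.ones(half), np.zeros(half)])
        return [(x, y) for x, y in zip(xs, ys)]
    return make(n_train), make(n_test)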
class GradientDescent:
    """
    This is a linear classifier similar to the one you implemented in the
    linear regressor homework. This is the classification-via-regression
    case. The goal here is to learn some hyperplane, y = w^T x + b, such that
    when features, x, are processed by our model (w and b), the result is
    some value y. If y is in [0.0, +inf), the predicted classification label
    is +1 and if y is in (-inf, 0.0) the predicted classification label is -1.

    The catch here is that we will not be using the closed form solution;
    rather, we will be using gradient descent. In your fit function you will
    determine a loss and update your model (w and b) using gradient descent.
    More details below.

    Arguments:
        loss - (string) The loss function to use. Either 'hinge' or 'squared'.
        regularization - (string or None) The type of regularization to use.
            One of 'l1', 'l2', or None. See regularization.py for more details.
        learning_rate - (float) The size of each gradient descent update step.
        reg_param - (float) The hyperparameter that controls the amount of
            regularization to perform. Must be non-negative.
    """

    def __init__(self, loss, regularization=None,
                 learning_rate=0.01, reg_param=0.05):
        self.learning_rate = learning_rate

        # Select regularizer
        if regularization == 'l1':
            regularizer = L1Regularization(reg_param)
        elif regularization == 'l2':
            regularizer = L2Regularization(reg_param)
        elif regularization is None:
            regularizer = None
        else:
            raise ValueError(
                'Regularizer {} is not defined'.format(regularization))

        # Select loss function
        if loss == 'hinge':
            self.loss = HingeLoss(regularizer)
        elif loss == 'squared':
            self.loss = SquaredLoss(regularizer)
        else:
            raise ValueError('Loss function {} is not defined'.format(loss))

        self.model = None

        # storage for frq
        self.losses = []
        self.predictions = []
        self.convergence = False
        self.iterations = 0

    def fit(self, features, targets, batch_size=None, max_iter=1000):
        """
        Fits a gradient descent learner to the features and targets. The
        pseudocode for the fitting algorithm is as follows:
            - Initialize the model parameters to uniform random values in the
              interval [-0.1, +0.1].
            - While not converged:
                - Compute the gradient of the loss with respect to the
                  current batch.
                - Update the model parameters by moving them in the direction
                  opposite to the current gradient. Use the learning rate as
                  the step size.
        For the convergence criteria, compute the loss over all examples. If
        this loss changes by less than 1e-4 during an update, assume that the
        model has converged. If this convergence criteria has not been met
        after max_iter iterations, also assume convergence and terminate.

        You should include a bias term by APPENDING a column of 1s to your
        feature matrix. The bias term is then the last value in self.model.

        Arguments:
            features - (np.array) An Nxd array of features, where N is the
                number of examples and d is the number of features.
            targets - (np.array) A 1D array of targets of length N.
            batch_size - (int or None) The number of examples used in each
                iteration. If None, use all of the examples in each update.
            max_iter - (int) The maximum number of updates to perform.
        Modifies:
            self.model - (np.array) A 1D array of model parameters of length
                d+1. The +1 refers to the bias term.
""" # initialize self.model = np.random.uniform(-.1, .1, features.shape[1] + 1) converged = False num_iters = 0 previous_loss = 0 current_loss = 0 convergence_threshold = .0001 ones = np.ones((features.shape[0], 1)) features = np.hstack((features, ones)) random_indexes = np.arange(0, features.shape[0]) np.random.shuffle(random_indexes) # while not converged while not converged and num_iters < max_iter: num_iters += 1 # check batch size b_features = features b_targets = targets if batch_size: index = (num_iters * batch_size) % features.shape[0] selector = random_indexes[index:index + batch_size] b_features = features[selector, :] b_targets = targets[selector] # edit parameters gradient = self.loss.backward(b_features, self.model, b_targets) change = self.learning_rate * (-(gradient)) self.model += change # check convergence current_loss = self.loss.forward(features, self.model, targets) if previous_loss != 0: calculated_loss_change = np.abs(previous_loss - current_loss) if calculated_loss_change < convergence_threshold: converged = True previous_loss = current_loss # storage for frq #self.losses.append(current_loss) #self.predictions.append(self.predict(features[:,:-1])) self.convergence = converged self.iterations = num_iters return self.model def predict(self, features): """ Predicts the class labels of each example in features. Model output values at and above 0 are predicted to have label +1. Non-positive output values are predicted to have label -1. NOTE: your predict function should make use of your confidence function (see below). Arguments: features - (np.array) A Nxd array of features, where N is the number of examples and d is the number of features. Returns: predictions - (np.array) A 1D array of predictions of length N, where index d corresponds to the prediction of row N of features. """ predictions = np.sign(self.confidence(features)) return predictions def confidence(self, features): """ Returns the raw model output of the prediction. In other words, rather than predicting +1 for values above 0 and -1 for other values, this function returns the original, unquantized value. Arguments: features - (np.array) A Nxd array of features, where N is the number of examples and d is the number of features. Returns: confidence - (np.array) A 1D array of confidence values of length N, where index d corresponds to the confidence of row N of features. """ confidence = np.dot(features, self.model[:-1]) return confidence