def test_squared_loss_backward():
    """
    Check the gradient returned by SquaredLoss.backward on a tiny,
    hand-computed example (two examples, three weights, no regularizer).
    """
    from your_code import SquaredLoss

    features = np.array([[-1, 2, 1],
                         [-3, 4, 1]])
    weights = np.array([1, 2, 3])
    labels = np.array([1, -1])

    # For these inputs X^T (Xw - y) / N works out to [-16, 23, 7].
    expected_gradient = np.array([-16, 23, 7])

    squared_loss = SquaredLoss(regularization=None)
    actual_gradient = squared_loss.backward(features, weights, labels)

    assert np.allclose(expected_gradient, actual_gradient)
class GradientDescent:
    """
    Linear classifier trained with (mini-batch) gradient descent.

    The learned model is a 1D weight vector whose LAST entry is the bias.
    An example is labelled +1 when its raw output is >= 0 and -1 otherwise.

    Arguments:
        loss - (string) The loss function to use. Either 'hinge' or
            'squared'.
        regularization - (string or None) One of 'l1', 'l2', or None.
        learning_rate - (float) The size of each gradient descent step.
        reg_param - (float) Passed to the regularizer's constructor.

    Raises:
        ValueError - if `loss` or `regularization` is not one of the names
            listed above.
    """

    def __init__(self, loss, regularization=None,
                 learning_rate=0.01, reg_param=0.05):
        self.learning_rate = learning_rate

        # Select regularizer
        if regularization == 'l1':
            regularizer = L1Regularization(reg_param)
        elif regularization == 'l2':
            regularizer = L2Regularization(reg_param)
        elif regularization is None:
            regularizer = None
        else:
            raise ValueError(
                'Regularizer {} is not defined'.format(regularization))

        # Select loss function
        if loss == 'hinge':
            self.loss = HingeLoss(regularizer)
        elif loss == 'squared':
            self.loss = SquaredLoss(regularizer)
        else:
            raise ValueError('Loss function {} is not defined'.format(loss))

        # Set by fit(); 1D array of length d + 1 (bias last).
        self.model = None

    @staticmethod
    def _add_bias(features):
        """Return `features` with a trailing column of ones (bias input)."""
        return np.hstack((features, np.ones((features.shape[0], 1))))

    def fit(self, features, targets, batch_size=None, max_iter=1000):
        """
        Fits the model to the features and targets.

        The weights start as uniform random values in [-0.1, +0.1]. Each
        iteration is one pass over the data: with `batch_size` set, the
        examples are cut into consecutive chunks of `batch_size` rows (the
        last chunk may be smaller) that are visited in a freshly shuffled
        order, one update per chunk; otherwise a single update on all
        examples is performed. After every pass the loss over ALL examples
        is recomputed and training stops as soon as it no longer changes,
        or after `max_iter` passes.

        Arguments:
            features - (np.array) An Nxd array of features.
            targets - (np.array) A 1D array of targets of length N.
            batch_size - (int or None) Examples per update. None, 0, or a
                value >= N means "use every example in each update".
            max_iter - (int) The maximum number of passes to perform.

        Modifies:
            self.model - (np.array) A 1D array of length d+1.
        """
        features = self._add_bias(np.asarray(features, dtype=float))
        targets = np.asarray(targets)
        n_examples = features.shape[0]

        self.model = np.random.uniform(-0.1, 0.1, features.shape[1])

        if batch_size and batch_size < n_examples:
            batches = [
                (features[start:start + batch_size],
                 targets[start:start + batch_size])
                for start in range(0, n_examples, batch_size)
            ]
        else:
            batches = [(features, targets)]
        # np.random.shuffle works in place and returns None, so the index
        # array has to exist before it is shuffled.
        order = np.arange(len(batches))

        previous_loss = None
        current_loss = self.loss.forward(features, self.model, targets)
        passes = 0
        # `abs(...) > 0` (rather than `!=`) also stops on a NaN loss.
        while passes < max_iter and (
                previous_loss is None
                or abs(current_loss - previous_loss) > 0):
            if len(batches) > 1:
                np.random.shuffle(order)
            for batch_index in order:
                batch_features, batch_targets = batches[batch_index]
                gradient = self.loss.backward(
                    batch_features, self.model, batch_targets)
                self.model = self.model - self.learning_rate * gradient

            previous_loss = current_loss
            current_loss = self.loss.forward(features, self.model, targets)
            passes += 1

    def predict(self, features):
        """
        Predicts the class label of each example in features.

        Arguments:
            features - (np.array) An Nxd array of features.
        Returns:
            predictions - (np.array) A 1D integer array of length N holding
                +1 where the confidence is >= 0 and -1 where it is negative.
        """
        return np.where(self.confidence(features) < 0, -1, 1)

    def confidence(self, features):
        """
        Returns the raw (unquantized) model output for each example.

        Arguments:
            features - (np.array) An Nxd array of features. For backward
                compatibility an Nx(d+1) array that already carries the
                trailing bias column of ones is accepted unchanged.
        Returns:
            confidence - (np.array) A 1D array of length N; entry i is the
                model output for row i of features.
        """
        features = np.atleast_2d(np.asarray(features, dtype=float))
        if features.shape[1] == self.model.shape[0] - 1:
            features = self._add_bias(features)
        return np.dot(features, self.model)
class GradientDescent:
    """
    This is a linear classifier similar to the one you implemented in the
    linear regressor homework. This is the classification via regression
    case. The goal here is to learn some hyperplane, y = w^T x + b, such
    that when features, x, are processed by our model (w and b), the result
    is some value y. If y is in [0.0, +inf), the predicted classification
    label is +1 and if y is in (-inf, 0.0) the predicted classification
    label is -1.

    The closed form solution is not used; the model (w and b) is learned
    with gradient descent in `fit`.

    Arguments:
        loss - (string) The loss function to use. Either 'hinge' or
            'squared'.
        regularization - (string or None) The type of regularization to
            use. One of 'l1', 'l2', or None.
        learning_rate - (float) The size of each gradient descent update
            step.
        reg_param - (float) The hyperparameter that controls the amount of
            regularization to perform. Must be non-negative.

    Raises:
        ValueError - if `loss` or `regularization` is not one of the names
            listed above.
    """

    def __init__(self, loss, regularization=None,
                 learning_rate=0.01, reg_param=0.05):
        self.learning_rate = learning_rate

        # Select regularizer
        if regularization == 'l1':
            regularizer = L1Regularization(reg_param)
        elif regularization == 'l2':
            regularizer = L2Regularization(reg_param)
        elif regularization is None:
            regularizer = None
        else:
            raise ValueError(
                'Regularizer {} is not defined'.format(regularization))

        # Select loss function
        if loss == 'hinge':
            self.loss = HingeLoss(regularizer)
        elif loss == 'squared':
            self.loss = SquaredLoss(regularizer)
        else:
            raise ValueError('Loss function {} is not defined'.format(loss))

        # Set by fit(); 1D array of length d + 1 (bias last).
        self.model = None

    @staticmethod
    def _add_bias(features):
        """Return `features` with a trailing column of ones (bias input)."""
        return np.hstack((features, np.ones((features.shape[0], 1))))

    def fit(self, features, targets, batch_size=None, max_iter=1000):
        """
        Fits a gradient descent learner to the features and targets.

        The fitting algorithm is:
          - Initialize the model parameters to uniform random values in the
            interval [-0.1, +0.1].
          - Until converged:
            - Compute the loss over ALL examples; if it changed by less
              than 1e-4 since the previous update, stop.
            - Otherwise compute the gradient on the current batch (a random
              sample of `batch_size` rows drawn without replacement from
              the whole data set) and move the parameters against it, using
              the learning rate as the step size.
          - Also stop once `max_iter` updates have been performed.

        A bias term is included by APPENDING a column of 1s to the feature
        matrix, so the bias is the last value in self.model.

        Arguments:
            features - (np.array) An Nxd array of features, where N is the
                number of examples and d is the number of features.
            targets - (np.array) A 1D array of targets of length N.
            batch_size - (int or None) The number of examples used in each
                update. None, 0, or a value larger than N means all of the
                examples are used in each update.
            max_iter - (int) The maximum number of updates to perform.

        Modifies:
            self.model - (np.array) A 1D array of model parameters of
                length d+1. The +1 refers to the bias term.

        Returns:
            (int) The number of updates that were performed.
        """
        features = np.asarray(features, dtype=float)
        targets = np.asarray(targets)
        n_examples = features.shape[0]
        augmented = self._add_bias(features)

        weights = np.random.uniform(-0.1, 0.1, augmented.shape[1])

        if not batch_size or batch_size > n_examples:
            batch_size = n_examples

        n_updates = 0
        previous_loss = np.inf  # guarantees the first check cannot pass
        while n_updates < max_iter:
            # The loss functions receive a copy so that an implementation
            # that modifies `w` in place cannot corrupt the weights.
            loss = self.loss.forward(
                X=augmented, w=np.copy(weights), y=targets)
            if abs(loss - previous_loss) < 1e-4:
                break

            if batch_size == n_examples:
                batch_features, batch_targets = augmented, targets
            else:
                rows = np.random.choice(
                    n_examples, batch_size, replace=False)
                batch_features, batch_targets = augmented[rows], targets[rows]

            gradient = self.loss.backward(
                X=batch_features, w=np.copy(weights), y=batch_targets)
            weights = weights - self.learning_rate * gradient

            previous_loss = loss
            n_updates += 1

        self.model = weights
        return n_updates

    def predict(self, features):
        """
        Predicts the class labels of each example in features. Model output
        values at and above 0 are predicted to have label +1. Negative
        output values are predicted to have label -1.

        Arguments:
            features - (np.array) An Nxd array of features, where N is the
                number of examples and d is the number of features.
        Returns:
            predictions - (np.array) A 1D float array of length N, where
                index i holds the prediction for row i of features.
        """
        return np.where(self.confidence(features) >= 0, 1.0, -1.0)

    def confidence(self, features):
        """
        Returns the raw model output of the prediction. In other words,
        rather than predicting +1 for values at or above 0 and -1 for other
        values, this function returns the original, unquantized value.

        Arguments:
            features - (np.array) An Nxd array of features, where N is the
                number of examples and d is the number of features.
        Returns:
            confidence - (np.array) A 1D array of confidence values of
                length N, where index i holds the confidence for row i of
                features.
        """
        augmented = self._add_bias(np.asarray(features, dtype=float))
        return self.model.dot(augmented.T)
class GradientDescent:
    """
    This is a linear classifier similar to the one you implemented in the
    linear regressor homework. This is the classification via regression
    case. The goal here is to learn some hyperplane, y = w^T x + b, such
    that when features, x, are processed by our model (w and b), the result
    is some value y. If y is in [0.0, +inf), the predicted classification
    label is +1 and if y is in (-inf, 0.0) the predicted classification
    label is -1.

    The closed form solution is not used; the model (w and b) is learned
    with gradient descent in `fit`.

    Arguments:
        loss - (string) The loss function to use. Either 'hinge' or
            'squared'.
        regularization - (string or None) The type of regularization to
            use. One of 'l1', 'l2', or None.
        learning_rate - (float) The size of each gradient descent update
            step.
        reg_param - (float) The hyperparameter that controls the amount of
            regularization to perform. Must be non-negative.

    Raises:
        ValueError - if `loss` or `regularization` is not one of the names
            listed above.
    """

    def __init__(self, loss, regularization=None,
                 learning_rate=0.01, reg_param=0.05):
        self.learning_rate = learning_rate

        # Select regularizer
        if regularization == 'l1':
            regularizer = L1Regularization(reg_param)
        elif regularization == 'l2':
            regularizer = L2Regularization(reg_param)
        elif regularization is None:
            regularizer = None
        else:
            raise ValueError(
                'Regularizer {} is not defined'.format(regularization))

        # Select loss function
        if loss == 'hinge':
            self.loss = HingeLoss(regularizer)
        elif loss == 'squared':
            self.loss = SquaredLoss(regularizer)
        else:
            raise ValueError('Loss function {} is not defined'.format(loss))

        # Set by fit(); 1D array of length d + 1 (bias last).
        self.model = None

    @staticmethod
    def _add_bias(features):
        """Return `features` with a trailing column of ones (bias input)."""
        return np.hstack((features, np.ones((features.shape[0], 1))))

    def fit(self, features, targets, batch_size=None, max_iter=1000):
        """
        Fits a gradient descent learner to the features and targets.

        The fitting algorithm is:
          - Initialize ALL model parameters, bias included, to uniform
            random values in the interval [-0.1, +0.1].
          - Repeat at most `max_iter` times:
            - Compute the gradient of the loss on the current batch (a
              random sample of `batch_size` rows, features and targets
              together, drawn without replacement).
            - Move the parameters against the gradient, using the learning
              rate as the step size.
            - Compute the loss over all examples; if it changed by less
              than 1e-4 during this update, stop.

        A bias term is included by APPENDING a column of 1s to the feature
        matrix, so the bias is the last value in self.model.

        Arguments:
            features - (np.array) An Nxd array of features, where N is the
                number of examples and d is the number of features.
            targets - (np.array) A 1D array of targets of length N.
            batch_size - (int or None) The number of examples used in each
                update. None, or a value >= N, means all of the examples
                are used in each update.
            max_iter - (int) The maximum number of updates to perform.

        Modifies:
            self.model - (np.array) A 1D array of model parameters of
                length d+1. The +1 refers to the bias term.
        """
        features = np.asarray(features, dtype=float)
        targets = np.asarray(targets)
        n_examples = features.shape[0]
        augmented = self._add_bias(features)

        weights = np.random.uniform(-0.1, 0.1, augmented.shape[1])
        full_batch = batch_size is None or batch_size >= n_examples

        previous_loss = self.loss.forward(augmented, weights, targets)
        for _ in range(max_iter):
            if full_batch:
                batch_features, batch_targets = augmented, targets
            else:
                rows = np.random.choice(
                    n_examples, batch_size, replace=False)
                batch_features, batch_targets = augmented[rows], targets[rows]

            gradient = self.loss.backward(
                batch_features, weights, batch_targets)
            weights = weights - self.learning_rate * gradient

            current_loss = self.loss.forward(augmented, weights, targets)
            converged = abs(previous_loss - current_loss) < 1e-4
            previous_loss = current_loss
            if converged:
                break

        self.model = weights

    def predict(self, features):
        """
        Predicts the class labels of each example in features. Model output
        values at and above 0 are predicted to have label +1. Negative
        output values are predicted to have label -1.

        Arguments:
            features - (np.array) An Nxd array of features, where N is the
                number of examples and d is the number of features.
        Returns:
            predictions - (np.array) A 1D float array of length N, where
                index i holds the prediction for row i of features.
        """
        return np.where(self.confidence(features) >= 0, 1.0, -1.0)

    def confidence(self, features):
        """
        Returns the raw model output of the prediction. In other words,
        rather than predicting +1 for values at or above 0 and -1 for other
        values, this function returns the original, unquantized value.

        Arguments:
            features - (np.array) An Nxd array of features, where N is the
                number of examples and d is the number of features. For
                backward compatibility an Nx(d+1) array that already
                carries the trailing bias column of ones is accepted
                unchanged.
        Returns:
            confidence - (np.array) A 1D array of confidence values of
                length N, where index i holds the confidence for row i of
                features.
        """
        features = np.atleast_2d(np.asarray(features, dtype=float))
        if features.shape[1] == self.model.shape[0] - 1:
            features = self._add_bias(features)
        return np.dot(features, self.model)
class GradientDescentQ1:
    """
    This is a linear classifier similar to the one you implemented in the
    linear regressor homework. This is the classification via regression
    case. The goal here is to learn some hyperplane, y = w^T x + b, such
    that when features, x, are processed by our model (w and b), the result
    is some value y. If y is in [0.0, +inf), the predicted classification
    label is +1 and if y is in (-inf, 0.0) the predicted classification
    label is -1.

    The closed form solution is not used; the model (w and b) is learned
    with gradient descent in `fit`, which also plots the per-iteration loss
    and accuracy.

    Arguments:
        loss - (string) The loss function to use. Either 'hinge' or
            'squared'.
        regularization - (string or None) The type of regularization to
            use. One of 'l1', 'l2', or None.
        learning_rate - (float) The size of each gradient descent update
            step.
        reg_param - (float) The hyperparameter that controls the amount of
            regularization to perform. Must be non-negative.
        question - (string) '1a' or '1b' makes `fit` save its plot to
            "Q1a.png" / "Q1b.png"; any other value saves nothing.

    Raises:
        ValueError - if `loss` or `regularization` is not one of the names
            listed above.
    """

    def __init__(self, loss, regularization=None,
                 learning_rate=0.01, reg_param=0.05, question='1a'):
        self.learning_rate = learning_rate

        # Select regularizer
        if regularization == 'l1':
            regularizer = L1Regularization(reg_param)
        elif regularization == 'l2':
            regularizer = L2Regularization(reg_param)
        elif regularization is None:
            regularizer = None
        else:
            raise ValueError(
                'Regularizer {} is not defined'.format(regularization))

        # Select loss function
        if loss == 'hinge':
            self.loss = HingeLoss(regularizer)
        elif loss == 'squared':
            self.loss = SquaredLoss(regularizer)
        else:
            raise ValueError('Loss function {} is not defined'.format(loss))

        # Set by fit(); 1D array of length d + 1 (bias last).
        self.model = None
        self.question = question

    @staticmethod
    def _add_bias(features):
        """Return `features` with a trailing column of ones (bias input)."""
        return np.hstack((features, np.ones((features.shape[0], 1))))

    def _plot_history(self, iteration_list, loss_list, accuracy_list):
        """Plot loss and accuracy per iteration; save it for Q1a / Q1b."""
        plt.figure()
        plt.plot(iteration_list, loss_list, color='orange', label='Loss')
        plt.plot(iteration_list, accuracy_list, color='blue',
                 label='Accuracy')
        plt.title('Loss & Accuracy Vs. Iteration No.')
        plt.xlabel('Iteration')
        plt.ylabel('Loss & Accuracy')
        plt.legend(loc="best")
        if self.question == '1a':
            plt.savefig("Q1a.png")
        if self.question == '1b':
            plt.savefig("Q1b.png")

    def fit(self, features, targets, batch_size=None, max_iter=1000):
        """
        Fits a gradient descent learner to the features and targets, then
        plots the recorded loss and accuracy against the iteration number.

        The fitting algorithm is:
          - Initialize the model parameters (bias included) to uniform
            random values in the interval [-0.1, +0.1].
          - Repeat at most `max_iter` times:
            - Compute the gradient of the loss on the current batch (a
              random sample of `batch_size` rows, features and targets
              together, drawn without replacement).
            - Move the parameters against the gradient, using the learning
              rate as the step size.
            - Compute the loss over all examples; if it changed by less
              than 1e-4 during this update, stop. Otherwise record the
              loss and the accuracy on the full data set.

        A bias term is included by APPENDING a column of 1s to the feature
        matrix, so the bias is the last value in self.model.

        Arguments:
            features - (np.array) An Nxd array of features, where N is the
                number of examples and d is the number of features.
            targets - (np.array) A 1D array of targets of length N.
            batch_size - (int or None) The number of examples used in each
                update. None, 0, or a value >= N means all of the examples
                are used in each update.
            max_iter - (int) The maximum number of updates to perform.

        Modifies:
            self.model - (np.array) A 1D array of model parameters of
                length d+1. The +1 refers to the bias term.
        """
        features = np.asarray(features, dtype=float)
        targets = np.asarray(targets)
        n_examples = features.shape[0]
        augmented = self._add_bias(features)

        self.model = np.array(
            [random.uniform(-0.1, 0.1) for _ in range(augmented.shape[1])])
        full_batch = not batch_size or batch_size >= n_examples

        accuracy_list = []
        loss_list = []
        iteration_list = []

        iteration = 0
        loss = None
        while iteration < max_iter:
            if full_batch:
                sample_features = augmented
                sample_targets = targets
            else:
                # Sample row indices once so that every feature row stays
                # paired with its own target.
                rows = random.sample(range(n_examples), batch_size)
                sample_features = augmented[rows]
                sample_targets = targets[rows]

            gradient = self.loss.backward(
                sample_features, self.model, sample_targets)
            self.model = self.model - self.learning_rate * gradient

            new_loss = self.loss.forward(augmented, self.model, targets)
            if loss is not None and abs(new_loss - loss) < 1e-4:
                break
            loss = new_loss

            loss_list.append(loss)
            accuracy_list.append(
                metrics.accuracy(targets, self.predict(features)))
            iteration_list.append(iteration)
            iteration += 1

        self._plot_history(iteration_list, loss_list, accuracy_list)

    def predict(self, features):
        """
        Predicts the class labels of each example in features. Model output
        values at and above 0 are predicted to have label +1. Negative
        output values are predicted to have label -1.

        Arguments:
            features - (np.array) An Nxd array of features, where N is the
                number of examples and d is the number of features.
        Returns:
            predictions - (np.array) A 1D float array of length N, where
                index i holds the prediction for row i of features.
        """
        return np.where(self.confidence(features) >= 0, 1.0, -1.0)

    def confidence(self, features):
        """
        Returns the raw model output of the prediction. In other words,
        rather than predicting +1 for values at or above 0 and -1 for other
        values, this function returns the original, unquantized value.

        Arguments:
            features - (np.array) An Nxd array of features, where N is the
                number of examples and d is the number of features. When
                the column count already equals len(self.model) the array
                is used as-is (no bias column is appended).
        Returns:
            confidence - (np.array) A 1D array of confidence values of
                length N, where index i holds the confidence for row i of
                features.
        """
        features = np.atleast_2d(np.asarray(features, dtype=float))
        if features.shape[1] == self.model.shape[0] - 1:
            features = self._add_bias(features)
        return features.dot(self.model)