def fit(self, loader: DataLoader, optimizer=None, loss_function=None) -> None:
    """
    Fits the model to the data.

    If no optimizer is passed in, the default optimizer is SGD with a
    learning rate of 0.01. If no loss function is passed in, the default
    loss function is MSE.

    :returns: None; self.params are fit to the data.
    """
    if optimizer is None:
        optimizer = SGD(0.01)
    if loss_function is None:
        loss_function = mean_squared_error
    for X, y in loader:
        # Lazily initialize the weights and bias on the first batch,
        # once the input dimensionality is known.
        if self.params is None:
            self.params = Matrix([[Variable(random.random())]
                                  for _ in range(len(X[0]))])
            self.bias = Matrix([[Variable(random.random())]])
        output = self._evaluate(X)
        loss = loss_function(output, y)
        loss += self._regularize()
        self.params = optimizer.step(self.params, loss.get_grad(self.params))
        self.bias = optimizer.step(self.bias, loss.get_grad(self.bias))
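# The fit() above assumes an optimizer whose step(params, grads) returns the
# updated parameters rather than mutating them in place. A minimal sketch of
# that contract in plain numpy (the Matrix/Variable types of the original
# framework are replaced by arrays; this is an illustration, not the
# project's actual SGD class):
import numpy as np

class SimpleSGD(object):
    def __init__(self, lr):
        self.lr = lr

    def step(self, params, grads):
        # Vanilla gradient descent: theta <- theta - lr * dL/dtheta
        return params - self.lr * np.asarray(grads)

# Usage: w = SimpleSGD(0.01).step(w, grad_w)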
def sgd_test():
    # Load data
    if args.data_set == 'SwissRollData':
        Xtrain, Xtest, Ytrain, Ytest = loadSwissRollData()
    elif args.data_set == 'GMMData':
        Xtrain, Xtest, Ytrain, Ytest = loadGMMData()
    else:
        Xtrain, Xtest, Ytrain, Ytest = loadPeaksData()

    # Define the set of batch sizes to sweep (used only for testing)
    batch_size = np.geomspace(2, 2 ** 8, 8)
    batch_size = [round_(i) for i in batch_size]

    # Preprocess data: shuffle and split into batches (using the batch_size list)
    Xtrain, Ytrain, test_sets, train_sets = preprocess_data(
        Xtest, Xtrain, Ytest, Ytrain)

    # Train loop
    all_batches, all_labels = train_sets
    softmax = Softmax(Xtrain.shape[0] + 1, Ytrain.shape[0])
    loss_func = CrossEntropy(softmax.W)
    opt = SGD(lr=args.lr)

    accs_hyper_params_train = []
    accs_hyper_params_test = []

    for e in range(args.iter):
        acc_train = []
        loss_l = []
        for batch, labels in tqdm(zip(all_batches, all_labels),
                                  total=len(all_batches), file=sys.stdout):
            labels = labels.T
            # Bias trick: append a constant-1 row so the bias is folded into W.
            ones = np.ones((1, batch.shape[-1]), dtype=int)
            batch = np.concatenate((batch, ones), axis=0)

            loss = loss_func(batch, labels)
            loss_l.append(loss)
            loss_func.grad_w(batch, labels)
            softmax.W = opt.step(loss_func.grad_W, softmax.W)
            loss_func.W = softmax.W
            output = softmax(batch)

            # Calculate train accuracy
            labels = get_index(labels)
            prediction = predict(output)
            acc_train = np.append(acc_train, prediction == labels, axis=0)

        print('Epoch {} train acc: {} train loss: {}'.format(
            e, np.mean(acc_train), np.mean(loss_l)))
        accs_hyper_params_train.append(np.mean(acc_train))
        accs_hyper_params_test.append(
            np.mean(test_accuracy(softmax, test_sets)))

    plt.plot(range(args.iter), accs_hyper_params_train, label='Train Accuracy')
    plt.plot(range(args.iter), accs_hyper_params_test, label='Validation Accuracy')
    plt.title('SGD test: {} Set, Acc of lr={} and batch size={}'.format(
        args.data_set, args.lr, args.batch_size))
    plt.legend()
    plt.savefig(
        './Test_Figures/{} Set, Acc of lr={} and batch size={}.png'.format(
            args.data_set, args.lr, args.batch_size),
        transparent=True, bbox_inches='tight', pad_inches=0)
    plt.show()
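# The row of ones concatenated onto each batch above is the usual "bias
# trick": with a constant-1 feature appended, the last column of W acts as
# the bias, so W @ [x; 1] == W_x @ x + b. A quick self-contained check
# (the values here are made up for illustration):
import numpy as np

W = np.array([[2.0, 3.0, 0.5]])        # last column plays the role of b
x = np.array([1.0, -1.0])
x_aug = np.append(x, 1.0)              # append the constant-1 feature
assert np.allclose(W @ x_aug, W[:, :2] @ x + W[:, 2])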
error_function = MSE()
repeats = 10
is_linearly_separable = []
for targets in all_targets:
    for idx in range(1, repeats + 1):
        data = DataHandler(inputs, targets)
        linearly_separable = False
        for jdx in range(updates):
            input, target = data.sample(batch_size)
            output = network.forward(input)
            train_error = error_function(target, output)
            network.backward(error=error_function.grad())
            optimizer.step()

            full_input, full_target = data.full
            full_output = network.forward(full_input)
            accuracy = (np.sign(full_output) == full_target).mean()

            print("repeat: [{}/{}]".format(idx, repeats))
            print("update: ({}/{})".format(jdx, updates))
            print(" error: {:.6f}".format(train_error))
            print(" accuracy: {}%\n".format(int(100 * accuracy)))

            if accuracy == 1:
                linearly_separable = True
                break
        if linearly_separable:
            break
    is_linearly_separable.append(linearly_separable)
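# error_function above is called as error_function(target, output) and later
# queried with error_function.grad() with no arguments, so the MSE object
# must cache the residual between calls. A minimal sketch of that interface
# (names and layout are assumptions, not the original class):
import numpy as np

class StatefulMSE(object):
    def __call__(self, target, output):
        self.diff = np.asarray(output) - np.asarray(target)
        return (self.diff ** 2).mean()

    def grad(self):
        # dL/d(output) for L = mean((output - target)^2)
        return 2.0 * self.diff / self.diff.size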
# Assumed imports for this Python 2 snippet; DenseLayer, BatchNormLayer, and
# SGD are project-local modules, and Progbar is as in keras.utils.generic_utils.
import numpy as np
import cPickle as cp
from copy import deepcopy
from sklearn.metrics import accuracy_score


class neural_net(object):
    def __init__(self, input_dims, layers_info, opts):
        self.layers_info = layers_info
        self.num_layers = len(layers_info)
        self.params = {}
        self.save_prefix = opts.save_prefix
        for ix in xrange(len(layers_info)):
            if ix == 0:
                input_dim = input_dims
            else:
                input_dim = layers_info[ix - 1][1]
            output_dim = layers_info[ix][1]
            if layers_info[ix][0] != "batchnorm":
                layer_object = DenseLayer(input_dim, output_dim,
                                          layers_info[ix][2],
                                          dropout=layers_info[ix][3])
            else:
                layer_object = BatchNormLayer(input_dim)
            self.params[layers_info[ix][0] + "_{}".format(ix)] = layer_object.params
            setattr(self, 'layer_{}'.format(ix), layer_object)
        self.optimizer = SGD(self.params, 'categorical_cross_entropy',
                             lr=opts.lr, l2_penalty=opts.l2,
                             momentum=opts.momentum)

    def forward(self, input_tensor, test=False):
        output = input_tensor
        for ix in xrange(self.num_layers):
            output = getattr(self, 'layer_{}'.format(ix))(output, test=test)
        return output

    def backward(self, loss_grad):
        # Propagate the gradient through the layers in reverse order.
        back_grad = loss_grad
        for ix in xrange(self.num_layers - 1, -1, -1):
            back_grad = getattr(self, 'layer_{}'.format(ix)).backward(back_grad)

    def save_params(self, filename):
        params = {}
        for layer in self.params:
            params[layer] = {}
            for param in self.params[layer]:
                params[layer][param] = self.params[layer][param].value
            if "batchnorm" in layer:
                # BatchNorm also carries running-statistics buffers.
                params[layer]["buffers"] = {}
                index = int(layer.split('_')[-1])
                for buffer_key in getattr(self, 'layer_{}'.format(index)).buffers:
                    params[layer]["buffers"][buffer_key] = getattr(
                        self, 'layer_{}'.format(index)).buffers[buffer_key]
        with open(filename, "wb") as f:
            cp.dump(params, f)

    def load_params(self, filename):
        with open(filename, "rb") as f:
            saved_params = cp.load(f)
        for layer in saved_params:
            assert layer in self.params, \
                "Tried to load layer %s, but layer not found" % (layer)
            for param in saved_params[layer]:
                if param == "buffers":
                    assert "batchnorm" in layer, \
                        "Error. Only BatchNorm currently has registered buffers"
                    index = int(layer.split('_')[-1])
                    for buffer_key in saved_params[layer][param]:
                        getattr(self, 'layer_{}'.format(index)).buffers[
                            buffer_key] = saved_params[layer][param][buffer_key]
                else:
                    self.params[layer][param].value = saved_params[layer][param]

    def compute_numerical_grad(self, layer, param, i, j, X, y, eps=0.0001):
        # Central-difference estimate of dLoss/dparam[i][j].
        original_params = deepcopy(self.params[layer][param].value)
        self.params[layer][param].value[i][j] = original_params[i][j] + eps
        loss_pos, _ = self.optimizer.loss(y, self.forward(X))
        self.params[layer][param].value[i][j] = original_params[i][j] - eps
        loss_neg, _ = self.optimizer.loss(y, self.forward(X))
        num_grad = (loss_pos - loss_neg) / (2 * eps)
        self.params[layer][param].value = original_params
        return num_grad

    def test_layer_gradient(self, layer, param, X, y):
        # Compare analytic gradients against numerical estimates; return the
        # worst relative difference over all entries of the parameter.
        max_abs_difference = -1
        for i in xrange(self.params[layer][param].value.shape[0]):
            for j in xrange(self.params[layer][param].value.shape[1]):
                num_gradient = self.compute_numerical_grad(
                    layer, param, i, j, X, y)
                abs_difference = abs(
                    num_gradient - self.params[layer][param].grad[i][j]) / abs(
                        num_gradient + self.params[layer][param].grad[i][j] +
                        np.finfo(float).eps)
                max_abs_difference = max(abs_difference, max_abs_difference)
        return max_abs_difference

    def train_batch(self, X, y):
        self.optimizer.zero_grads()
        loss, loss_grad = self.optimizer.loss(y, self.forward(X))
        self.backward(loss_grad)
        # print self.test_layer_gradient('hidden_0', 'b', X, y)
        self.optimizer.step()
        return loss

    def predict(self, X):
        output = self.forward(X, test=True)
        output = np.argmax(output, axis=-1)
        return output

    def fit(self, X_train, y_train, X_val, y_val, n_epochs=200,
            batch_size=32, return_history=False):
        y_labels_val = np.argmax(y_val, axis=-1)
        y_labels_train = np.argmax(y_train, axis=-1)
        bar = Progbar(n_epochs)
        if return_history:
            history = {'train_loss': [], 'val_loss': [],
                       'train_acc': [], 'val_acc': [],
                       'best_val_acc': None,
                       'Model_Save_Prefix': self.save_prefix}
        best_val_acc = None
        for epoch in xrange(n_epochs):
            # Shuffle the training data
            index = np.arange(X_train.shape[0])
            np.random.shuffle(index)
            X = X_train[index]
            y = y_train[index]
            train_loss = 0.
            for ix in xrange(0, X.shape[0], batch_size):
                batch_x = X[ix:ix + batch_size]
                batch_y = y[ix:ix + batch_size]
                loss_train = self.train_batch(batch_x, batch_y)
                train_loss += loss_train * batch_x.shape[0]
            train_loss /= X.shape[0]
            train_acc = accuracy_score(y_labels_train, self.predict(X_train))
            # Compute validation metrics
            val_loss, _ = self.optimizer.loss(y_val,
                                              self.forward(X_val, test=True))
            val_acc = accuracy_score(y_labels_val, self.predict(X_val))
            if best_val_acc is None or val_acc > best_val_acc:
                best_val_acc = val_acc
                model_file = self.save_prefix + "acc_%.4f_epoch_%d" % (
                    val_acc, epoch + 1)
                self.save_params(model_file)
            if return_history:
                history['train_loss'].append(train_loss)
                history['val_loss'].append(val_loss)
                history['train_acc'].append(train_acc)
                history['val_acc'].append(val_acc)
            bar.update(epoch + 1, values=[("train_loss", train_loss),
                                          ("val_loss", val_loss),
                                          ("train_acc", train_acc),
                                          ("val_acc", val_acc)])
        if return_history:
            history['best_val_acc'] = best_val_acc
            return history
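# compute_numerical_grad above uses the central difference
# (f(theta + eps) - f(theta - eps)) / (2 * eps). The same idea as a
# standalone check against a function with a known gradient (a sketch,
# independent of the class above):
import numpy as np

def numerical_grad(f, x, eps=1e-4):
    grad = np.zeros_like(x)
    for i in range(x.size):
        x[i] += eps
        pos = f(x)
        x[i] -= 2 * eps
        neg = f(x)
        x[i] += eps                    # restore the original value
        grad[i] = (pos - neg) / (2 * eps)
    return grad

f = lambda v: (v ** 2).sum()           # known gradient: 2v
x = np.array([1.0, -2.0, 3.0])
assert np.allclose(numerical_grad(f, x), 2 * x, atol=1e-6)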
import numpy as np

from tensor import Tensor
from layers import Sequential, Linear
from activations import Tanh, Sigmoid
from optimizers import SGD
from losses import MSELoss

np.random.seed(0)

data = Tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), autograd=True)
target = Tensor(np.array([[0], [1], [0], [1]]), autograd=True)

model = Sequential([Linear(2, 3), Tanh(), Linear(3, 1), Sigmoid()])
criterion = MSELoss()
optim = SGD(parameters=model.get_parameters(), alpha=1)

for i in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)
    loss.backward()
    optim.step()
    print(loss)
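# For reference, the same tiny training loop written in plain numpy, with
# the forward and backward passes spelled out by hand. This assumes Linear
# computes X @ W + b and MSE is a sum of squared errors; it mirrors the
# 2-3-1 Tanh/Sigmoid network above but is a sketch, not the library's
# internals:
import numpy as np

np.random.seed(0)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
Y = np.array([[0], [1], [0], [1]], dtype=float)

W1, b1 = np.random.randn(2, 3) * 0.1, np.zeros(3)
W2, b2 = np.random.randn(3, 1) * 0.1, np.zeros(1)
lr = 1.0                                # matches alpha=1 above

for i in range(10):
    # forward
    h = np.tanh(X @ W1 + b1)
    p = 1.0 / (1.0 + np.exp(-(h @ W2 + b2)))
    loss = ((p - Y) ** 2).sum()
    # backward (chain rule, layer by layer)
    dz2 = 2.0 * (p - Y) * p * (1 - p)   # through MSE and sigmoid
    dW2, db2 = h.T @ dz2, dz2.sum(axis=0)
    dz1 = (dz2 @ W2.T) * (1 - h ** 2)   # through tanh
    dW1, db1 = X.T @ dz1, dz1.sum(axis=0)
    # SGD update
    for param, grad in ((W1, dW1), (b1, db1), (W2, dW2), (b2, db2)):
        param -= lr * grad
    print(loss)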