class RMSprop(Optimizer):

    def __str__(self):
        return "RMSprop"

    def __init__(self, layers, lr_scheduler: LRScheduler = None,
                 loss_function=None, opt_multiplier=0.95):
        self.layers = layers
        self.lr_scheduler = lr_scheduler
        self.loss_function = loss_function
        self.gamma = opt_multiplier
        # One running average of squared gradients per layer, shaped like
        # that layer's parameters.
        self.accumulateds = [
            np.zeros_like(layer.get_regularization_params())
            for layer in layers
        ]
        if self.loss_function is None:
            self.loss_function = SquaredError()
        if self.lr_scheduler is None:
            self.lr_scheduler = LRScheduler(1e-3)

    def optimize(self, epoch, grads_, grads):
        lr = self.lr_scheduler.get_learning_rate(epoch)
        for i in range(len(self.layers) - 1, -1, -1):
            # Exponential moving average of the squared gradients.
            self.accumulateds[i] = self.gamma * self.accumulateds[i] + (
                1 - self.gamma) * grads[i]**2
            # Normalize the gradient by the root of the running average;
            # the epsilon guards against division by zero.
            grads[i] = grads[i] / (np.sqrt(self.accumulateds[i]) + 1e-12)
            self.layers[i].update(lr, grads_[i], grads[i])
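# A minimal standalone sketch of the RMSprop arithmetic above on a toy
# parameter vector (plain NumPy; the toy objective and values are
# illustrative assumptions, not part of the library):
#
#   import numpy as np
#
#   gamma, lr, eps = 0.95, 1e-3, 1e-12
#   theta = np.array([0.5, -0.3])        # toy parameters
#   accumulated = np.zeros_like(theta)   # running average of squared grads
#   for _ in range(3):
#       grad = 2 * theta                 # gradient of f(theta) = theta @ theta
#       accumulated = gamma * accumulated + (1 - gamma) * grad**2
#       theta -= lr * grad / (np.sqrt(accumulated) + eps)
#   print(theta)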
class NesterovAG(Optimizer):

    def __str__(self):
        return "NesterovAG"

    def __init__(self, layers, lr_scheduler: LRScheduler = None,
                 loss_function=None, opt_multiplier=0.9):
        self.layers = layers
        self.lr_scheduler = lr_scheduler
        self.loss_function = loss_function
        self.beta = opt_multiplier
        # One velocity buffer per layer, shaped like that layer's parameters.
        self.velocities = [
            np.zeros_like(layer.get_regularization_params())
            for layer in layers
        ]
        if self.loss_function is None:
            self.loss_function = SquaredError()
        if self.lr_scheduler is None:
            self.lr_scheduler = LRScheduler(1e-3)

    def optimize(self, epoch, grads_, grads):
        lr = self.lr_scheduler.get_learning_rate(epoch)
        for i in range(len(self.layers) - 1, -1, -1):
            # Exponential moving average of the gradients (momentum buffer).
            self.velocities[i] = self.beta * self.velocities[i] + (
                1 - self.beta) * grads[i]
            self.layers[i].update(lr, grads_[i], self.velocities[i])
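# A minimal standalone sketch of the velocity update above on a toy
# parameter vector (plain NumPy; illustrative values, not part of the
# library). Note that, as written, the class maintains an exponential
# moving average of gradients rather than evaluating the gradient at a
# look-ahead point:
#
#   import numpy as np
#
#   beta, lr = 0.9, 1e-3
#   theta = np.array([0.5, -0.3])
#   velocity = np.zeros_like(theta)
#   for _ in range(3):
#       grad = 2 * theta                 # gradient of f(theta) = theta @ theta
#       velocity = beta * velocity + (1 - beta) * grad
#       theta -= lr * velocity
#   print(theta)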
def __init__(self, layers, lr_scheduler: LRScheduler = None,
             loss_function=None):
    self.layers = layers
    self.lr_scheduler = lr_scheduler
    self.loss_function = loss_function
    # Per-layer accumulator of squared gradients (AdaGrad-style; the
    # enclosing class header is not included in this excerpt).
    self.accumulateds = [
        np.zeros_like(layer.get_regularization_params())
        for layer in layers
    ]
    if self.loss_function is None:
        self.loss_function = SquaredError()
    if self.lr_scheduler is None:
        self.lr_scheduler = LRScheduler(1e-3)
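# The update step matching this accumulator is not shown in the excerpt.
# Below is a runnable standalone sketch of AdaGrad-style arithmetic
# consistent with it (plain NumPy, illustrative values; an assumption, not
# the source's code): squared gradients are summed without decay, so the
# effective step size shrinks over time.
#
#   import numpy as np
#
#   lr, eps = 1e-3, 1e-12
#   theta = np.array([0.5, -0.3])
#   accumulated = np.zeros_like(theta)
#   for _ in range(3):
#       grad = 2 * theta
#       accumulated += grad**2           # running sum, no decay
#       theta -= lr * grad / (np.sqrt(accumulated) + eps)
#   print(theta)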
class SGD(Optimizer):

    def __str__(self):
        return "SGD"

    def __init__(self, layers, lr_scheduler: LRScheduler = None,
                 loss_function=None):
        self.layers = layers
        self.lr_scheduler = lr_scheduler
        self.loss_function = loss_function
        if self.loss_function is None:
            self.loss_function = SquaredError()
        if self.lr_scheduler is None:
            self.lr_scheduler = LRScheduler(1e-3)

    def optimize(self, epoch, grads_, grads):
        lr = self.lr_scheduler.get_learning_rate(epoch)
        # Plain SGD: apply the raw gradients layer by layer, last layer first.
        for i in range(len(self.layers) - 1, -1, -1):
            self.layers[i].update(lr, grads_[i], grads[i])
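# A hedged usage sketch of the optimizer interface above with a hypothetical
# stub layer (the stub and its values are assumptions for illustration; real
# layers come from the library's layers module):
#
#   import numpy as np
#
#   class _StubLayer:
#       def __init__(self):
#           self.w = np.zeros(3)
#
#       def get_regularization_params(self):
#           return self.w
#
#       def update(self, lr, grad_, grad):
#           self.w -= lr * grad
#
#   layer = _StubLayer()
#   sgd = SGD([layer], lr_scheduler=LRScheduler(1e-2))
#   sgd.optimize(epoch=0, grads_=[np.zeros(3)], grads=[np.ones(3)])
#   print(layer.w)  # each weight moved by -lr * 1.0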
pred_test = nn.forward_pass(x_test)
test_acc = calculate_accuracy(prediction=pred_test,
                              target=y_test,
                              one_hot_encoding=True,
                              classification=True)
print('Accuracy on the test set:', test_acc)

max_epochs = 35
l_rate = 1e-3
loss_f = losses.CrossEntropy()
lr_scheduler = LRScheduler(l_rate,
                           schedule={
                               10: 5e-4,
                               20: 1e-5,
                               25: 5e-5,
                               30: 1e-5
                           })
optimizer = optimizers.SGD(nn.layers,
                           lr_scheduler=lr_scheduler,
                           loss_function=loss_f)
train_loss_list, train_accuracy_list, valid_loss_list, valid_accuracy_list = nn.fit(
    train_set=(x_train, y_train),
    valid_set=(x_test, y_test),
    batch_size=128,
    max_epochs=max_epochs,
    optimizer=optimizer,
    model_saver=ModelSaver(model_name='mlp',
                           folder_name='mnist_models',
                          regularization=regularization,
                          connected_to=nn.layers[-1]))
nn.push_layer(
    layers.FullyConnected(outputs=num_classes,
                          activation=activations.SoftMax(),
                          regularization=regularization,
                          connected_to=nn.layers[-1]))
print(nn)

max_epochs = 5
l_rate = 1e-5
loss_f = losses.CrossEntropy()
optimizer = optimizers.Adam(nn.layers,
                            lr_scheduler=LRScheduler(l_rate),
                            loss_function=loss_f)

pred_test = nn.forward_pass(x_test, report=1)
test_acc = calculate_accuracy(prediction=pred_test,
                              target=y_test,
                              one_hot_encoding=True,
                              classification=True)
print('Accuracy on the test set:', test_acc)

train_loss_list, train_accuracy_list, valid_loss_list, valid_accuracy_list = nn.fit(
    train_set=(x_train, y_train),
    valid_set=(x_test, y_test),
    batch_size=500,
    max_epochs=max_epochs,
    optimizer=optimizer,