def run(self, parameter, parameterName, loss, **kwargs) : pVar = parameter.getVar() gparam = tt.grad(loss, pVar) updates = LUP.adamax( [ gparam ], [pVar], learning_rate=self.getHP("lr"), beta1=self.getHP("beta1"), beta2=self.getHP("beta2"), epsilon=self.getHP("epsilon")) ret = OptimizerResult(pVar, parameterName, gparam, updates[pVar]) i = 0 for param, update in updates.items() : if param is not pVar : name = "%s_adamax_%s" % (parameterName, i) ret.addCoParameter(param, name, None, update) i += 1 return ret
def get_updates(nnet, train_obj, trainable_params, solver=None): implemented_solvers = ("sgd", "momentum", "nesterov", "adagrad", "rmsprop", "adadelta", "adam", "adamax") if solver not in implemented_solvers: nnet.sgd_solver = "adam" else: nnet.sgd_solver = solver if nnet.sgd_solver == "sgd": updates = l_updates.sgd(train_obj, trainable_params, learning_rate=Cfg.learning_rate) elif nnet.sgd_solver == "momentum": updates = l_updates.momentum(train_obj, trainable_params, learning_rate=Cfg.learning_rate, momentum=Cfg.momentum) elif nnet.sgd_solver == "nesterov": updates = l_updates.nesterov_momentum(train_obj, trainable_params, learning_rate=Cfg.learning_rate, momentum=Cfg.momentum) elif nnet.sgd_solver == "adagrad": updates = l_updates.adagrad(train_obj, trainable_params, learning_rate=Cfg.learning_rate) elif nnet.sgd_solver == "rmsprop": updates = l_updates.rmsprop(train_obj, trainable_params, learning_rate=Cfg.learning_rate, rho=Cfg.rho) elif nnet.sgd_solver == "adadelta": updates = l_updates.adadelta(train_obj, trainable_params, learning_rate=Cfg.learning_rate, rho=Cfg.rho) elif nnet.sgd_solver == "adam": updates = l_updates.adam(train_obj, trainable_params, learning_rate=Cfg.learning_rate) elif nnet.sgd_solver == "adamax": updates = l_updates.adamax(train_obj, trainable_params, learning_rate=Cfg.learning_rate) return updates