def optim(self, learning_rate):
    # Apply one Adam update to this layer's weight and bias parameters,
    # threading the per-parameter optimizer state (m, v, t) through adam().
    w, b = self.w, self.b
    # print("fc layer, optim learning_rate", learning_rate)
    w.data, w.m, w.v, w.t = adam(w.data, w.grad, learning_rate=learning_rate,
                                 m=w.m, v=w.v, t=w.t)
    b.data, b.m, b.v, b.t = adam(b.data, b.grad, learning_rate=learning_rate,
                                 m=b.m, v=b.v, t=b.t)
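# The optim method above assumes a functional-style adam() helper that takes a
# parameter array, its gradient, and the optimizer state (m, v, t) and returns
# updated versions of all four. A minimal sketch of such a helper; the
# hyperparameter names and defaults (beta1, beta2, eps) are assumptions, not
# taken from the original source:

import numpy as np

def adam(data, grad, learning_rate, m, v, t,
         beta1=0.9, beta2=0.999, eps=1e-8):
    # One Adam step: update the biased first/second moment estimates,
    # bias-correct them, and apply the update to the parameter array.
    t = t + 1
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad ** 2
    m_hat = m / (1 - beta1 ** t)
    v_hat = v / (1 - beta2 ** t)
    data = data - learning_rate * m_hat / (np.sqrt(v_hat) + eps)
    return data, m, v, t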
def sgd_fun_adam_avg(obj_fun, grad_fun, x0, max_iters, callback, lr_fun):
    return opt.adam(obj_fun, grad_fun, x0, max_iters, callback, lr_fun,
                    avgdecay=0.99)
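# sgd_fun_adam_avg wraps an opt.adam whose avgdecay argument suggests an
# exponential moving average of the iterates (Polyak-style averaging) on top
# of the raw Adam updates. A hypothetical sketch of what such an optimizer
# loop could look like -- the callback signature, the lr_fun(t) schedule, and
# returning the averaged iterate are all assumptions about the surrounding
# codebase, not the actual opt.adam implementation:

def adam_with_iterate_averaging(obj_fun, grad_fun, x0, max_iters, callback,
                                lr_fun, avgdecay=0.99,
                                beta1=0.9, beta2=0.999, eps=1e-8):
    # obj_fun is unused in this sketch; kept to mirror the wrapped signature.
    x = np.array(x0, dtype=float)
    x_avg = x.copy()
    m = np.zeros_like(x)
    v = np.zeros_like(x)
    for t in range(1, max_iters + 1):
        g = grad_fun(x)
        # Standard Adam moment updates with bias correction.
        m = beta1 * m + (1 - beta1) * g
        v = beta2 * v + (1 - beta2) * g ** 2
        m_hat = m / (1 - beta1 ** t)
        v_hat = v / (1 - beta2 ** t)
        x = x - lr_fun(t) * m_hat / (np.sqrt(v_hat) + eps)
        # Exponential moving average of the iterates, decayed by avgdecay.
        x_avg = avgdecay * x_avg + (1 - avgdecay) * x
        if callback is not None:
            callback(x, t)  # assumed callback signature
    return x_avg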