def test_call_hooks_uninitialized_param(self):
    """Calling hooks must work even when the chain's params start uninitialized.

    The Lasso hook is registered before the parameters exist; a forward
    pass then materializes them, and ``call_hooks`` is expected to run
    without error afterwards.
    """
    target = UninitializedChain()
    momentum_sgd = optimizers.MomentumSGD()
    momentum_sgd.setup(target)
    momentum_sgd.add_hook(optimizer.Lasso(rate=0.0005))
    # The forward pass initializes the chain's parameters.
    target(np.ones((4, 10), dtype=np.float32))
    momentum_sgd.call_hooks()
def check_lasso(self):
    """One lr=1 SGD step with a Lasso hook should yield w - g - decay * sign(w)."""
    decay = 0.2
    weight = self.target.param.data
    grad = self.target.param.grad
    xp = cuda.get_array_module(weight)
    # Expected value is computed BEFORE the update, since the optimizer
    # mutates the parameter array in place.
    expect = weight - grad - decay * xp.sign(weight)
    sgd = optimizers.SGD(lr=1)
    sgd.setup(self.target)
    sgd.add_hook(optimizer.Lasso(decay))
    sgd.update()
    testing.assert_allclose(expect, weight)
def __init__(self, agent, memory_size=10**4, replay_size=32, gamma=0.99,
             initial_exploration=10**4, target_update_freq=10**4,
             learning_rate=0.00025, epsilon_decay=1e-6, minimum_epsilon=0.1,
             L1_rate=None):
    """Set up a DQN-style trainer around *agent*.

    Args:
        agent: Agent exposing a Q-network at ``agent.q`` (with
            ``n_history``, ``n_action``, ``on_gpu``, ``SIZE``).
        memory_size: Capacity of the replay memory.
        replay_size: Minibatch size drawn from replay memory.
        gamma: Discount factor.
        initial_exploration: Steps of pure exploration before learning.
        target_update_freq: Steps between target-network syncs.
        learning_rate: RMSpropGraves learning rate.
        epsilon_decay: Per-step epsilon decrement.
        minimum_epsilon: Floor for the exploration rate.
        L1_rate: If given, an L1 (Lasso) regularization rate added as an
            optimizer hook.
    """
    self.agent = agent
    # Separate target network mirroring the agent's Q-network configuration.
    self.target = Q(self.agent.q.n_history, self.agent.q.n_action,
                    on_gpu=self.agent.q.on_gpu)
    self.memory_size = memory_size
    self.replay_size = replay_size
    self.gamma = gamma
    self.initial_exploration = initial_exploration
    self.target_update_freq = target_update_freq
    # Correctly spelled attributes; the misspelled names below are kept as
    # deprecated aliases so existing callers that read them keep working.
    self.learning_rate = learning_rate
    self.epsilon_decay = epsilon_decay
    self.laerning_rate = learning_rate  # deprecated alias (typo kept for compat)
    self.epslon_decay = epsilon_decay   # deprecated alias (typo kept for compat)
    self.minimum_epsilon = minimum_epsilon
    self._step = 0

    # Prepare replay memory: (state, action, reward, next_state, episode_end).
    n_hist = self.agent.q.n_history
    size = self.agent.q.SIZE
    self.memory = [
        np.zeros((memory_size, n_hist, 3, size, size), dtype=np.float32),
        np.zeros(memory_size, dtype=np.uint8),
        np.zeros((memory_size, 1), dtype=np.float32),
        np.zeros((memory_size, n_hist, 3, size, size), dtype=np.float32),
        # np.bool was removed in NumPy 1.24; the builtin bool is equivalent.
        np.zeros((memory_size, 1), dtype=bool),
    ]
    self.memory_text = [
        "state", "action", "reward", "next_state", "episode_end",
    ]

    # Prepare optimizer (hyperparameters follow the DQN paper's RMSProp setup).
    self.optimizer = optimizers.RMSpropGraves(
        lr=learning_rate, alpha=0.95, momentum=0.95, eps=0.01)
    self.optimizer.setup(self.agent.q)
    if L1_rate is not None:
        self.optimizer.add_hook(optimizer.Lasso(L1_rate))

    # NOTE(review): _loss starts at 9, not 0 — looks like an arbitrary
    # sentinel for "no loss computed yet"; confirm before changing.
    self._loss = 9
    self._qv = 0
trs = False train_iter = iterators.SerialIterator(dataset_train, batch_size=args.batchsize, shuffle=trs) if args.numval > 0: val_iter = iterators.SerialIterator(dataset_val, batch_size=len(dataset_val), repeat=False, shuffle=False) # -- Set optimizers optimizer1 = use_optimizer(lr=args.learning_rate) optimizer1.setup(loss.phi) optimizer2 = use_optimizer(lr=args.learning_rate) optimizer2.setup(loss.net) optimizer1.add_hook(optimizer_module.Lasso(args.beta)) optimizer2.add_hook(optimizer_module.WeightDecay(args.gamma)) # -- Set a trigger if args.log_in_iteration: trigger = (1, 'iteration') else: trigger = (1, 'epoch') # -- Set a trainer if args.fixed_embedder: optimizer_dict = {'net': optimizer2} else: optimizer_dict = {'phi': optimizer1, 'net': optimizer2} updater = lkis.Updater(train_iter, optimizer_dict,