def make_agent(process_idx):
    """Construct an NSQ agent for async worker ``process_idx``.

    The Q-function is recurrent (LSTM) when ``self.use_lstm`` is set,
    otherwise a plain feed-forward network. Exploration is either
    constant epsilon-greedy (epsilon scaled per worker) or Boltzmann.
    """
    # Both Q-function classes accept the same constructor signature,
    # so select the class first and build once.
    q_cls = (FCLSTMStateQFunction if self.use_lstm
             else FCStateQFunctionWithDiscreteAction)
    q_func = q_cls(ndim_obs, n_actions,
                   n_hidden_channels=50,
                   n_hidden_layers=2)

    opt = rmsprop_async.RMSpropAsync(lr=1e-3, eps=1e-2, alpha=0.99)
    opt.setup(q_func)

    if self.explorer == 'epsilon_greedy':
        # Each worker gets its own fixed epsilon, spread as i/10.
        explorer = chainerrl.explorers.ConstantEpsilonGreedy(
            process_idx / 10, random_action_func)
    else:
        explorer = chainerrl.explorers.Boltzmann()

    return nsq.NSQ(q_func, opt,
                   t_max=self.t_max,
                   gamma=0.9,
                   i_target=100,
                   explorer=explorer)
def make_agent(process_idx):
    """Construct an NSQ agent whose final epsilon is drawn at random.

    The final exploration rate is sampled as described in the original
    paper: 0.1, 0.01 or 0.5 with probabilities 0.4, 0.3 and 0.3.
    """
    draw = random.random()
    final_eps = 0.1 if draw < 0.4 else (0.01 if draw < 0.7 else 0.5)

    explorer = explorers.LinearDecayEpsilonGreedy(
        1, final_eps, args.final_exploration_frames, action_space.sample)
    # Keep the explorer's own logger at INFO level.
    explorer.logger.setLevel(logging.INFO)

    return nsq.NSQ(q_func, opt,
                   t_max=5,
                   gamma=0.99,
                   i_target=40000,
                   explorer=explorer,
                   phi=dqn_phi)