def _build_for_dueling_dqn(self) -> Dict[str, Any]:
    config_dict = self._build_for_dqn()
    config_dict.update({
        'name': os.path.join(self.folder, 'DuelingDQN'),
        'model_architecture': ConvNN(observation_shape=(84, 84, self.frame_depth),
                                     dueling=True, n_actions=6,
                                     opt='adam', learning_rate=0.000102)})

    return config_dict

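# Minimal sketch of the dueling head that dueling=True presumably enables in ConvNN (an
# assumption for illustration; the layer names and sizes below are not the library's actual
# implementation). The idea: split the shared convolutional features into a state-value
# stream V(s) and an advantage stream A(s, a), then recombine as
# Q(s, a) = V(s) + A(s, a) - mean_a A(s, a).
import tensorflow as tf
from tensorflow.keras import layers


def dueling_head_sketch(features: tf.Tensor, n_actions: int) -> tf.Tensor:
    """Hypothetical helper for illustration only; not ConvNN's actual code."""
    # Separate fully connected streams for the state value and the per-action advantages.
    value = layers.Dense(1)(layers.Dense(128, activation='relu')(features))
    advantage = layers.Dense(n_actions)(layers.Dense(128, activation='relu')(features))
    # Q(s, a) = V(s) + A(s, a) - mean_a A(s, a); subtracting the mean keeps V and A identifiable.
    return layers.Lambda(
        lambda va: va[0] + va[1] - tf.reduce_mean(va[1], axis=1, keepdims=True)
    )([value, advantage])
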
def _build_for_dqn(self) -> Dict[str, Any]:
    return {'name': os.path.join(self.folder, 'DeepQAgent'),
            'env_spec': self.env_spec,
            'env_wrappers': self.env_wrappers,
            'model_architecture': ConvNN(observation_shape=(84, 84, self.frame_depth), n_actions=6,
                                         output_activation=None, opt='adam', learning_rate=0.00008),
            'gamma': 0.99,
            'final_reward': None,
            'eps': EpsilonGreedy(eps_initial=2, decay=0.000025, eps_min=0.01, decay_schedule='linear'),
            'replay_buffer': ContinuousBuffer(buffer_size=40000),
            'replay_buffer_samples': 32}

def _build_for_dqn(self) -> Dict[str, Any]:
    return {'name': os.path.join(self.folder, 'DeepQAgent'),
            'env_spec': self.env_spec,
            'env_wrappers': self.env_wrappers,
            'model_architecture': ConvNN(observation_shape=(84, 84, self.frame_depth), n_actions=6,
                                         output_activation=None, opt='adam', learning_rate=0.000105),
            'gamma': 0.99,
            'final_reward': None,
            # Use eps_initial > 1 here so that only random actions are used for the first steps, which makes
            # filling the replay buffer more efficient. It also avoids decaying eps while not yet training.
            # Alternative: 'eps': EpsilonGreedy(eps_initial=1.2, decay=0.000025, eps_min=0.01,
            #                                   decay_schedule='compound'),
            'eps': EpsilonGreedy(eps_initial=1.1, decay=0.00001, eps_min=0.01, decay_schedule='linear'),
            'replay_buffer': ContinuousBuffer(buffer_size=10000),
            'replay_buffer_samples': 32}

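# Worked example of the eps_initial > 1 reasoning above, assuming the linear schedule means
# eps(step) = max(eps_min, eps_initial - decay * step) and that actions are fully random while
# eps >= 1: with eps_initial=1.1 and decay=0.00001, eps only drops below 1.0 after
# (1.1 - 1.0) / 0.00001 = 10,000 steps, i.e. roughly long enough to fill the 10,000-sample
# replay buffer with purely random experience before exploitation starts.
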
def _build_for_ac(self) -> Dict[str, Any]:
    return {'name': os.path.join(self.folder, 'ActorCriticAgent'),
            'env_spec': self.env_spec,
            'env_wrappers': self.env_wrappers,
            'model_architecture': ConvNN(observation_shape=(84, 84, self.frame_depth), n_actions=6,
                                         output_activation='softmax', opt='adam', learning_rate=0.000105,
                                         output_type='ac'),
            'gamma': 0.99,
            'final_reward': None}

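# Sketch of the two-headed output that output_type='ac' presumably selects (an assumption for
# illustration, not ConvNN's actual implementation; reuses the tensorflow.keras imports from the
# dueling sketch above): a softmax policy head over the actions plus a scalar state-value head,
# the pair an actor-critic agent needs.
def actor_critic_heads_sketch(features: tf.Tensor, n_actions: int):
    """Hypothetical helper for illustration only."""
    policy = layers.Dense(n_actions, activation='softmax', name='policy')(features)
    value = layers.Dense(1, name='value')(features)
    return policy, value
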
def _build_for_double_dueling_dqn(self) -> Dict[str, Any]:
    config_dict = self._build_for_dqn()
    config_dict.update({
        'name': os.path.join(self.folder, 'DoubleDuelingDQN'),
        'double': True,
        'model_architecture': ConvNN(
            observation_shape=(self.target_obs_shape[0], self.target_obs_shape[1], self.frame_depth),
            dueling=True, n_actions=self.n_actions,
            opt='adam', learning_rate=0.0001)})

    return config_dict

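# For reference, double=True presumably switches the bootstrap target from the vanilla DQN form
#   y = r + gamma * max_a Q_target(s', a)
# to the Double DQN form
#   y = r + gamma * Q_target(s', argmax_a Q_online(s', a)),
# i.e. the online network selects the action and the target network evaluates it
# (the library's exact implementation is not shown in this config class).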