    def _build_for_dueling_dqn(self) -> Dict[str, Any]:
        # As the base DQN config, but with a dueling head: the network splits into separate state-value
        # and advantage streams that are recombined into Q-values (sketched in the comment after this method).
        config_dict = self._build_for_dqn()
        config_dict.update({'name': os.path.join(self.folder, 'DuelingDQN'),
                            'model_architecture': ConvNN(observation_shape=(84, 84, self.frame_depth), dueling=True,
                                                         n_actions=6, opt='adam', learning_rate=0.000102)})

        return config_dict
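
    # A minimal sketch of what dueling=True typically implies for the network head (an assumption here;
    # the ConvNN implementation itself is not shown in this file). `dense_out` stands in for the
    # flattened convolutional output:
    #
    #     value = Dense(1)(dense_out)                # state-value stream V(s)
    #     advantage = Dense(n_actions)(dense_out)    # advantage stream A(s, a)
    #     q_values = value + (advantage - tf.reduce_mean(advantage, axis=1, keepdims=True))
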
    def _build_for_dqn(self) -> Dict[str, Any]:
        return {
            'name': os.path.join(self.folder, 'DeepQAgent'),
            'env_spec': self.env_spec,
            'env_wrappers': self.env_wrappers,
            'model_architecture': ConvNN(observation_shape=(84, 84, self.frame_depth), n_actions=6,
                                         output_activation=None, opt='adam', learning_rate=0.000105),
            'gamma': 0.99,
            'final_reward': None,
            # Use eps_initial > 1 here so only random actions are used for the first steps, which makes filling
            # the replay buffer more efficient. It also avoids decaying eps while not training. Assuming the
            # linear schedule subtracts `decay` from eps on each step, eps stays >= 1 (fully random actions)
            # for roughly (1.1 - 1.0) / 0.00001 = 10,000 steps, enough to fill the 10,000-sample replay
            # buffer below with purely random experience.
            # Alternative: 'eps': EpsilonGreedy(eps_initial=1.2, decay=0.000025, eps_min=0.01,
            #                                   decay_schedule='compound'),
            'eps': EpsilonGreedy(eps_initial=1.1, decay=0.00001, eps_min=0.01, decay_schedule='linear'),
            'replay_buffer': ContinuousBuffer(buffer_size=10000),
            'replay_buffer_samples': 32
        }

    def _build_for_ac(self) -> Dict[str, Any]:
        return {'name': os.path.join(self.folder, 'ActorCriticAgent'),
                'env_spec': self.env_spec,
                'env_wrappers': self.env_wrappers,
                'model_architecture': ConvNN(observation_shape=(84, 84, self.frame_depth), n_actions=6,
                                             output_activation='softmax', opt='adam', learning_rate=0.000105,
                                             output_type='ac'),
                'gamma': 0.99,
                'final_reward': None}
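
    # Note on the actor-critic config above: unlike the DQN builds it sets no epsilon schedule and no
    # replay buffer, presumably because the agent samples actions directly from the softmax policy head
    # (output_type='ac') rather than acting epsilon-greedily over Q-values.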

    def _build_for_double_dueling_dqn(self) -> Dict[str, Any]:
        config_dict = self._build_for_dqn()
        config_dict.update({'name': os.path.join(self.folder, 'DoubleDuelingDQN'),
                            'double': True,
                            'model_architecture': ConvNN(
                                observation_shape=(self.target_obs_shape[0], self.target_obs_shape[1],
                                                   self.frame_depth), dueling=True,
                                n_actions=self.n_actions, opt='adam', learning_rate=0.0001)})

        return config_dict
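
    # Usage sketch for the builders above (hypothetical names: the surrounding config class, its public
    # build dispatcher, and the agent class are assumed from the rest of the project, not shown here):
    #
    #     config = AtariConfig(agent_type='double_dueling_dqn').build()  # -> one of the dicts above
    #     agent = DeepQAgent(**config)
    #     agent.train(n_episodes=1000)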