Example #1
    def _build_for_dqn(self) -> Dict[str, Any]:
        """Config for a DQN agent; these hyperparameters aren't tuned."""
        return {
            'name': os.path.join(self.folder, 'DeepQAgent'),
            'env_spec': self.env_spec,
            'model_architecture': DenseNN(observation_shape=(2,), n_actions=3, opt='adam',
                                          learning_rate=0.001, unit_scale=12, dueling=False),
            'gamma': 0.99,
            'final_reward': 650,
            'replay_buffer_samples': 32,
            'eps': EpsilonGreedy(eps_initial=0.1, decay=0.002, eps_min=0.002,
                                 actions_pool=list(range(3))),
            'replay_buffer': ContinuousBuffer(buffer_size=200)
        }
    def _build_for_dueling_dqn(self) -> Dict[str, Any]:
        """Reuse the DQN config with a different name and a dueling model architecture."""
        config_dict = self._build_for_dqn()
        config_dict.update({'name': os.path.join(self.folder, 'DuelingDQN'),
                            'model_architecture': DenseNN(observation_shape=(4,), n_actions=2, opt='adam',
                                                          learning_rate=0.0001, unit_scale=16, dueling=True)})

        return config_dict
    def _build_for_reinforce(self) -> Dict[str, Any]:
        """Config for a REINFORCE (policy gradient) agent."""
        return {'name': os.path.join(self.folder, 'REINFORCEAgent'),
                'env_spec': self.env_spec,
                'model_architecture': DenseNN(observation_shape=(4,), n_actions=2, opt='adam', unit_scale=16,
                                              output_activation='softmax', learning_rate=0.001, dueling=False),
                'final_reward': -2,
                'gamma': 0.99,
                'alpha': 0.00001}
    @staticmethod  # the original signature has no self, so this is presumably a static method
    def _build_with_dense_model(dueling: bool = False) -> Dict[str, Any]:
        """Model-only config: dense network for a 115-dim observation, 19-action environment."""
        return {"model_architecture": DenseNN(observation_shape=(115,), n_actions=19, dueling=dueling,
                                              output_activation=None, opt='adam', learning_rate=0.000105)}