Example #1
0
 def test_saving_scheduler_on_all_model_free_algo(self):
     """Run the saving-scheduler task once for each algorithm factory.

     The PPO, DQN and DDPG factories of this test case are passed, one at a
     time, to ``_saving_scheduler``; whatever it returns is executed through
     ``single_exp_runner``. The fixture is rebuilt by calling ``setUp`` before
     and ``tearDown`` after every individual run.
     """
     # Runner options are identical for every algorithm, so spell them once.
     runner_options = dict(auto_choose_gpu_flag=False,
                           gpu_id=0,
                           del_if_log_path_existed=True)
     for make_algo in (self.create_ppo, self.create_dqn, self.create_ddpg):
         self.setUp()
         task = _saving_scheduler(self, make_algo)
         single_exp_runner(task, **runner_options)
         self.tearDown()
Example #2
0
    def test_exp_with_scheduler(self, algo=None, locals=None):
        """Run a 'model_free' experiment whose learning rate follows a linear schedule.

        The experiment end point is set to 500 agent training samples, the
        algorithm's ``LEARNING_RATE`` parameter is attached to a
        ``LinearSchedule`` going from 0.01 to 0.0001 over that many timesteps,
        and after the run the agent-side and environment-side sample counters
        are checked against each other.

        :param algo: algorithm to run; when ``None`` one is built with
                     ``self.create_dqn()``.
        :param locals: dict that accompanies ``algo`` and must provide the
                       ``'env_spec'`` and ``'env'`` entries. It is replaced by
                       the dict returned from ``self.create_dqn()`` when
                       ``algo`` is ``None``.
        """
        def wrap_algo(algo=None, locals=None):
            # ``algo`` and ``locals`` are bound as default values of ``func`` so
            # the task can be called without arguments.
            def func(algo=algo, locals=locals):
                GlobalConfig().set('DEFAULT_EXPERIMENT_END_POINT', dict(TOTAL_AGENT_TRAIN_SAMPLE_COUNT=500,
                                                                        TOTAL_AGENT_TEST_SAMPLE_COUNT=None,
                                                                        TOTAL_AGENT_UPDATE_COUNT=None))
                # Identity check: fall back to DQN only when no algorithm was
                # supplied, never because a supplied object happens to be falsy.
                if algo is None:
                    algo, locals = self.create_dqn()
                env_spec = locals['env_spec']
                env = locals['env']
                agent = self.create_agent(env=env,
                                          algo=algo,
                                          name='agent',
                                          eps=self.create_eps(env_spec)[0],
                                          env_spec=env_spec)[0]

                exp = self.create_exp(name='model_free', env=env, agent=agent)
                # The schedule horizon is read back from the end point set above.
                algo.parameters.set_scheduler(param_key='LEARNING_RATE',
                                              to_tf_ph_flag=True,
                                              scheduler=LinearSchedule(
                                                  t_fn=exp.TOTAL_ENV_STEP_TRAIN_SAMPLE_COUNT,
                                                  schedule_timesteps=GlobalConfig().DEFAULT_EXPERIMENT_END_POINT[
                                                      'TOTAL_AGENT_TRAIN_SAMPLE_COUNT'],
                                                  final_p=0.0001,
                                                  initial_p=0.01))
                exp.run()
                self.assertEqual(exp.TOTAL_AGENT_TEST_SAMPLE_COUNT(), exp.TOTAL_ENV_STEP_TEST_SAMPLE_COUNT())
                # assertEqual(first, second, msg): a third positional argument is
                # only the failure message and is never compared, so a real
                # message is passed by keyword instead of a bare number.
                # NOTE(review): if the run is guaranteed to stop at exactly the
                # configured 500 samples, add a separate assertion against that
                # value -- cannot be confirmed from this file.
                self.assertEqual(exp.TOTAL_AGENT_TRAIN_SAMPLE_COUNT(),
                                 exp.TOTAL_ENV_STEP_TRAIN_SAMPLE_COUNT(),
                                 msg='agent and environment report different training sample counts')

            return func

        single_exp_runner(wrap_algo(algo, locals), auto_choose_gpu_flag=False, gpu_id=0,
                          del_if_log_path_existed=True)
Example #3
0
 def test_saving_scheduler_on_all_model_free_algo(self):
     """Run the saving-scheduler task for PPO, DQN and DDPG in turn.

     Every algorithm factory is paired with the ``sample_traj_flag`` value
     passed to ``_saving_scheduler`` for it (``True`` for PPO, ``False`` for
     the other two). The fixture is rebuilt with ``setUp``/``tearDown``
     around each individual run.
     """
     # (algorithm factory, sample_traj_flag) pairs, in execution order.
     cases = (
         (self.create_ppo, True),
         (self.create_dqn, False),
         (self.create_ddpg, False),
     )
     for make_algo, traj_flag in cases:
         self.setUp()
         task = _saving_scheduler(self, make_algo, sample_traj_flag=traj_flag)
         single_exp_runner(task,
                           auto_choose_gpu_flag=False,
                           gpu_id=0,
                           del_if_log_path_existed=True)
         self.tearDown()
Example #4
0
    def test_experiment(self):
        """Run a DQN agent inside a 'model_free' experiment through the runner.

        The inner task sets the experiment end point to 200 agent training
        samples (the other two end-point entries are ``None``), builds a DQN
        algorithm, an agent and an experiment from this test case's factory
        helpers, and runs the experiment. The task itself is executed by
        ``single_exp_runner``.
        """
        def func():
            end_point = {
                'TOTAL_AGENT_TRAIN_SAMPLE_COUNT': 200,
                'TOTAL_AGENT_TEST_SAMPLE_COUNT': None,
                'TOTAL_AGENT_UPDATE_COUNT': None,
            }
            GlobalConfig().set('DEFAULT_EXPERIMENT_END_POINT', end_point)

            # create_dqn() returns the algorithm together with a dict holding
            # the environment objects it was built with.
            dqn, created = self.create_dqn()
            env_spec, env = created['env_spec'], created['env']

            exploration = self.create_eps(env_spec)[0]
            agent = self.create_agent(env=env,
                                      algo=dqn,
                                      name='agent',
                                      eps=exploration,
                                      env_spec=env_spec)[0]

            experiment = self.create_exp(name='model_free', env=env, agent=agent)
            experiment.run()

        single_exp_runner(func,
                          auto_choose_gpu_flag=False,
                          gpu_id=0,
                          del_if_log_path_existed=True)
Example #5
0
        start_test_after_sample_count=5,
        start_train_after_sample_count=5,
        train_func_and_args=(agent.train, (), dict()),
        test_func_and_args=(agent.test, (), dict(sample_count=10)),
        sample_func_and_args=(agent.sample, (), dict(sample_count=100,
                                                     env=agent.env,
                                                     store_flag=True))
    )
    experiment = Experiment(
        tuner=None,
        env=env,
        agent=agent,
        flow=flow,
        name=name + 'experiment_debug'
    )

    dqn.parameters.set_scheduler(param_key='LEARNING_RATE',
                                 scheduler=LinearScheduler(
                                     t_fn=experiment.TOTAL_AGENT_TRAIN_SAMPLE_COUNT,
                                     schedule_timesteps=GlobalConfig().DEFAULT_EXPERIMENT_END_POINT[
                                         'TOTAL_AGENT_TRAIN_SAMPLE_COUNT'],
                                     final_p=0.0001,
                                     initial_p=0.01))
    experiment.run()


from baconian.core.experiment_runner import single_exp_runner

# Set the global config entry 'DEFAULT_LOG_PATH' to ./log_path before the run.
GlobalConfig().set('DEFAULT_LOG_PATH', './log_path')
# Execute task_fn through the experiment runner.
# NOTE(review): del_if_log_path_existed=True presumably removes an already
# existing log directory first -- confirm against single_exp_runner.
single_exp_runner(task_fn, del_if_log_path_existed=True)
                       noise_adder=noise_adder,
                       name=name + '_agent')

    flow = create_train_test_flow(env=env,
                                  cyber=cyber,
                                  agent=agent,
                                  num_test=num_test,
                                  total_steps=total_steps,
                                  max_step_per_episode=max_step_per_episode,
                                  train_after_step=train_after_step,
                                  test_after_step=test_after_step,
                                  train_every_step=train_every_step,
                                  test_every_step=test_every_step,
                                  train_func_and_args=(agent.train, (),
                                                       dict()),
                                  test_func_and_args=(agent.test, (), dict()),
                                  sample_func_and_args=(agent.sample, (),
                                                        dict()),
                                  flow_type='DDPG_TrainTestFlow')

    experiment = Experiment(tuner=None,
                            env=env,
                            agent=agent,
                            flow=flow,
                            name=name)
    experiment.run()


if __name__ == '__main__':
    # Launch the experiment only when this file is executed as a script,
    # not when it is imported.
    # NOTE(review): keep_session=True is passed through to single_exp_runner;
    # its exact effect is not visible here -- confirm against the runner.
    single_exp_runner(task_fn, del_if_log_path_existed=True, keep_session=True)