def run_exp(env, policy, hp, steps, dir_name, evaluate, seed, eval_interval, log_interval, save_interval, initial_exploration_steps):
    """Execute a single training procedure.

    Parameters
    ----------
    env : str or gym.Env
        the training/testing environment
    policy : type [ hbaselines.base_policies.Policy ]
        the policy class to use
    hp : dict
        additional algorithm hyper-parameters
    steps : int
        total number of training steps
    dir_name : str
        the location the results files are meant to be stored
    evaluate : bool
        whether to include an evaluation environment
    seed : int
        specifies the random seed for numpy, tensorflow, and random
    eval_interval : int
        number of simulation steps in the training environment before an
        evaluation is performed
    log_interval : int
        the number of training steps before logging training results
    save_interval : int
        number of simulation steps in the training environment before the
        model is saved
    initial_exploration_steps : int
        number of timesteps that the policy is run before training to
        initialize the replay buffer with samples
    """
    # Attach an evaluation environment only when one was requested.
    algorithm = RLAlgorithm(
        policy=policy,
        env=env,
        eval_env=env if evaluate else None,
        **hp
    )

    # Run the training procedure.
    algorithm.learn(
        total_steps=steps,
        log_dir=dir_name,
        seed=seed,
        log_interval=log_interval,
        eval_interval=eval_interval,
        save_interval=save_interval,
        initial_exploration_steps=initial_exploration_steps,
    )
def test_learn_initial_exploration_steps(self):
    """Test the initial_exploration_steps parameter in the learn method.

    This is done for the following cases:

    1. initial_exploration_steps = 0
    2. initial_exploration_steps = 100
    """
    for exploration_steps in (0, 100):
        # Create a fresh algorithm object for this case.
        params = self.init_parameters.copy()
        params['policy'] = FeedForwardPolicy
        params['_init_setup_model'] = True
        alg = RLAlgorithm(**params)

        # Run the learn operation for zero training steps, so that only the
        # initial exploration phase is executed.
        alg.learn(0, log_dir='results', initial_exploration_steps=exploration_steps)

        # The replay buffer should contain exactly one sample per
        # exploration step.
        self.assertEqual(len(alg.policy_tf.replay_buffer), exploration_steps)

        # Clear memory and remove the generated results directory.
        del alg
        shutil.rmtree('results')
def test_learn_init(self):
    """Test the non-loop components of the `learn` method."""
    # Build the algorithm object under test.
    params = self.init_parameters.copy()
    params['policy'] = GoalConditionedPolicy
    params['_init_setup_model'] = True
    alg = RLAlgorithm(**params)

    # Learning for zero steps should leave every counter and log at its
    # initial (empty/zero) value.
    alg.learn(0, log_dir='results', initial_exploration_steps=0)
    self.assertEqual(alg.episodes, 0)
    self.assertEqual(alg.total_steps, 0)
    self.assertEqual(alg.epoch, 0)
    self.assertEqual(len(alg.episode_rew_history), 0)
    self.assertEqual(alg.epoch_episodes, 0)
    self.assertEqual(len(alg.epoch_episode_rewards), 0)
    self.assertEqual(len(alg.epoch_episode_steps), 0)
    shutil.rmtree('results')

    # Passing a seed should make the numpy and random modules
    # deterministic; these are the first draws after seeding with 1.
    alg.learn(0, log_dir='results', seed=1, initial_exploration_steps=0)
    self.assertEqual(np.random.sample(), 0.417022004702574)
    self.assertEqual(random.uniform(0, 1), 0.13436424411240122)
    shutil.rmtree('results')