Example #1
def run_exp(env,
            policy,
            hp,
            steps,
            dir_name,
            evaluate,
            seed,
            eval_interval,
            log_interval,
            save_interval,
            initial_exploration_steps):
    """Run a single training procedure.

    Parameters
    ----------
    env : str or gym.Env
        the training/testing environment
    policy : type [ hbaselines.base_policies.Policy ]
        the policy class to use
    hp : dict
        additional algorithm hyper-parameters
    steps : int
        total number of training steps
    dir_name : str
        the location where the results files are meant to be stored
    evaluate : bool
        whether to include an evaluation environment
    seed : int
        the random seed for numpy, tensorflow, and random
    eval_interval : int
        number of simulation steps in the training environment before an
        evaluation is performed
    log_interval : int
        the number of training steps before logging training results
    save_interval : int
        number of simulation steps in the training environment before the model
        is saved
    initial_exploration_steps : int
        number of timesteps that the policy is run before training to
        initialize the replay buffer with samples
    """
    eval_env = env if evaluate else None

    alg = RLAlgorithm(
        policy=policy,
        env=env,
        eval_env=eval_env,
        **hp
    )

    # perform training
    alg.learn(
        total_steps=steps,
        log_dir=dir_name,
        log_interval=log_interval,
        eval_interval=eval_interval,
        save_interval=save_interval,
        initial_exploration_steps=initial_exploration_steps,
        seed=seed,
    )
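
For reference, a call to run_exp might look like the following. The environment name and the step/interval values are placeholders for illustration, not values from the source; FeedForwardPolicy is the policy class used in the tests below.

run_exp(
    env="Pendulum-v0",                 # placeholder gym environment ID
    policy=FeedForwardPolicy,          # policy class, as in the tests below
    hp={},                             # fall back to default hyper-parameters
    steps=10000,
    dir_name="results",
    evaluate=False,
    seed=1,
    eval_interval=5000,
    log_interval=1000,
    save_interval=5000,
    initial_exploration_steps=100,
)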
Example #2
    def test_learn_initial_exploration_steps(self):
        """Test the initial_exploration_steps parameter in the learn method.

        This is done for the following cases:

        1. initial_exploration_steps = 0
        2. initial_exploration_steps = 100
        """
        # =================================================================== #
        # test case 1                                                         #
        # =================================================================== #

        # Create the algorithm object.
        policy_params = self.init_parameters.copy()
        policy_params['policy'] = FeedForwardPolicy
        policy_params['_init_setup_model'] = True
        alg = RLAlgorithm(**policy_params)

        # Run the learn operation for zero exploration steps.
        alg.learn(0, log_dir='results', initial_exploration_steps=0)

        # Check the size of the replay buffer
        self.assertEqual(len(alg.policy_tf.replay_buffer), 0)

        # Clear memory.
        del alg
        shutil.rmtree('results')

        # =================================================================== #
        # test case 2                                                         #
        # =================================================================== #

        # Create the algorithm object.
        policy_params = self.init_parameters.copy()
        policy_params['policy'] = FeedForwardPolicy
        policy_params['_init_setup_model'] = True
        alg = RLAlgorithm(**policy_params)

        # Run the learn operation for 100 exploration steps.
        alg.learn(0, log_dir='results', initial_exploration_steps=100)

        # Check the size of the replay buffer
        self.assertEqual(len(alg.policy_tf.replay_buffer), 100)

        # Clear memory.
        del alg
        shutil.rmtree('results')
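
The assertions above only check the resulting buffer size. The prefill step itself is, in spirit, a loop like the one below; this is a minimal sketch assuming a generic gym-style env and a buffer with an add(obs, action, reward, next_obs, done) method, not hbaselines' actual implementation.

def prefill_replay_buffer(env, replay_buffer, n_steps):
    """Collect n_steps transitions with random actions before training."""
    obs = env.reset()
    for _ in range(n_steps):
        action = env.action_space.sample()            # random exploration
        next_obs, reward, done, _ = env.step(action)  # step the environment
        replay_buffer.add(obs, action, reward, next_obs, done)
        obs = env.reset() if done else next_obs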
Example #3
    def test_learn_init(self):
        """Test the non-loop components of the `learn` method."""
        # Create the algorithm object.
        policy_params = self.init_parameters.copy()
        policy_params['policy'] = GoalConditionedPolicy
        policy_params['_init_setup_model'] = True
        alg = RLAlgorithm(**policy_params)

        # Run the learn operation for zero steps.
        alg.learn(0, log_dir='results', initial_exploration_steps=0)
        self.assertEqual(alg.episodes, 0)
        self.assertEqual(alg.total_steps, 0)
        self.assertEqual(alg.epoch, 0)
        self.assertEqual(len(alg.episode_rew_history), 0)
        self.assertEqual(alg.epoch_episodes, 0)
        self.assertEqual(len(alg.epoch_episode_rewards), 0)
        self.assertEqual(len(alg.epoch_episode_steps), 0)
        shutil.rmtree('results')

        # Test the seeds.
        alg.learn(0, log_dir='results', seed=1, initial_exploration_steps=0)
        self.assertEqual(np.random.sample(), 0.417022004702574)
        self.assertEqual(random.uniform(0, 1), 0.13436424411240122)
        shutil.rmtree('results')
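
The two constants asserted above are simply the first draws from numpy and python's random module after seeding with 1. A minimal sketch of the seeding the test relies on (tensorflow seeding omitted):

import random
import numpy as np

def set_seeds(seed):
    # Seed numpy and python's random module for reproducible draws.
    np.random.seed(seed)
    random.seed(seed)

set_seeds(1)
assert np.random.sample() == 0.417022004702574      # first numpy draw
assert random.uniform(0, 1) == 0.13436424411240122  # first random draw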