def run_exp(env, policy, hp, steps, dir_name, evaluate, seed, eval_interval, log_interval, save_interval, initial_exploration_steps):
    """Execute a single training procedure.

    Parameters
    ----------
    env : str or gym.Env
        the training/testing environment
    policy : type [ hbaselines.base_policies.Policy ]
        the policy class to use
    hp : dict
        additional algorithm hyper-parameters
    steps : int
        total number of training steps
    dir_name : str
        the location the results files are meant to be stored
    evaluate : bool
        whether to include an evaluation environment
    seed : int
        specifies the random seed for numpy, tensorflow, and random
    eval_interval : int
        number of simulation steps in the training environment before an
        evaluation is performed
    log_interval : int
        the number of training steps before logging training results
    save_interval : int
        number of simulation steps in the training environment before the
        model is saved
    initial_exploration_steps : int
        number of timesteps that the policy is run before training to
        initialize the replay buffer with samples
    """
    # Attach an evaluation environment only when one was requested.
    algorithm = RLAlgorithm(
        policy=policy,
        env=env,
        eval_env=env if evaluate else None,
        **hp
    )

    # Run the training procedure.
    algorithm.learn(
        total_steps=steps,
        log_dir=dir_name,
        seed=seed,
        log_interval=log_interval,
        eval_interval=eval_interval,
        save_interval=save_interval,
        initial_exploration_steps=initial_exploration_steps,
    )
def test_learn_initial_exploration_steps(self):
    """Test the initial_exploration_steps parameter in the learn method.

    This is done for the following cases:

    1. initial_exploration_steps = 0
    2. initial_exploration_steps = 100
    """
    for exploration_steps in (0, 100):
        # Create a fresh algorithm object for this case.
        params = self.init_parameters.copy()
        params['policy'] = FeedForwardPolicy
        params['_init_setup_model'] = True
        alg = RLAlgorithm(**params)

        # Run the learn operation for zero training steps, so that only the
        # initial exploration phase is executed.
        alg.learn(0, log_dir='results', initial_exploration_steps=exploration_steps)

        # The replay buffer should contain exactly one sample per
        # exploration step.
        self.assertEqual(len(alg.policy_tf.replay_buffer), exploration_steps)

        # Clear memory and remove the generated results directory.
        del alg
        shutil.rmtree('results')
def test_learn_init(self):
    """Test the non-loop components of the `learn` method."""
    # Build the algorithm object under test.
    params = self.init_parameters.copy()
    params['policy'] = GoalConditionedPolicy
    params['_init_setup_model'] = True
    alg = RLAlgorithm(**params)

    # Learning for zero steps should leave every counter and log at its
    # initial (empty/zero) value.
    alg.learn(0, log_dir='results', initial_exploration_steps=0)
    self.assertEqual(alg.episodes, 0)
    self.assertEqual(alg.total_steps, 0)
    self.assertEqual(alg.epoch, 0)
    self.assertEqual(len(alg.episode_rew_history), 0)
    self.assertEqual(alg.epoch_episodes, 0)
    self.assertEqual(len(alg.epoch_episode_rewards), 0)
    self.assertEqual(len(alg.epoch_episode_steps), 0)
    shutil.rmtree('results')

    # Passing a seed should make the numpy and random modules
    # deterministic; these are the first draws after seeding with 1.
    alg.learn(0, log_dir='results', seed=1, initial_exploration_steps=0)
    self.assertEqual(np.random.sample(), 0.417022004702574)
    self.assertEqual(random.uniform(0, 1), 0.13436424411240122)
    shutil.rmtree('results')