Example #1
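This test relies on a few rlberry names that are not shown in the snippet. The imports below are an assumption (module paths vary across rlberry versions); DummyAgent is a fixture defined in the test module itself, and a minimal sketch of it is given after the test.

# Assumed imports (module paths may differ across rlberry versions):
from rlberry.envs.finite import GridWorld
from rlberry.stats import AgentStats, plot_episode_rewards, compare_policies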
def test_agent_stats_partial_fit_and_tuple_env():
    # Define the training env;
    # a (constructor, kwargs) tuple must also work in AgentStats
    train_env = (GridWorld, None)

    # Parameters
    params = {"n_episodes": 500}
    horizon = 20

    # Run AgentStats
    stats = AgentStats(DummyAgent,
                       train_env,
                       init_kwargs=params,
                       n_fit=4,
                       eval_horizon=10)
    stats2 = AgentStats(DummyAgent,
                        train_env,
                        init_kwargs=params,
                        n_fit=4,
                        eval_horizon=10)
    # Set the writer (here None) for two of the fitted agent instances
    stats.set_writer(0, None)
    stats.set_writer(3, None)

    # Run partial fit
    stats.partial_fit(0.1)
    stats.partial_fit(0.5)
    for agent in stats.fitted_agents:
        assert agent.fraction_fitted == 0.6
    for _ in range(2):
        stats.partial_fit(0.5)
        for agent in stats.fitted_agents:
            assert agent.fraction_fitted == 1.0

    # Run fit
    stats2.fit()

    # learning curves
    plot_episode_rewards([stats], cumulative=True, show=False)

    # compare final policies
    compare_policies([stats], eval_horizon=horizon, n_sim=10, show=False)
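DummyAgent above is a test fixture, not part of the public API. A minimal sketch of an agent that would satisfy the assertions in this test, assuming rlberry's Agent base class and its fit/partial_fit/policy interface:

from rlberry.agents import Agent

class DummyAgent(Agent):
    # Hypothetical stand-in, consistent with the test above.
    def __init__(self, env, n_episodes=100, **kwargs):
        Agent.__init__(self, env, **kwargs)
        self.n_episodes = n_episodes
        self.fraction_fitted = 0.0

    def fit(self, **kwargs):
        self.fraction_fitted = 1.0
        return {}

    def partial_fit(self, fraction, **kwargs):
        # Accumulate training progress, capped at 1.0 as the test expects.
        self.fraction_fitted = min(1.0, self.fraction_fitted + fraction)
        return {}

    def policy(self, observation, **kwargs):
        return 0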
Example #2
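As above, the imports are an assumption about module paths; get_env_trajectory and compare_trajectories are small helpers that live next to the test, sketched after the example.

# Assumed imports (module paths may differ across rlberry versions):
import rlberry.seeding as sd
from rlberry.agents import RSUCBVIAgent
from rlberry.envs import gym_make
from rlberry.envs.classic_control import MountainCar
from rlberry.stats import AgentStats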
def test_agent_stats_seeding():
    sd.set_global_seed(3456)
    for env in [MountainCar(), (gym_make, {'env_name': 'MountainCar-v0'})]:
        agent_stats = AgentStats(RSUCBVIAgent,
                                 env,
                                 init_kwargs={
                                     'n_episodes': 2,
                                     'horizon': 10
                                 },
                                 n_fit=6)
        agent_stats.fit()

        # Each fitted agent should hold a differently seeded copy of the env,
        # so trajectories from any pair of agents must differ.
        for ii in range(2, agent_stats.n_fit):
            traj1 = get_env_trajectory(agent_stats.fitted_agents[ii - 2].env,
                                       horizon=10)
            traj2 = get_env_trajectory(agent_stats.fitted_agents[ii - 1].env,
                                       horizon=10)
            traj3 = get_env_trajectory(agent_stats.fitted_agents[ii].env,
                                       horizon=10)
            assert not compare_trajectories(traj1, traj2)
            assert not compare_trajectories(traj1, traj3)
            assert not compare_trajectories(traj2, traj3)
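get_env_trajectory and compare_trajectories are not part of rlberry's public API; a plausible sketch of the helpers this test assumes, using a standard gym-style step interface:

import numpy as np

def get_env_trajectory(env, horizon):
    # Roll out `horizon` steps and record the visited states.
    states = []
    obs = env.reset()
    for _ in range(horizon):
        states.append(obs)
        obs, _, done, _ = env.step(env.action_space.sample())
        if done:
            obs = env.reset()
    return states

def compare_trajectories(traj1, traj2):
    # True if the two trajectories match state-by-state.
    if len(traj1) != len(traj2):
        return False
    return all(np.array_equal(s1, s2) for s1, s2 in zip(traj1, traj2))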
Example #3
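This snippet starts in the middle of a hyperparameter-optimization workflow: ppo_stats is an AgentStats instance built earlier. A hypothetical setup consistent with the calls below (the agent, env, and parameter values here are assumptions, not from the original):

# Hypothetical setup for the snippet below:
from rlberry.agents.ppo import PPOAgent
from rlberry.envs.benchmarks.ball_exploration import PBall2D
from rlberry.stats import AgentStats

train_env = PBall2D()
ppo_stats = AgentStats(PPOAgent,
                       train_env,
                       init_kwargs={'n_episodes': 100, 'horizon': 30},
                       n_fit=4,
                       eval_horizon=30)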
# Run a first round of hyperparameter optimization
best_trial, data = ppo_stats.optimize_hyperparams(
    n_trials=10,
    timeout=None,
    n_sim=5,
    n_fit=2,
    n_jobs=2,
    sampler_method='optuna_default')

initial_n_trials = len(ppo_stats.study.trials)

# save
ppo_stats.save('ppo_stats_backup')
del ppo_stats

# load
ppo_stats = AgentStats.load('ppo_stats_backup')

# continue the previous optimization, now with a 5-second timeout
best_trial, data = ppo_stats.optimize_hyperparams(n_trials=10,
                                                  timeout=5,
                                                  n_sim=5,
                                                  n_fit=2,
                                                  n_jobs=2,
                                                  continue_previous=True)

print("number of initial trials = ", initial_n_trials)
print("number of trials after continuing= ", len(ppo_stats.study.trials))

print("----")
print("fitting agents after choosing hyperparams...")
ppo_stats.fit()  # fit the n_fit agents using the chosen hyperparams
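Because AgentStats.load restores the Optuna study and continue_previous=True reuses it, the second optimize_hyperparams call adds its trials to the saved ones, so the trial count printed after continuing should exceed initial_n_trials.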