Python QTable 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: learning_test_utilities

메소드/함수: QTable

hotexamples.com에서의 예제들: 2

Python QTable - 예제 2개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python learning_test_utilities.QTable의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

@author: momos_000
"""
# using tabular Q-Learning to learn a policy over options in RoomWorld
# referring to U.C. Berkeley DeepRL Bootcamp materials
# Nov 15: HRL without planning, with interruption

import time
import numpy as np
from room_world import RoomWorld, SmdpAgent_Q
import learning_test_utilities as util

# setup: build the environment, the Q-table, the options and the agent
env = RoomWorld()
# Read both sizing inputs for the Q-table off the environment in one step.
state_space, num_actions = env.state_space, env.action_space.size
q_func = util.QTable(
    state_space,
    num_actions,
)  # as "goto hallway" options
options = util.create_hallway_options(env)
# The agent is handed the environment, the Q-table and the option set.
agent_smdp = SmdpAgent_Q(env, q_func, options)

# training configuration
max_options = 200
# epsilon, gamma and alpha come from the shared defaults; the iteration count
# returned with them is discarded in favour of a fixed 100.
_, epsilon, gamma, alpha = util.learning_parameters()
iterations = 100
#alpha       = 1./16. # overwrite to match Sutton
# NOTE(review): "/" is true division on Python 3, so report_freq is a float
# there (1.0 with iterations = 100) — confirm how the training loop uses it.
report_freq = iterations / 100
# Statistics tracked per training iteration: one row per iteration, one column
# per entry below.
_HIST_COLUMNS = (
    "training step",
    "avg_td",
    "avg_ret",
    "avg_greedy_ret",
    "avg_greedy_successrate",
    "avg_greedy_steps",
    "avg_greedy_choices",
)
hist = np.zeros((iterations, len(_HIST_COLUMNS)))
# Wall-clock reference taken immediately before the training loop.
start_time = time.time()

for itr in range(iterations):

예제 #2

파일 보기

# November 2, 2017. Modified to replan if the goal is not reached
# using tabular Q-Learning to learn a flat policy in RoomWorld
# referring to U.C. Berkeley DeepRL Bootcamp materials

import time
import numpy as np
from room_world import RoomWorld, SmdpPlanningAgent_Q
import learning_test_utilities as util

# setup: build the environment, the Q-table, the options and the planning agent
env = RoomWorld()
# Read both sizing inputs for the Q-table off the environment in one step.
state_space, num_actions = env.state_space, env.action_space.size
plan_length = 2
# num_actions**plan_length is the number of distinct sequences of plan_length
# choices; presumably the table holds one entry per such plan — verify
# against QTable.
_num_plans = num_actions**plan_length
q_func = util.QTable(state_space, _num_plans)  # as "goto hallway" options
options = util.create_hallway_options(env)
# The same plan_length used to size the table is passed on to the agent.
agent_plan = SmdpPlanningAgent_Q(
    env,
    q_func,
    options,
    plan_length=plan_length,
)
# training configuration
# All four learning parameters, including the iteration count, come from the
# shared defaults.
iterations, epsilon, gamma, alpha = util.learning_parameters()
max_plans = 100
#alpha       = 1./16. # overwrite to match Sutton
# NOTE(review): "/" is true division on Python 3, so report_freq is a float
# there — confirm how the training loop uses it.
report_freq = iterations / 50
# Statistics tracked per training iteration: one row per iteration, one column
# per entry below.
_HIST_COLUMNS = (
    "training step",
    "avg_td",
    "avg_ret",
    "avg_greedy_ret",
    "avg_greedy_successrate",
    "avg_greedy_steps",
    "avg_greedy_choices",
)
hist = np.zeros((iterations, len(_HIST_COLUMNS)))
# Wall-clock reference taken immediately before the training loop.
start_time = time.time()

for itr in range(iterations):
    cur_state = env.reset(random_placement=True)
    done = [False]