Example #1
0
# Hyper-parameters passed to DuelingDQNPrioritizedReplay and controlling the training loop.
N_A = env.action_space.n              # number of discrete actions
N_S = env.observation_space.shape[0]  # length of the observation vector
MEMORY_CAPACITY = 50000   # replay-buffer size; rendering/learning gated on it being filled
TARGET_REP_ITER = 2000    # steps between target-network parameter updates
MAX_EPISODES = 900        # fixed typo: was MAX_EPISONDES
E_GREEDY = 0.95           # final epsilon for epsilon-greedy action selection
E_INCREMENT = 0.00001     # per-step epsilon increment
GAMMA = 0.99              # reward discount factor
LR = 0.0001               # learning rate
BATCH_SIZE = 32
HIDDEN = [400, 400]       # hidden-layer sizes
RENDER = True

RL = DuelingDQNPrioritizedReplay(
    n_actions=N_A, n_features=N_S, learning_rate=LR, e_greedy=E_GREEDY, reward_decay=GAMMA,
    memory_size=MEMORY_CAPACITY, e_greedy_increment=E_INCREMENT)

total_steps = 0

for i in range(MAX_EPISODES):
    s = env.reset()
    while True:
        # Render only after the replay memory has been filled.
        if total_steps >= MEMORY_CAPACITY:
            env.render()
        a = RL.choose_action(s)
        s_, r, done, _ = env.step(a)

        # Bug fix: the original loop never stored experience or trained, so the
        # agent could not improve. Assumes the standard store_transition/learn
        # API of this DQN class — TODO confirm against the class definition.
        RL.store_transition(s, a, r, s_)
        if total_steps > MEMORY_CAPACITY:
            RL.learn()

        total_steps += 1  # bug fix: counter was compared above but never incremented

        if done:
            break

        s = s_
# Hyper-parameters for the dueling-DQN agent (second copy of the setup).
MAX_EPISODES = 900        # training episodes
TARGET_REP_ITER = 2000    # target-network replacement interval (steps)
E_GREEDY = 0.95           # final epsilon
E_INCREMENT = 0.00001     # epsilon growth per step
GAMMA = 0.99              # discount factor
LR = 0.0001               # learning rate
BATCH_SIZE = 32
HIDDEN = [400, 400]       # two hidden layers of 400 units
RENDER = True

# Agent: dueling architecture combined with prioritized experience replay.
RL = DuelingDQNPrioritizedReplay(
    n_actions=N_A, n_features=N_S,
    learning_rate=LR, reward_decay=GAMMA,
    e_greedy=E_GREEDY, e_greedy_increment=E_INCREMENT,
    hidden=HIDDEN, batch_size=BATCH_SIZE,
    replace_target_iter=TARGET_REP_ITER,
    memory_size=MEMORY_CAPACITY)

# Training loop (second copy); the loop body continues past the end of this excerpt.
total_steps = 0   # environment steps taken across all episodes
running_r = 0     # presumably a running/smoothed episode reward, updated further down — confirm
r_scale = 100     # presumably a divisor applied to raw rewards, as in the later copy — confirm
for i_episode in range(MAX_EPISODES):
    state = env.reset(
    )  # (coord_x, coord_y, vel_x, vel_y, angle, angular_vel, l_leg_on_ground, r_leg_on_ground)
    ep_r = 0      # cumulative reward for this episode
    while True:
        # Rendering is disabled here (pass); the check fires only once the replay memory is full.
        if total_steps > MEMORY_CAPACITY: pass  #env.render()
# Environment dimensions.
N_A = env.action_space.n              # number of discrete actions
N_S = env.observation_space.shape[0]  # observation vector length

# Learning hyper-parameters.
LR = 0.0001
GAMMA = 0.99
E_GREEDY = 0.95
E_INCREMENT = 0.00001

# Architecture and replay settings.
HIDDEN = [400, 400]
BATCH_SIZE = 32
MEMORY_CAPACITY = 50000
TARGET_REP_ITER = 2000

# Run settings.
MAX_EPISODES = 900
RENDER = True

# Dueling DQN agent with prioritized experience replay.
RL = DuelingDQNPrioritizedReplay(
    n_actions=N_A,
    n_features=N_S,
    learning_rate=LR,
    e_greedy=E_GREEDY,
    reward_decay=GAMMA,
    hidden=HIDDEN,
    batch_size=BATCH_SIZE,
    replace_target_iter=TARGET_REP_ITER,
    memory_size=MEMORY_CAPACITY,
    e_greedy_increment=E_INCREMENT,
)


# Training loop (third, most complete copy); the loop body continues past the
# end of this excerpt.
total_steps = 0   # environment steps taken across all episodes
running_r = 0     # presumably a running episode-reward average, updated later — confirm
r_scale = 100     # divisor applied to raw rewards below
for i_episode in range(MAX_EPISODES):
    s = env.reset()  # (coord_x, coord_y, vel_x, vel_y, angle, angular_vel, l_leg_on_ground, r_leg_on_ground)
    ep_r = 0      # cumulative reward for this episode
    while True:
        # Render only once the replay memory is full.
        if total_steps > MEMORY_CAPACITY: env.render()
        a = RL.choose_action(s)
        s_, r, done, _ = env.step(a)
        # Soften the -100 penalty (presumably LunarLander's crash reward — confirm) to -30.
        if r == -100: r = -30
        r /= r_scale  # scale rewards into a smaller range