# Define parameters which need to be passed to the DuelingDQNPrioritizedReplay
# agent or which control the overall learning process.
# NOTE(review): `env` and `DuelingDQNPrioritizedReplay` are defined elsewhere.
N_A = env.action_space.n              # number of discrete actions
N_S = env.observation_space.shape[0]  # dimensionality of one observation
MEMORY_CAPACITY = 50000   # replay-buffer size; also the render warm-up threshold
TARGET_REP_ITER = 2000    # steps between target-network parameter replacements
MAX_EPISODES = 900        # fix: was misspelled MAX_EPISONDES
E_GREEDY = 0.95           # final epsilon-greedy exploitation probability
E_INCREMENT = 0.00001     # per-step epsilon increment
GAMMA = 0.99              # reward discount factor
LR = 0.0001               # learning rate
BATCH_SIZE = 32           # minibatch size for learning updates
HIDDEN = [400, 400]       # hidden-layer sizes of the Q network
RENDER = True

RL = DuelingDQNPrioritizedReplay(
    n_actions=N_A,
    n_features=N_S,
    learning_rate=LR,
    e_greedy=E_GREEDY,
    reward_decay=GAMMA,
    hidden=HIDDEN,                        # fix: was defined but never passed
    batch_size=BATCH_SIZE,                # fix: was defined but never passed
    replace_target_iter=TARGET_REP_ITER,  # fix: was defined but never passed
    memory_size=MEMORY_CAPACITY,
    e_greedy_increment=E_INCREMENT,
)

total_steps = 0
for i in range(MAX_EPISODES):
    s = env.reset()
    while True:
        # Only render after enough steps have elapsed to fill the replay buffer.
        # Bug in the original: total_steps was never incremented, so this
        # condition could never become true and rendering was unreachable.
        if total_steps >= MEMORY_CAPACITY:
            env.render()
        a = RL.choose_action(s)
        s_, r, done, _ = env.step(a)
        total_steps += 1  # fix: advance the global step counter
        if done:
            break
        s = s_
# Hyperparameters for the Dueling DQN agent and the training loop.
# NOTE(review): N_A, N_S, MEMORY_CAPACITY, and `env` are defined elsewhere.
TARGET_REP_ITER = 2000   # target-network replacement interval (steps)
MAX_EPISODES = 900       # number of training episodes
E_GREEDY = 0.95          # epsilon-greedy ceiling
E_INCREMENT = 0.00001    # epsilon growth per step
GAMMA = 0.99             # discount factor
LR = 0.0001              # learning rate
BATCH_SIZE = 32          # learning minibatch size
HIDDEN = [400, 400]      # Q-network hidden-layer widths
RENDER = True

# Build the agent from the constants above.
RL = DuelingDQNPrioritizedReplay(
    n_actions=N_A,
    n_features=N_S,
    learning_rate=LR,
    e_greedy=E_GREEDY,
    reward_decay=GAMMA,
    hidden=HIDDEN,
    batch_size=BATCH_SIZE,
    replace_target_iter=TARGET_REP_ITER,
    memory_size=MEMORY_CAPACITY,
    e_greedy_increment=E_INCREMENT,
)

# Running counters for the training loop.
total_steps = 0
running_r = 0
r_scale = 100

for i_episode in range(MAX_EPISODES):
    # Observation layout (per the original author's note):
    # (coord_x, coord_y, vel_x, vel_y, angle, angular_vel,
    #  l_leg_on_ground, r_leg_on_ground)
    state = env.reset()
    ep_r = 0
    while True:
        # Rendering is intentionally disabled here; the guard is kept so it
        # can be re-enabled once the replay buffer has been filled.
        if total_steps > MEMORY_CAPACITY:
            pass  # env.render()
# Environment dimensions and training hyperparameters.
# NOTE(review): `env` and `DuelingDQNPrioritizedReplay` are defined elsewhere.
N_A = env.action_space.n              # discrete action count
N_S = env.observation_space.shape[0]  # observation vector length
MEMORY_CAPACITY = 50000   # replay-buffer capacity; also the render threshold
TARGET_REP_ITER = 2000    # steps between target-network swaps
MAX_EPISODES = 900        # training-episode budget
E_GREEDY = 0.95           # epsilon-greedy ceiling
E_INCREMENT = 0.00001     # epsilon growth per step
GAMMA = 0.99              # discount factor
LR = 0.0001               # learning rate
BATCH_SIZE = 32           # learning minibatch size
HIDDEN = [400, 400]       # Q-network hidden-layer widths
RENDER = True

# Instantiate the prioritized-replay Dueling DQN agent.
RL = DuelingDQNPrioritizedReplay(
    n_actions=N_A,
    n_features=N_S,
    learning_rate=LR,
    e_greedy=E_GREEDY,
    reward_decay=GAMMA,
    hidden=HIDDEN,
    batch_size=BATCH_SIZE,
    replace_target_iter=TARGET_REP_ITER,
    memory_size=MEMORY_CAPACITY,
    e_greedy_increment=E_INCREMENT,
)

# Loop-wide counters and reward scaling.
total_steps = 0
running_r = 0
r_scale = 100

for i_episode in range(MAX_EPISODES):
    # Observation layout (per the original author's note):
    # (coord_x, coord_y, vel_x, vel_y, angle, angular_vel,
    #  l_leg_on_ground, r_leg_on_ground)
    s = env.reset()
    ep_r = 0
    while True:
        # Start rendering only once the replay buffer has had time to fill.
        if total_steps > MEMORY_CAPACITY:
            env.render()
        a = RL.choose_action(s)
        s_, r, done, _ = env.step(a)
        # Soften the crash penalty, then scale rewards into a smaller range.
        if r == -100:
            r = -30
        r /= r_scale