)
# NOTE(review): the `)` above closes a call that starts before this view.
args = parser.parse_args()

# One list per recorded field. Nothing visible here appends to them;
# presumably they are filled inside the step loop below -- confirm.
samples = {
    "state": [],
    "state_img": [],
    "next_state": [],
    "next_state_img": [],
    "reward": [],
    "action": [],
    "terminal": [],
}

env = LunarLander()
# Render once before touching env.viewer; presumably the viewer/window is
# only created on the first render call -- TODO confirm.
env.render()
# Route keyboard events of the render window to the handlers defined
# elsewhere in this file.
env.viewer.window.on_key_press = key_press
env.viewer.window.on_key_release = key_release

# Initial action value; presumably overwritten by the key handlers -- confirm.
a = np.array([0])
episode_rewards = []
steps = 0
# Outer loop: one iteration per episode (no exit condition visible here).
while True:
    episode_reward = 0
    state = env.reset()
    state_img = env.render(
        mode="rgb_array")[::4, ::4, :]  # downsampling (every 4th pixel).
    # Inner loop: its body continues past the end of this view.
    while True:
# NOTE(review): the `if` chain below is presumably the tail of a key-press
# handler whose `def` line (and `global` declarations) sit above this view.
# ENTER and F flip boolean flags; A/D/W/Q/E select an entry of ACTIONS by name.
if key == KEY.ENTER:
    _human_saliency = not _human_saliency
if key == KEY.F:
    _human_fast_forward = not _human_fast_forward
if key == KEY.A:
    _human_agent_action = ACTIONS.index('LEFT')
if key == KEY.D:
    _human_agent_action = ACTIONS.index('RIGHT')
if key == KEY.W:
    _human_agent_action = ACTIONS.index('FIRE')
if key == KEY.Q:
    _human_agent_action = ACTIONS.index('RIGHTFIRE')
if key == KEY.E:
    _human_agent_action = ACTIONS.index('LEFTFIRE')


def key_release(key, mod):
    """Reset the human-selected action to the index of 'NOOP' in ACTIONS.

    Both ``key`` and ``mod`` are accepted but not inspected, so the reset
    happens for every released key.
    """
    # NOTE(review): this also fires when ENTER/F (the toggle keys) or a key
    # other than the one that set the current action is released -- confirm
    # that dropping back to NOOP in those cases is intended.
    global _human_agent_action
    _human_agent_action = ACTIONS.index('NOOP')


# Render once before touching the viewer; presumably the window is only
# created on the first render call -- TODO confirm.
env.render()
# Attach the keyboard handlers to the window of the unwrapped environment.
env.unwrapped.viewer.window.on_key_press = key_press
env.unwrapped.viewer.window.on_key_release = key_release

# create models
if RAM:
    # Take one random step purely to obtain an observation `s`; its length
    # is used as the network input size.
    a = random.randrange(env.action_space.n)
    s, r, done, info = env.step(a)
    N_STATE = len(s)
    # Pick the network class from the environment name.
    MODEL = LanderDQN if 'lunar' in opt.env else RamDQN
    # Policy and target networks are built with identical arguments.
    policy_net = MODEL(N_STATE, N_ACTIONS).to(device)
    target_net = MODEL(N_STATE, N_ACTIONS).to(device)
else:
    # Non-RAM branch: the network class depends on the `dueling` option and
    # takes only the number of actions.
    MODEL = DDQN if opt.dueling else DQN
    policy_net = MODEL(n_actions=N_ACTIONS).to(device)
    target_net = MODEL(n_actions=N_ACTIONS).to(device)