Exemple #1
0
    )

    # Parse command-line options; `parser` is configured before this point
    # (its construction is outside the visible code).
    args = parser.parse_args()

    # Recording buffers: one initially-empty list per field.
    # NOTE(review): presumably one entry is appended to each list per
    # environment step -- the appending code is not visible here; confirm.
    samples = {
        "state": [],
        "state_img": [],
        "next_state": [],
        "next_state_img": [],
        "reward": [],
        "action": [],
        "terminal": [],
    }

    env = LunarLander()
    # render() is called before env.viewer.window is accessed below.
    # NOTE(review): presumably the viewer/window only exists after the first
    # render -- confirm against the environment implementation.
    env.render()
    # Route keyboard events from the viewer window to the handlers
    # key_press / key_release (defined elsewhere in the file).
    env.viewer.window.on_key_press = key_press
    env.viewer.window.on_key_release = key_release

    # Action holder initialised to a one-element array containing 0.
    # NOTE(review): presumably updated by the key handlers -- confirm.
    a = np.array([0])

    # Bookkeeping that starts empty / at zero; the code that updates these
    # is outside the visible code.
    episode_rewards = []
    steps = 0
    # Outer loop: each iteration starts a new episode by resetting the
    # per-episode reward and the environment.
    while True:
        episode_reward = 0
        state = env.reset()
        # Grab the rendered frame as an array and keep every 4th element along
        # the first two axes; the last axis is kept whole.
        state_img = env.render(
            mode="rgb_array")[::4, ::4, :]  # downsampling (every 4th pixel).

        # NOTE(review): `key` is not assigned anywhere in the visible code and
        # nothing visible exits this loop. These lines look like the body of a
        # key-press handler rather than an episode step loop -- confirm
        # against the full file.
        # NOTE(review): no `global` declaration for the `_human_*` names is
        # visible in this scope -- verify they are bound where intended.
        while True:
            # ENTER and F each flip a boolean flag.
            if key == KEY.ENTER: _human_saliency = not _human_saliency
            if key == KEY.F: _human_fast_forward = not _human_fast_forward

            # A / D / W / Q / E select an action by looking up its name's
            # position in ACTIONS.
            if key == KEY.A:  _human_agent_action = ACTIONS.index('LEFT')
            if key == KEY.D:  _human_agent_action = ACTIONS.index('RIGHT')
            if key == KEY.W:  _human_agent_action = ACTIONS.index('FIRE')
            if key == KEY.Q:  _human_agent_action = ACTIONS.index('RIGHTFIRE')
            if key == KEY.E:  _human_agent_action = ACTIONS.index('LEFTFIRE')


        def key_release(key, mod):
            """Set the human-selected action back to NOOP on a key release.

            Neither ``key`` nor ``mod`` is read, so releasing any key resets
            the module-level ``_human_agent_action``.
            """
            global _human_agent_action
            noop_index = ACTIONS.index('NOOP')
            _human_agent_action = noop_index


        # render() is called before the viewer window is accessed below.
        # NOTE(review): presumably required so that the viewer exists --
        # confirm against the environment implementation.
        env.render()
        # Install the keyboard handlers on the unwrapped environment's viewer
        # window.
        env.unwrapped.viewer.window.on_key_press = key_press
        env.unwrapped.viewer.window.on_key_release = key_release

    # Create the policy and target networks. Both are built with the same
    # class and arguments and moved to `device`; they are separate instances.
    # NOTE(review): any weight synchronisation between them is not visible
    # here.
    if RAM:
        # Take one step with a randomly chosen action to obtain an
        # observation `s`; its length is used as the network input size.
        # NOTE(review): this advances the environment by one step, and
        # `r`, `done`, `info` are not used in the visible code.
        a = random.randrange(env.action_space.n)
        s, r, done, info = env.step(a)
        N_STATE = len(s)
        # LanderDQN when the substring 'lunar' occurs in opt.env, otherwise
        # RamDQN.
        MODEL = LanderDQN if 'lunar' in opt.env else RamDQN
        policy_net = MODEL(N_STATE, N_ACTIONS).to(device)
        target_net = MODEL(N_STATE, N_ACTIONS).to(device)
    else:
        # Non-RAM path: DDQN when opt.dueling is truthy, otherwise DQN; only
        # the number of actions is passed to the constructor.
        MODEL = DDQN if opt.dueling else DQN
        policy_net = MODEL(n_actions=N_ACTIONS).to(device)
        target_net = MODEL(n_actions=N_ACTIONS).to(device)