Example #1
def update():
    for episode in range(Max_Episodes):
        # Initial observation
        observation = env.reset()
        
        while True:
            # Fresh env
            env.render()
            # RL chooses an action
            action = RL.choose_action(str(observation))
            # RL takes the action and gets the next observation and reward
            observation_, reward, done = env.step(action)
            # RL learns from the transition (S, A, R, S')
            RL.learn(str(observation), action, reward, str(observation_))
            # Swap observation
            observation = observation_

            if done:
                break
        
    print('game over')
    env.destroy()

if __name__ == "__main__":
    env = Maze()
    RL = QLearningTable(actions = list(range(env.n_actions)))
    
    env.after(100, update)
    env.mainloop()
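The examples on this page all drive an agent through the same two calls, choose_action(state) and learn(s, a, r, s_), but the table-based agent itself is not shown. Below is a minimal sketch of such a QLearningTable class: a pandas-backed Q-table with an epsilon-greedy policy and the standard Q-learning update. Names, defaults, and the 'terminal' convention are illustrative assumptions, not the project's actual RL_brain code.

# Minimal sketch of a tabular Q-learning agent (illustrative, not the project's own class).
import numpy as np
import pandas as pd


class QLearningTable:
    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions                      # list of action indices
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon = e_greedy                     # probability of acting greedily
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def check_state_exist(self, state):
        # add an all-zero row the first time a state is seen
        if state not in self.q_table.index:
            self.q_table.loc[state] = [0.0] * len(self.actions)

    def choose_action(self, observation):
        self.check_state_exist(observation)
        if np.random.uniform() < self.epsilon:
            state_action = self.q_table.loc[observation, :]
            # break ties among equally valued actions at random
            action = np.random.choice(state_action[state_action == state_action.max()].index)
        else:
            action = np.random.choice(self.actions)
        return action

    def learn(self, s, a, r, s_):
        self.check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        # assumption: the env returns the string 'terminal' as s_ when the episode ends
        if s_ != 'terminal':
            q_target = r + self.gamma * self.q_table.loc[s_, :].max()  # Q-learning target
        else:
            q_target = r                                               # episode ended
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)     # update Q(s, a)

With this interface the update() loop above runs unchanged: states are passed in as strings, so any hashable state representation can serve as a row key in the table.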
Example #2
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
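In this DQN variant the constructor arguments hint at the agent's internals: memory_size=2000 is presumably the replay memory that RL.learn() samples from, replace_target_iter=200 how often the target network is synced, and e_greedy the probability of taking the greedy action. A minimal sketch of the replay-memory part alone follows (a hypothetical helper, not the project's DeepQNetwork code), assuming each state is a flat feature vector of length n_features.

# Minimal replay-memory sketch (hypothetical helper, not the project's DeepQNetwork internals).
import numpy as np


class ReplayMemory:
    def __init__(self, memory_size, n_features):
        # each row stores one transition: [s, a, r, s_]
        self.memory = np.zeros((memory_size, n_features * 2 + 2))
        self.memory_size = memory_size
        self.counter = 0

    def store_transition(self, s, a, r, s_):
        transition = np.hstack((s, [a, r], s_))
        index = self.counter % self.memory_size    # overwrite the oldest entries first
        self.memory[index, :] = transition
        self.counter += 1

    def sample(self, batch_size):
        # sample uniformly from the filled part of the buffer
        upper = min(self.counter, self.memory_size)
        indices = np.random.choice(upper, size=batch_size)
        return self.memory[indices, :]

In a typical DQN learn() step, a batch sampled this way is split back into its (s, a, r, s_) columns, and every replace_target_iter learning steps the target network's weights are copied from the evaluation network.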
Example #3
File: main.py Project: msdnqqy/RL
            action = rl.choose_action(state_str, avaliable_actions)  # choose an action
            reward, state_next, is_success = maze.step(action)  # get the reward from the environment

            state_next_str_arr = np.array(state_next) / maze.unit

            state_next_str = str(state_next_str_arr.tolist())

            rl.update(state_str, action, state_next_str, reward, is_success,
                      steps)  # update the state
            steps.append([state_str, action])  # store the experience for later learning
            maze.render()
            maze.write_weight(rl.state_table)

        print('run end {0}'.format(j))
        print(rl.state_table.round(2))
        time.sleep(0.5)

        # maze.write_weight(rl.state_table)
        # force update: update the weights along the whole path; after many training runs this yields good weights
        # rl.forceUpdate(steps,reward/10)
        print("forceUpdate")
        # print(rl.state_table.round(2))


if __name__ == '__main__':
    maze = Maze(8, 6, chif=15)
    rl = RLbrain(maze.get_all_action())  #获取所有动作
    maze.after(100, render)
    maze.mainloop()
Example #4
    for episode in range(100):
        # initial observation
        s_curr = maze._reset_maze()

        while True:
            maze._render()

            # Get next action from the Q-table
            action = rl.select_next_action(str(s_curr))

            # take the action and observe the next state and reward
            s_next, reward, isDone = maze._update_maze(action)

            # learn from the feedback
            rl.learn(str(s_curr), action, reward, str(s_next))

            s_curr = s_next
            if isDone:
                break

    print("Game over")
    maze.destroy()


if __name__ == "__main__":
    maze = Maze()
    rl = QLearningTable(actions=list(range(len(maze.action_space))))

    maze.after(100, update)
    maze.mainloop()
Example #5
        s = env.reset()
        while True:
            env.render()
            a = RL.choose_action(str(s))
            s_, r, done = env.step(a)
            RL.learn(str(s), a, r, str(s_))

            # use a model to output (r, s_) by inputting (s, a)
            # the model in dyna Q version is just like a memory replay buffer
            env_model.store_transition(str(s), a, r, s_)
            for n in range(10):  # learn 10 more times using the env_model
                ms, ma = env_model.sample_s_a()  # ms in here is a str
                mr, ms_ = env_model.get_r_s_(ms, ma)
                RL.learn(ms, ma, mr, str(ms_))

            s = s_
            if done:
                break

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = QLearningTable(actions=list(range(env.n_actions)))
    env_model = EnvModel(actions=list(range(env.n_actions)))

    env.after(0, update)
    env.mainloop()
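The Dyna-Q example uses each real transition once for direct Q-learning and then replays it through env_model for ten planning updates. Only the calls (store_transition, sample_s_a, get_r_s_) are shown above; here is a minimal sketch of a dictionary-backed model with that interface (illustrative, not the project's EnvModel class).

# Minimal sketch of a Dyna-Q environment model (illustrative, not the project's EnvModel class).
import random


class EnvModel:
    """Deterministic tabular model: remembers the last (r, s_) seen for each (s, a)."""

    def __init__(self, actions):
        self.actions = actions
        self.database = {}                     # (s, a) -> (r, s_)

    def store_transition(self, s, a, r, s_):
        self.database[(s, a)] = (r, s_)

    def sample_s_a(self):
        # pick a previously visited state-action pair at random
        s, a = random.choice(list(self.database.keys()))
        return s, a

    def get_r_s_(self, s, a):
        # return the stored reward and next state for that pair
        r, s_ = self.database[(s, a)]
        return r, s_

Keeping only the most recent outcome per (s, a) pair is enough for a deterministic maze, which is why the comment above likens the model to a replay buffer; a stochastic environment would instead need to store a distribution of outcomes per pair.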
Example #6
        observation = env.reset()
        while True:
            # Fresh env
            env.render()
            # RL chooses an action
            action = RL.choose_action(str(observation))
            # RL takes the action and gets the next observation and reward
            observation_, reward, done = env.step(action)
            # RL learns from the transition (S, A, R, S')
            RL.learn(str(observation), action, reward, str(observation_))
            # Swap observation
            observation = observation_
            if done:
                break
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    if Algorithm == 'SarsaLambda':
        RL = SarsaLambda(action_space=list(range(env.n_actions)))
        env.after(100, update_Sarsa)
    elif Algorithm == 'Q_learning':
        RL = Q_learning(action_space=list(range(env.n_actions)))
        env.after(100, update)
    elif Algorithm == 'Sarsa':
        RL = Sarsa(action_space=list(range(env.n_actions)))
        env.after(100, update_Sarsa)
    env.mainloop()
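Example #6 selects between Q-learning, Sarsa and Sarsa(lambda), and routes the Sarsa variants to a separate update_Sarsa loop because Sarsa must choose the next action before it can learn. The difference lies only in the learning target; below is a minimal sketch of the two update rules on a plain-dict Q-table (illustrative, not the project's classes).

# Minimal sketch of the Q-learning vs. Sarsa targets (illustrative, not the project's classes).
# Note: at a terminal next state the bootstrap term is dropped and the target is just r.
from collections import defaultdict

GAMMA = 0.9      # reward decay
LR = 0.01        # learning rate
N_ACTIONS = 4

Q = defaultdict(lambda: [0.0] * N_ACTIONS)   # Q[state][action]


def q_learning_update(s, a, r, s_):
    # off-policy: bootstrap from the best next action, regardless of what is actually taken
    target = r + GAMMA * max(Q[s_])
    Q[s][a] += LR * (target - Q[s][a])


def sarsa_update(s, a, r, s_, a_):
    # on-policy: bootstrap from the action a_ the agent has already chosen for s_
    target = r + GAMMA * Q[s_][a_]
    Q[s][a] += LR * (target - Q[s][a])

This is why a typical update_Sarsa loop chooses the next action right after env.step() and passes it into learn(), while the Q-learning loop shown above can learn immediately from (s, a, r, s_). Sarsa(lambda) additionally keeps an eligibility trace so each update also propagates back along recently visited state-action pairs.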