Code Example #1
def see_path():
    '''
    Replay and display the path learned in the stored q_table.
    :return: None
    '''
    q_table = read_table()
    env = Maze()
    agent = DQN_Agent(actions=list(range(env.n_actions)))
    agent.load_q_table(q_table)
    s = env.reset()
    while True:
        env.render()
        a = agent.get_path(s)
        # print(a)
        time.sleep(0.2)
        s_, r, done = env.step(a)
        s = s_
        if done:
            env.render()
            break
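The `get_path` method used above is not shown on this page. If it simply returns the greedy action for the current state from the loaded table, a minimal sketch could look like the following (the dict layout and `str(state)` keying are assumptions):

def get_path(self, state):
    # Greedy lookup: return the action with the highest stored Q-value for this state.
    q_values = self.q_table[str(state)]
    return max(range(len(q_values)), key=lambda a: q_values[a])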
Code Example #2
def common_check(episodes=400):
    '''
    Run an ordinary learning process and store the resulting q_table.
    :return: None
    '''
    env = Maze()
    agent = DQN_Agent(actions=list(range(env.n_actions)))
    for episode in range(episodes):
        s = env.reset()
        episode_reward = 0
        while True:
            # env.render()              # You can comment out all render() calls to disable the graphical interface during training and speed up the code.
            a = agent.choose_action(s)
            s_, r, done = env.step(a)
            q_table = agent.update_q(s, s_, a, r)
            episode_reward += r
            s = s_
            if done:
                #env.render()
                break
        print('episode:', episode, 'episode_reward:', episode_reward)
    store_table(q_table)
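The helpers `store_table` and `read_table` (used here and in `see_path` above) are also not shown on this page. A minimal pickle-based sketch, with the file name as an assumption:

import pickle

def store_table(q_table, path='q_table.pkl'):
    # Persist the learned table so see_path() can replay it later.
    with open(path, 'wb') as f:
        pickle.dump(q_table, f)

def read_table(path='q_table.pkl'):
    # Load a previously stored table.
    with open(path, 'rb') as f:
        return pickle.load(f)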
Code Example #3
def check_converge_time():
    '''
    Measure how many episodes are needed, on average over 100 runs,
    before the optimal path is found for the first time.
    :return: None
    '''
    env = Maze()
    cvg_time = 0
    for i in range(100):
        print(i)
        agent = DQN_Agent(actions=list(range(env.n_actions)))
        flag = 0
        for episode in range(300):
            if flag:
                break
            s = env.reset()
            episode_reward = 0
            while True:
                # env.render()              # You can comment out all render() calls to disable the graphical interface during training and speed up the code.
                a = agent.choose_action(s)
                s_, r, done = env.step(a)
                q_table = agent.update_q(s, s_, a, r)
                episode_reward += r
                if episode_reward == 4:  # an episode reward of 4 is taken to mean the optimal path was found
                    cvg_time += episode
                    flag = 1
                s = s_
                #print(s)
                if done:
                    #env.render()
                    #time.sleep(0.5)
                    break
            # print('episode:', episode, 'episode_reward:', episode_reward)
        if flag == 0:
            cvg_time += 300
    print(cvg_time / 100)  # average number of episodes over the 100 runs
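Both snippets above call `agent.update_q(s, s_, a, r)` without showing its body. If it performs the standard one-step tabular Q-learning update, a sketch could look like this (the learning rate, discount factor, and dict layout are assumptions):

def update_q(self, s, s_, a, r, alpha=0.1, gamma=0.9):
    # One-step Q-learning: Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
    s, s_ = str(s), str(s_)
    self.q_table.setdefault(s, [0.0] * len(self.actions))
    self.q_table.setdefault(s_, [0.0] * len(self.actions))
    target = r + gamma * max(self.q_table[s_])
    self.q_table[s][a] += alpha * (target - self.q_table[s][a])
    return self.q_table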
Code Example #4
File: main.py  Project: simonho0406/Dyna-Q-Learning
    episodes = 100
    model_based_episodes = 5
    env = Maze()
    model = Model(actions=list(range(env.n_actions)))
    agent = Agent(actions=list(range(
        env.n_actions)))  # choose from range(4), i.e. the four actions 0, 1, 2, 3 (up, down, right, left)
    for episode in range(episodes):  # for each episode, from start to finish
        s = env.reset()
        episode_reward = 0
        while True:
            # env.render()              # You can comment out all render() calls to disable the graphical interface during training and speed up the code.

            # move one step
            a = agent.choose_action(str(s))

            s_, r, done = env.step(a)

            # update Q model-free
            agent.learn(str(s), a, r, str(s_), done)

            model.store_transition(str(s), a, r, s_)

            # update Q model-based (planning: replay transitions sampled from the learned model)
            for n in range(model_based_episodes):
                ss, sa = model.sample_s_a()
                sr, ss_ = model.get_r_s_(ss, sa)
                agent.learn(ss, sa, sr, str(ss_), done)

            episode_reward += r
            s = s_
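The `Model` object drives the planning steps of this Dyna-Q loop by storing observed transitions and replaying them. A minimal sketch consistent with the calls above (`store_transition`, `sample_s_a`, `get_r_s_`); the internal storage layout is an assumption:

import random

class Model:
    def __init__(self, actions):
        self.actions = actions
        self.transitions = {}  # (state, action) -> (reward, next_state)

    def store_transition(self, s, a, r, s_):
        # Remember the most recent outcome of taking action a in state s.
        self.transitions[(s, a)] = (r, s_)

    def sample_s_a(self):
        # Sample a previously visited state-action pair for a planning update.
        return random.choice(list(self.transitions.keys()))

    def get_r_s_(self, s, a):
        # Return the stored reward and next state for that pair.
        return self.transitions[(s, a)]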
Code Example #5
if __name__ == "__main__":
    ### START CODE HERE ###
    # This is an agent with random policy. You can learn how to interact with the environment through the code below.
    # Then you can delete it and write your own code.

    env = Maze()
    training_epoch = 100 if maze == '1' else 1000
    agent = D_Q_Agent(training_epoch)

    for episode in range(training_epoch):
        agent.if_rewarded = False
        s = env.reset()
        while True:
            # env.render()              # You can comment out all render() calls to disable the graphical interface during training and speed up the code.
            chosen_direction = agent.choose_action(s, episode)
            s_, r, done = env.step(chosen_direction)
            agent.update_Q_value(s, chosen_direction, r)
            if s_[-1]:  # the last element of the state presumably flags that the reward has been collected
                agent.if_rewarded = True
                agent.if_rewarded_in_the_whole_training = True
            s = s_
            agent.simulative_training(100)  # presumably 100 simulated (model-based) planning updates

            if done:
                #env.render()
                time.sleep(0.5)
                break

        print('episode:', episode)
    print('Training Finished! Now Demonstrate the Optimal Policy:')
    while True:
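        # NOTE: the original snippet is cut off at this `while True:`. A hypothetical
        # loop body for demonstrating the learned policy (it assumes choose_action acts
        # greedily after training and that `s` was reset with env.reset() before the loop):
        env.render()
        chosen_direction = agent.choose_action(s, episode)
        s_, r, done = env.step(chosen_direction)
        s = s_
        if done:
            env.render()
            break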