Code Example #1
File: run_dqn.py  Project: ZhangRui111/Homework
def main():
    global env, RL
    env = Maze('./env/maps/map2.json', full_observation=True)
    RL = DeepQNetwork(
        n_actions=4,
        n_features=25,
        restore_path=None,
        # restore_path=base_path + 'model_dqn.ckpt',
        learning_rate=0.005,
        reward_decay=0.9,
        e_greedy=0.95,
        replace_target_iter=800,
        batch_size=64,
        # e_greedy_increment=None,
        e_greedy_increment=1e-3,
        output_graph=False,
    )
    env.after(100, run_maze)
    env.mainloop()
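
Several of these snippets register a run_maze callback via env.after(100, run_maze) without showing its body. The loop below is a minimal sketch of what such a callback typically does, assuming the Maze and DeepQNetwork interfaces visible in Code Examples #3 and #7 (reset, render, step, choose_action, store_transition, learn); it is an illustration, not code from the listed projects.

def run_maze():
    # Hypothetical training loop; assumes the globals `env` and `RL` from Code Example #1.
    step = 0
    for episode in range(300):
        observation = env.reset()           # start a new episode
        while True:
            env.render()                    # refresh the maze window
            action = RL.choose_action(observation)
            observation_, reward, done = env.step(action)
            RL.store_transition(observation, action, reward, observation_)
            # learn every 5 steps once enough transitions have been stored
            if (step > 200) and (step % 5 == 0):
                RL.learn()
            observation = observation_
            if done:
                break
            step += 1
    print('game over')
    env.destroy()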
Code Example #2
        stats = plotting.EpisodeStats(episode_lengths=np.zeros(no_episodes),
                                      episode_rewards=np.zeros(no_episodes))

        T = 2000
        number_of_contents = 10
        myenv = MyEnv(density=density,
                      T=T,
                      number_of_contents=number_of_contents)

        if RL is False:
            RL = DeepQNetwork(myenv.no_actions,
                              myenv.observation_length,
                              learning_rate=0.001,
                              reward_decay=0.9,
                              e_greedy=0.9,
                              replace_target_iter=5000,
                              memory_size=2000,
                              batch_size=220
                              # output_graph=True
                              )

        print("No. vehicles:" + str(myenv.number_of_vehicles))

        for e in range(no_episodes):

            myenv = MyEnv(density=density,
                          T=T,
                          number_of_contents=number_of_contents)
            myenv.episode = e
            myenv.no_episodes = no_episodes
            # Reset the environment
Code Example #3
            action = DQN.choose_action(observation)
            observation_, reward, done = env.step(action)
            DQN.store_transition(observation, action, reward, observation_)

            # learn every 5 steps once enough transitions are stored
            if (step > 200) and (step % 5 == 0):
                DQN.learn()

            if done:
                break
            step += 1

    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    DQN = DeepQNetwork(env.n_actions,
                       env.n_features,
                       learning_rate=0.01,
                       reward_decay=0.9,
                       e_greedy=0.9,
                       replace_target_iter=200,
                       memory_size=2000,
                       output_graph=False)
    env.after(100, run_maze)
    env.mainloop()
    DQN.plot_cost()
    # print(DQN.n_features)
Code Example #4
File: run_cartPole.py  Project: toyijiu/my_code
import gym
from DQN_brain import DeepQNetwork

env = gym.make('CartPole-v0')

env = env.unwrapped

print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DeepQNetwork(n_actions=env.action_space.n,
                  n_features=env.observation_space.shape[0],
                  learning_rate=0.01,
                  e_greedy=0.9,
                  replace_target_iter=100,
                  memory_size=2000,
                  e_greedy_increment=0.0008)

total_steps = 0

for i_episode in range(100):
    observation = env.reset()
    ep_r = 0

    while True:
        env.render()
        action = RL.choose_action(observation)
        observation_, reward, done, info = env.step(action)
Code Example #5
    elif Inverted_Pendulum:
        q_init=0
    env = RL_Pendulum(q_init=q_init, dq_init=0)

    # Set up Deep Q-network
    if Training_Mode:
        e_greedy=0.95
    else: # testing mode
        e_greedy=1.0

    RL = DeepQNetwork(env.n_actions, env.n_states,
                        learning_rate=0.0005,
                        reward_decay=0.995,
                        e_greedy=e_greedy,
                        replace_target_iter=400,
                        batch_size=128,
                        memory_size=4000,
                        e_greedy_increment=None,
                        record_history=True,
                        # output_graph=True,
                        observation_interval=observation_interval,
                    )
  

    # Run simulation and training
    time_start=time.time()
    env.after(100, run_pendulum)
    env.mainloop()
    
    # Print total simulation and real world time
    time_end=time.time()-time_start
    print("\n------------------------------------\n")
Code Example #6
    end = time.time()
    print("game over!")
    print('Running time:', end - start)
    engine = pyttsx3.init()
    engine.say('The program has finished running')
    engine.runAndWait()
    env.destory()


if __name__ == "__main__":
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,  # try reducing how often the target net is replaced
        memory_size=2000,  # try enlarging the replay memory
        output_graph=False)
    RL_ = DeepQNetwork2(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,  # try reducing how often the target net is replaced
        memory_size=2000,  # try enlarging the replay memory
    )
    RL__ = DeepQNetwork3(
        env.n_actions,
Code Example #7
import tensorflow as tf
from maze_env2 import Maze
from DQN_brain import DeepQNetwork
import time

if __name__ == '__main__':
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )
    RL.load_model()
    RL.epsilon = 1
    observation = env.reset()
    while True:
        # fresh env
        env.render()
        time.sleep(1)
        # RL choose action based on observation
        action = RL.choose_action(observation)
        print(observation * 4)
        # RL take action and get next observation and reward
        observation_, reward, done = env.step(action)
        observation = observation_
        # break while loop when end of this episode
Code Example #8
            # break while loop when end of this episode

            step += 1
            day += 1


if __name__ == "__main__":

    learning_rate = sys.argv[1]
    reward_decay = sys.argv[2]
    INTERVAL = sys.argv[3]


    RL = DeepQNetwork(n_actions=3,
                        n_features=6,
                        learning_rate=float(learning_rate),
                        reward_decay=float(reward_decay),
                        e_greedy=0.9,
                        replace_target_iter=500,
                        memory_size=20000,
                        output_graph=False
                        )
    run_learning(RL, int(INTERVAL))
    value = run_testing(RL, int(INTERVAL))
    cost = RL.get_last_cost()
    file = open("./result/" + learning_rate+"_"+reward_decay+"_"+INTERVAL + "_good", "a")
    file.write("value: %f ,  cost: %f \n" % (value,cost))
    file.close()

Code Example #9
            # break while loop when end of this episode
            if done == 'treasure' or done == 'trap':
                break
            total_step += 1
            local_step += 1


#        print('==================================================================')
        print('Game', game + 1, '.', local_step, 'steps used to', done, '.',
              'Global step =', total_step, '.')

    # end of game
    print('Game Completed.')
    env.destroy()

if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_net=200,
        memory_size=2000,
    )
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
Code Example #10
        rewards = []

        # Number of trials (episodes)
        no_episodes = 4000

        stats = plotting.EpisodeStats(
            episode_lengths=np.zeros(no_episodes),
            episode_rewards=np.zeros(no_episodes))


        myenv = MyEnv(density=density)
        print(myenv.number_of_vehicles)
        RL = DeepQNetwork(myenv.no_actions, myenv.number_of_contents+2,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=100,
                      # output_graph=True
                      )

        for e in range(no_episodes):

            # Reset the environment
            observation = myenv.reset()

            for i in range(myenv.number_of_vehicles):

                # take action
                # RL choose action based on observation
                action = RL.choose_action(np.array(observation))
Code Example #11
    def updatePlotData(self, x, y):
        self.xs.append(x)
        self.ys.append(y)
        if len(self.xs) > 1:
            self.updatePlotSignal.emit()
        return


# Initialize the game
game_env = game()
# Initialize the DQN network
DQN = DeepQNetwork(game_env.n_actions,
                   game_env.n_features,
                   learning_rate=0.01,
                   reward_decay=0.9,
                   e_greedy=0.8,
                   replace_target_iter=1000,
                   memory_size=1500,
                   output_graph=True)

save_checkpoint = False  # whether to save checkpoints


def run_DQN():

    # Load a checkpoint
    # DQN.load_model('./saved_models/model-54000pts-2020-06-11-15-10-16.ckpt')

    fig_x = []
    fig_y = []
Code Example #12
File: run_mountainCar.py  Project: toyijiu/my_code
import gym
from DQN_brain import DeepQNetwork

env = gym.make('MountainCar-v0')
env = env.unwrapped

print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DeepQNetwork(
    n_actions=3,
    n_features=2,
    learning_rate=0.001,
    e_greedy=0.9,
    replace_target_iter=300,
    memory_size=3000,
    e_greedy_increment=0.0001,
)

total_steps = 0

for i_episode in range(10):

    observation = env.reset()
    ep_r = 0
    while True:
        env.render()

        action = RL.choose_action(observation)
Code Example #13
def run(episode,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=5000):

    print('------------------Environment------------------')
    print('  length_range:\t\t', params.length_range)
    print('  priority_range:\t', params.priority_range)
    print('  sensors_amount:\t', params.sensors_amount)
    print('  s:\t\t\t', params.s)
    print('  v:\t\t\t', params.v)
    print('  period:\t\t', params.period)
    print('  t_limit:\t\t', params.t_limit)
    print('  max_time:\t\t', params.max_time)
    print('  Random seed:\t\t', params.seed)
    print('--------------------Method---------------------')
    print('  algorithm:\t\tDQN')
    print('  episode:\t\t', episode)
    print('  learning_rate:\t', learning_rate)
    print('  reward_decay:\t\t', reward_decay)
    print('  e_greedy:\t\t', e_greedy)
    print('  replace_target_iter:\t', replace_target_iter)
    print('  memory_size:\t\t', memory_size)
    print('-----------------------------------------------')

    RL = DeepQNetwork(
        params.sensors_amount,
        params.sensors_amount,
        learning_rate=learning_rate,
        reward_decay=reward_decay,
        e_greedy=e_greedy,
        replace_target_iter=replace_target_iter,
        memory_size=memory_size,
        # output_graph=True
    )

    costs = []
    best_uav, best_result, best_cost = None, None, float('inf')
    step = 0
    for _ in tqdm(range(episode)):
        # initial observation
        sensors, uav = generateMap()
        observation = observe(uav, sensors)
        np.random.seed()

        previous_cost = cost(uav, sensors)

        while True:
            # RL choose action based on observation
            action = RL.choose_action(observation)

            # RL take action and get next observation and reward
            done = uav.fly_to(sensors[action]) is False
            _cost = cost(uav, sensors)
            _observation = observe(uav, sensors)
            reward = (previous_cost - _cost) * 100
            previous_cost = _cost

            # RL learn from this transition
            RL.store_transition(observation, action, reward, _observation)

            if (step > episode / 5) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = _observation

            # break while loop when end of this episode
            if done:
                costs.append(_cost)
                if _cost <= best_cost:
                    best_result = cost(uav, sensors, details=True)
                    best_cost = _cost
                    best_uav = UAV(uav)
                break
            step += 1

    # output results
    print('Max time', params.max_time, 'Final time:', best_uav.records[-1][0])
    print('Best cost:', best_cost)

    # RL.plot_cost()

    # # show costs plot
    # x, y = list(range(episode)), costs
    # plt.plot(x, y, color='red')
    # plt.show()

    with open('./out/DQN_{:%m-%d-%H-%M-%S}.json'.format(params.time),
              "w+") as f:
        f.write(json.dumps(best_result))
        f.close()

    # draw(best_uav, sensors, details=True)
    draw(best_uav, sensors, details=False)

    return best_result
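
A typical invocation of the run function above might look like the following; this is a hypothetical usage sketch, and the parameter values are illustrative rather than taken from the project.

if __name__ == '__main__':
    # Illustrative call: 500 episodes with a slower target-network replacement schedule.
    best = run(episode=500,
               learning_rate=0.005,
               reward_decay=0.9,
               replace_target_iter=400,
               memory_size=10000)
    print(best)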