def main():
    global env, RL
    env = Maze('./env/maps/map2.json', full_observation=True)
    RL = DeepQNetwork(
        n_actions=4,
        n_features=25,
        restore_path=None,
        # restore_path=base_path + 'model_dqn.ckpt',
        learning_rate=0.005,
        reward_decay=0.9,
        e_greedy=0.95,
        replace_target_iter=800,
        batch_size=64,
        # e_greedy_increment=None,
        e_greedy_increment=1e-3,
        output_graph=False,
    )
    env.after(100, run_maze)
    env.mainloop()
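# NOTE: main() above schedules run_maze() but this excerpt does not define it.
# Below is a minimal sketch of such a training loop, assuming the Maze and
# DeepQNetwork interfaces used throughout these snippets (reset/step/render,
# choose_action/store_transition/learn); the episode count and warm-up
# threshold are illustrative, not from the source.
def run_maze():
    step = 0
    for episode in range(300):
        observation = env.reset()
        while True:
            env.render()
            # choose an action epsilon-greedily from the current state
            action = RL.choose_action(observation)
            observation_, reward, done = env.step(action)
            # store the transition in replay memory
            RL.store_transition(observation, action, reward, observation_)
            # start learning after a warm-up period, then every 5 steps
            if (step > 200) and (step % 5 == 0):
                RL.learn()
            observation = observation_
            if done:
                break
            step += 1
    print('game over')
    env.destroy()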
stats = plotting.EpisodeStats(episode_lengths=np.zeros(no_episodes),
                              episode_rewards=np.zeros(no_episodes))

T = 2000
number_of_contents = 10
myenv = MyEnv(density=density, T=T, number_of_contents=number_of_contents)

if RL is False:
    RL = DeepQNetwork(myenv.no_actions, myenv.observation_length,
                      learning_rate=0.001,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=5000,
                      memory_size=2000,
                      batch_size=220
                      # output_graph=True
                      )

print("No. vehicles: " + str(myenv.number_of_vehicles))

for e in range(no_episodes):
    myenv = MyEnv(density=density, T=T, number_of_contents=number_of_contents)
    myenv.episode = e
    myenv.no_episodes = no_episodes
    # Reset the environment
        action = DQN.choose_action(observation)
        observation_, reward, done = env.step(action)
        # store the transition in replay memory
        DQN.store_transition(observation, action, reward, observation_)
        if (step > 200) and (step % 5 == 0):
            DQN.learn()
        # move to the next state before checking for episode end
        observation = observation_
        if done:
            break
        step += 1
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    DQN = DeepQNetwork(env.n_actions, env.n_features,
                       learning_rate=0.01,
                       reward_decay=0.9,
                       e_greedy=0.9,
                       replace_target_iter=200,
                       memory_size=2000,
                       output_graph=False)
    env.after(100, run_maze)
    env.mainloop()
    DQN.plot_cost()
    # print(DQN.n_features)
import gym
from DQN_brain import DeepQNetwork

env = gym.make('CartPole-v0')
env = env.unwrapped

print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DeepQNetwork(n_actions=env.action_space.n,
                  n_features=env.observation_space.shape[0],
                  learning_rate=0.01,
                  e_greedy=0.9,
                  replace_target_iter=100,
                  memory_size=2000,
                  e_greedy_increment=0.0008)

total_steps = 0

for i_episode in range(100):
    observation = env.reset()
    ep_r = 0
    while True:
        env.render()
        action = RL.choose_action(observation)
        observation_, reward, done, info = env.step(action)
elif Inverted_Pendulum:
    q_init = 0
    env = RL_Pendulum(q_init=q_init, dq_init=0)

# Set up Deep Q-network
if Training_Mode:
    e_greedy = 0.95
else:  # testing mode
    e_greedy = 1.0

RL = DeepQNetwork(env.n_actions, env.n_states,
                  learning_rate=0.0005,
                  reward_decay=0.995,
                  e_greedy=e_greedy,
                  replace_target_iter=400,
                  batch_size=128,
                  memory_size=4000,
                  e_greedy_increment=None,
                  record_history=True,
                  # output_graph=True,
                  observation_interval=observation_interval,
                  )

# Run simulation and training
time_start = time.time()
env.after(100, run_pendulum)
env.mainloop()

# Print total simulation and real-world time
time_end = time.time() - time_start
print("\n------------------------------------\n")
    end = time.time()
    print("game over!")
    print('Run time:', end - start)
    engine = pyttsx3.init()
    engine.say('Program finished running')
    engine.runAndWait()
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions, env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,  # try reducing the target-replacement frequency
        memory_size=2000,         # try enlarging the replay memory
        output_graph=False)
    RL_ = DeepQNetwork2(
        env.n_actions, env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,  # try reducing the target-replacement frequency
        memory_size=2000,         # try enlarging the replay memory
        )
    RL__ = DeepQNetwork3(
        env.n_actions,
import tensorflow as tf
from maze_env2 import Maze
from DQN_brain import DeepQNetwork
import time

if __name__ == '__main__':
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions, env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )
    RL.load_model()
    RL.epsilon = 1
    observation = env.reset()
    while True:
        # fresh env
        env.render()
        time.sleep(1)
        # RL choose action based on observation
        action = RL.choose_action(observation)
        print(observation * 4)
        # RL take action and get next observation and reward
        observation_, reward, done = env.step(action)
        observation = observation_
        # break while loop when end of this episode
            # break while loop when end of this episode
            step += 1
            day += 1


if __name__ == "__main__":
    learning_rate = sys.argv[1]
    reward_decay = sys.argv[2]
    INTERVAL = sys.argv[3]
    RL = DeepQNetwork(n_actions=3, n_features=6,
                      learning_rate=float(learning_rate),
                      reward_decay=float(reward_decay),
                      e_greedy=0.9,
                      replace_target_iter=500,
                      memory_size=20000,
                      output_graph=False
                      )
    run_learning(RL, int(INTERVAL))
    value = run_testing(RL, int(INTERVAL))
    cost = RL.get_last_cost()
    with open("./result/" + learning_rate + "_" + reward_decay + "_" + INTERVAL + "_good", "a") as file:
        file.write("value: %f , cost: %f \n" % (value, cost))
            # break while loop when end of this episode
            if done == 'treasure' or done == 'trap':
                break
            total_step += 1
            local_step += 1
        # print('==================================================================')
        print('Game', game + 1, '.', local_step, 'steps used to', done, '.',
              'Global step =', total_step, '.')
    # end of game
    print('Game Completed.')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions, env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
    )
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
rewards = []

# Number of trials (episodes)
no_episodes = 4000
stats = plotting.EpisodeStats(
    episode_lengths=np.zeros(no_episodes),
    episode_rewards=np.zeros(no_episodes))

myenv = MyEnv(density=density)
print(myenv.number_of_vehicles)

RL = DeepQNetwork(myenv.no_actions, myenv.number_of_contents + 2,
                  learning_rate=0.01,
                  reward_decay=0.9,
                  e_greedy=0.9,
                  replace_target_iter=200,
                  memory_size=100,
                  # output_graph=True
                  )

for e in range(no_episodes):
    # Reset the environment
    observation = myenv.reset()
    for i in range(myenv.number_of_vehicles):
        # take action: RL chooses an action based on the observation
        action = RL.choose_action(np.array(observation))
    def updatePlotData(self, x, y):
        self.xs.append(x)
        self.ys.append(y)
        if len(self.xs) > 1:
            self.updatePlotSignal.emit()
        return


# Initialize the game
game_env = game()
# Initialize the DQN network
DQN = DeepQNetwork(game_env.n_actions, game_env.n_features,
                   learning_rate=0.01,
                   reward_decay=0.9,
                   e_greedy=0.8,
                   replace_target_iter=1000,
                   memory_size=1500,
                   output_graph=True)

save_checkpoint = False  # set whether to save checkpoints


def run_DQN():
    # Load a checkpoint
    # DQN.load_model('./saved_models/model-54000pts-2020-06-11-15-10-16.ckpt')
    fig_x = []
    fig_y = []
import gym
from DQN_brain import DeepQNetwork

env = gym.make('MountainCar-v0')
env = env.unwrapped

print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DeepQNetwork(
    n_actions=3,
    n_features=2,
    learning_rate=0.001,
    e_greedy=0.9,
    replace_target_iter=300,
    memory_size=3000,
    e_greedy_increment=0.0001,
)

total_steps = 0

for i_episode in range(10):
    observation = env.reset()
    ep_r = 0
    while True:
        env.render()
        action = RL.choose_action(observation)
def run(episode, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9,
        replace_target_iter=200, memory_size=5000):
    print('------------------Environment------------------')
    print(' length_range:\t\t', params.length_range)
    print(' priority_range:\t', params.priority_range)
    print(' sensors_amount:\t', params.sensors_amount)
    print(' s:\t\t\t', params.s)
    print(' v:\t\t\t', params.v)
    print(' period:\t\t', params.period)
    print(' t_limit:\t\t', params.t_limit)
    print(' max_time:\t\t', params.max_time)
    print(' Random seed:\t\t', params.seed)
    print('--------------------Method---------------------')
    print(' algorithm:\t\tDQN')
    print(' episode:\t\t', episode)
    print(' learning_rate:\t', learning_rate)
    print(' reward_decay:\t\t', reward_decay)
    print(' e_greedy:\t\t', e_greedy)
    print(' replace_target_iter:\t', replace_target_iter)
    print(' memory_size:\t\t', memory_size)
    print('-----------------------------------------------')

    RL = DeepQNetwork(
        params.sensors_amount,
        params.sensors_amount,
        learning_rate=learning_rate,
        reward_decay=reward_decay,
        e_greedy=e_greedy,
        replace_target_iter=replace_target_iter,
        memory_size=memory_size,
        # output_graph=True
    )

    costs = []
    best_uav, best_result, best_cost = None, None, float('inf')
    step = 0

    for _ in tqdm(range(episode)):
        # initial observation
        sensors, uav = generateMap()
        observation = observe(uav, sensors)
        np.random.seed()
        previous_cost = cost(uav, sensors)

        while True:
            # RL choose action based on observation
            action = RL.choose_action(observation)

            # RL take action and get next observation and reward
            done = uav.fly_to(sensors[action]) is False
            _cost = cost(uav, sensors)
            _observation = observe(uav, sensors)
            reward = (previous_cost - _cost) * 100
            previous_cost = _cost

            # RL learn from this transition
            RL.store_transition(observation, action, reward, _observation)
            if (step > episode / 5) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = _observation

            # break while loop when end of this episode
            if done:
                costs.append(_cost)
                if _cost <= best_cost:
                    best_result = cost(uav, sensors, details=True)
                    best_cost = _cost
                    best_uav = UAV(uav)
                break
            step += 1

    # output results
    print('Max time', params.max_time, 'Final time:', best_uav.records[-1][0])
    print('Best cost:', best_cost)
    # RL.plot_cost()

    # # show costs plot
    # x, y = list(range(episode)), costs
    # plt.plot(x, y, color='red')
    # plt.show()

    with open('./out/DQN_{:%m-%d-%H-%M-%S}.json'.format(params.time), "w+") as f:
        f.write(json.dumps(best_result))

    # draw(best_uav, sensors, details=True)
    draw(best_uav, sensors, details=False)
    return best_result
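# Example entry point: a minimal sketch of how run() might be invoked; the
# episode count here is illustrative, not from the source, and the remaining
# hyperparameters fall back to the defaults in the signature above.
if __name__ == '__main__':
    run(episode=1000)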