# RIS-UAV experiment setup (reflowed from a whitespace-mangled one-liner).
# Builds three Double-DQN agents sharing one TF session; they differ only in
# whether a RIS is present and whether passive phase shifts are optimized.
from RL_brain import DoubleDQN
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import matplotlib
from Res_plot import Res_plot

# NOTE(review): RIS_UAV is constructed below but never imported in this
# fragment -- confirm `from <env module> import RIS_UAV` exists elsewhere.
env = RIS_UAV()
res = Res_plot(env)

MEMORY_SIZE = 3200      # replay-buffer capacity shared by all three agents
Episodes = env.eps      # number of training episodes comes from the env

sess = tf.Session()

# Baseline: UAV only, no RIS at all.
with tf.variable_scope('Double_DQN_UAV'):
    double_DQN_UAV = DoubleDQN(
        n_actions=env.n_actions,
        n_features=env.n_features,
        memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001,
        double_q=True,
        ris=False,
        passive_shift=False,
        sess=sess,
        output_graph=True)

# RIS present but with fixed (non-optimized) phase shifts.
with tf.variable_scope('Double_DQN_FIX_RIS'):
    double_DQN_RIS_Fix = DoubleDQN(
        n_actions=env.n_actions,
        n_features=env.n_features,
        memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001,
        double_q=True,
        ris=True,
        passive_shift=False,
        sess=sess,
        output_graph=True)

# RIS present with learned passive phase shifts.
with tf.variable_scope('Double_DQN_RIS'):
    double_DQN_RIS = DoubleDQN(
        n_actions=env.n_actions,
        n_features=env.n_features,
        memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001,
        double_q=True,
        ris=True,
        passive_shift=True,
        sess=sess,
        output_graph=True)

sess.run(tf.global_variables_initializer())
# record the results
# NOTE(review): whitespace-mangled fragment (newlines stripped) -- kept
# byte-identical because it starts mid-function: the enclosing `def`/`while`
# headers are outside this view. As written, it reads as: choose an action,
# step the env, store the transition, call RL.learn() every 5 steps once
# step > 200, advance the observation, break on `done`; then a `__main__`
# section that builds a Maze env and a DoubleDQN agent and runs the Tk loop.
action = RL.choose_action(observation) observation_,reward,done = env.step(action) RL.store_transition(observation, action, reward, observation_) if (step > 200) and (step % 5 == 0): RL.learn() # swap observation observation = observation_ # break while loop when end of this episode if done: break step += 1 print('game over') env.destroy() if __name__ == '__main__': env = Maze() RL = DoubleDQN(env.n_actions, env.n_features, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9, replace_target_iter=200, memory_size=2000, # output_graph=True ) env.after(100, run_maze) env.mainloop() RL.plot_cost()
# Setup for a DoubleDQN agent driving MyEngine, with an xlwt workbook for
# logging results (reflowed from a whitespace-mangled one-liner).
file = xlwt.Workbook(encoding='utf-8', style_compression=0)
sheet = file.add_sheet('aa')

# fix: time.clock() was deprecated in 3.3 and removed in Python 3.8;
# time.perf_counter() is the documented replacement for elapsed-time timing.
t0 = time.perf_counter()

# NOTE(review): this rebinds the class name to its instance, so no further
# MyEngine objects can be constructed afterwards. Kept as-is because later
# (unseen) code may reference this exact name -- consider `engine = MyEngine()`.
MyEngine = MyEngine()
MyEngine.QD_start()

MEMORY_SIZE = 500   # original value was 3000
ACTION_SPACE = 3

sess = tf.Session()
RL = DoubleDQN(
    n_actions=ACTION_SPACE,
    n_features=2,
    memory_size=MEMORY_SIZE,
    e_greedy_increment=0.001,
    double_q=True,
    sess=sess,
    output_graph=True)
sess.run(tf.global_variables_initializer())
RL.load_net()   # restore previously saved network weights

nk = 0
step = 0
cont = 1   # counter used when storing data
array = np.array([0], float)
observation_S = array
# Pendulum-v0 comparison setup: a Natural DQN vs a Double DQN sharing one
# TF session (reflowed from a whitespace-mangled one-liner).
import gym  # fix: gym.make is called below but gym was never imported here
from RL_brain import DoubleDQN
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

env = gym.make('Pendulum-v0')
env = env.unwrapped
env.seed(1)   # fixed seed so both agents see the same episodes

MEMORY_SIZE = 3000   # replay-buffer capacity
ACTION_SPACE = 11    # discretization of the continuous torque action

sess = tf.Session()
with tf.variable_scope('Natural_DQN'):
    natural_DQN = DoubleDQN(
        n_actions=ACTION_SPACE, n_features=3, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001, double_q=False, sess=sess)
with tf.variable_scope('Double_DQN'):
    double_DQN = DoubleDQN(
        n_actions=ACTION_SPACE, n_features=3, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001, double_q=True, sess=sess,
        output_graph=True)
sess.run(tf.global_variables_initializer())
# NOTE(review): whitespace-mangled fragment (newlines stripped) -- kept
# byte-identical because it is cut off mid-loop: the episode `while` body
# continues past the end of this view. As written it builds a DoubleDQN for
# MountainCar-v0 (action/feature sizes taken from the env spaces), prints the
# observation-space bounds, then starts 100 episodes of render/act/step.
import gym from RL_brain import DoubleDQN env = gym.make('MountainCar-v0') env = env.unwrapped print(env.action_space, env.observation_space, env.observation_space.high, env.observation_space.low, sep='\n') RL = DoubleDQN(n_actions=env.action_space.n, n_features=env.observation_space.shape[0], learning_rate=0.01, e_greedy=0.9, replace_target_iter=100, memory_size=2000, e_greedy_increment=0.001) total_steps = 0 for i_episode in range(100): observation = env.reset() ep_r = 0 while True: env.render() action = RL.choose_action(observation) observation_, reward, done, info = env.step(action)
# NOTE(review): whitespace-mangled fragment (newlines stripped) -- kept
# byte-identical because it starts mid-function: this is the tail of a
# `train(RL)` helper (learn once the buffer is full, stop 20000 steps after
# that, return the recorded Q values) followed by a `__main__` section that
# trains a Double DQN and a Natural DQN and plots q_double vs q_nature.
if total_steps > MEMORY_SIZE: RL.learn() if total_steps - MEMORY_SIZE > 20000: break observation = observation_ total_steps += 1 return RL.q if __name__ == "__main__": DDQN = DoubleDQN(n_actions=ACTION_SPACE, n_features=3, learning_rate=0.01, e_greedy=0.9, replace_target_iter=100, memory_size=2000, e_greedy_increment=0.001, double_q=True) q_double = train(DDQN) DQN = DoubleDQN(n_actions=ACTION_SPACE, n_features=3, learning_rate=0.01, e_greedy=0.9, replace_target_iter=100, memory_size=2000, e_greedy_increment=0.001, double_q=False) q_nature = train(DQN) plt.plot(np.array(q_nature), c='r', label='natural') plt.plot(np.array(q_double), c='b', label='double')
# NOTE(review): whitespace-mangled fragment (newlines stripped) -- kept
# byte-identical because it is cut mid-expression: the `f_action = (...`
# assignment (mapping the discrete action index back to a continuous torque)
# is unterminated at the end of this view. Sets up Pendulum-v0 with a Natural
# DQN (misspelled `naturel_DQN` -- TODO fix where its references are visible)
# and a Double DQN, then begins a `train(RL)` loop that renders after
# MEMORY_SIZE + 8000 steps.
import gym from RL_brain import DoubleDQN import numpy as np import matplotlib.pyplot as plt env = gym.make("Pendulum-v0") env = env.unwrapped env.seed(1) MEMORY_SIZE = 3000 ACTION_SPACE = 11 naturel_DQN = DoubleDQN(n_actions=ACTION_SPACE, n_features=3, memory_size=MEMORY_SIZE, e_greedy_increment=0.001, double_q=False) double_DQN = DoubleDQN(n_actions=ACTION_SPACE, n_features=3, memory_size=MEMORY_SIZE, e_greedy_increment=0.001, double_q=True) def train(RL): total_steps = 0 observation = env.reset() while True: if total_steps - MEMORY_SIZE > 8000: env.render() # show the game when trained for some time action = RL.choose_action(observation) f_action = (action - (ACTION_SPACE - 1) / 2) / (
# Natural DQN vs Prioritized-Replay DQN setup (reflowed from a
# whitespace-mangled one-liner). Assumes `env` was created by earlier
# (unseen) code; both agents share a single TF session.
env = env.unwrapped
env.seed(21)

MEMORY_SIZE = 10000   # replay-buffer capacity for both agents
ACTION_SPACE = 3
FEATURE_SIZE = 2

# Print the env's action/observation spaces and the observation bounds.
print(env.action_space,
      env.observation_space,
      env.observation_space.high,
      env.observation_space.low,
      sep='\n')

sess = tf.Session()
with tf.variable_scope('Natural_DQN'):
    natural_DQN = DoubleDQN(
        n_actions=ACTION_SPACE, n_features=FEATURE_SIZE,
        memory_size=MEMORY_SIZE, e_greedy_increment=0.001,
        double_q=False, sess=sess)
with tf.variable_scope('Prioritized_DQN'):
    prioritized_DQN = DQNPrioritizedReplay(
        n_actions=ACTION_SPACE, n_features=FEATURE_SIZE,
        memory_size=MEMORY_SIZE, e_greedy_increment=0.001,
        prioritized=True, sess=sess, output_graph=True)
sess.run(tf.global_variables_initializer())
# NOTE(review): whitespace-mangled fragment (newlines stripped) -- kept
# byte-identical because it starts mid-loop: the enclosing episode loop is
# outside this view. Tail of a Maze run loop plus a `__main__` section that
# builds a Dueling Double-DQN, runs the Tk training loop, pickles the
# (episodes, steps) history, and plots the cumulative-step curve.
# Translation of the inline Chinese comments, in order: "total step count";
# "attributes of the observation/state, e.g. length/width/height"; "replace
# the target_net parameters every 200 steps"; "memory capacity"; "whether to
# output a tensorboard file"; "run reinforcement-learning training";
# "view the training-time curve".
episodes.append(episode) break step += 1 # 总步数 # end of game print('game over') env.destroy() if __name__ == "__main__": env = Maze() RL = DoubleDQN(env.n_actions, env.n_features,#observation/state 的属性,如长宽高 learning_rate=0.01, reward_decay=0.9, dueling=True, e_greedy=0.9, replace_target_iter=200, # 每 200 步替换一次 target_net 的参数 memory_size=2000, # 记忆上限 # output_graph=True # 是否输出 tensorboard 文件 ) env.after(100, run_maze)#进行强化学习训练 env.mainloop() # 观看训练时间曲线 his_double = np.vstack((episodes, steps)) file = open('his_dueling_DDQN.pickle', 'wb') pickle.dump(his_double, file) file.close() plt.plot(his_double[0, :], his_double[1, :] - his_double[1, 0], c='b', label='Dueling DDQN')
# Pendulum-v0 comparison setup, second variant (reflowed from a
# whitespace-mangled one-liner): Natural DQN vs Double DQN, then a 400-episode
# reward-tracking loop begins.
import gym  # fix: gym.make is called below but gym was never imported here
from RL_brain import DoubleDQN
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

env = gym.make('Pendulum-v0')
env = env.unwrapped
env.seed(1)   # fixed seed so both agents see the same episodes

MEMORY_SIZE = 3000   # replay-buffer capacity
ACTION_SPACE = 11    # discretization of the continuous torque action

sess = tf.Session()
with tf.variable_scope('Natural_DQN'):
    # fix: the natural (non-double) baseline was constructed with
    # double_q=True, making both agents identical; per its name, its scope,
    # and the sibling scripts in this file, the baseline uses double_q=False.
    natural_DQN = DoubleDQN(
        n_actions=ACTION_SPACE,
        n_features=3,
        memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001,
        double_q=False,
        sess=sess
    )
with tf.variable_scope('Double_DQN'):
    double_DQN = DoubleDQN(
        n_actions=ACTION_SPACE,
        n_features=3,
        memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001,
        double_q=True,
        sess=sess,
        output_graph=True)
sess.run(tf.global_variables_initializer())

total_steps = 0
all_rewards = []   # per-episode cumulative rewards
for i in range(400):
    s_c = env.reset()
    done = False
    step = 0
    reward = 0
# NOTE(review): whitespace-mangled fragment (newlines stripped) -- kept
# byte-identical because it starts mid-function: the enclosing `def`/`while`
# headers are outside this view. Near-duplicate of the earlier Maze fragment:
# learn every 5 steps once step > 200, advance the observation, break on
# `done`; then a `__main__` section that builds the Maze env and DoubleDQN
# agent, runs the Tk loop, and plots the training cost.
if (step > 200) and (step % 5 == 0): RL.learn() # swap observation observation = observation_ # break while loop when end of this episode if done: break step += 1 print('game over') env.destroy() if __name__ == '__main__': env = Maze() RL = DoubleDQN( env.n_actions, env.n_features, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9, replace_target_iter=200, memory_size=2000, # output_graph=True ) env.after(100, run_maze) env.mainloop() RL.plot_cost()