def __init__(self):
    # Define publishers to start or stop the V-REP simulation.
    self.pub_start_signal = rospy.Publisher("/startSimulation", Bool, queue_size=1)
    self.pub_stop_signal = rospy.Publisher("/stopSimulation", Bool, queue_size=1)

    # Starting the simulation by hand might be a better approach.
    time.sleep(2)
    start_signal = Bool()
    start_signal.data = True
    self.pub_start_signal.publish(start_signal)

    # Define the DQN algorithm.
    tensorflow.reset_default_graph()
    self.RL1 = DeepQNetwork(
        n_actions=len(robot1.action_space),
        n_features=len(robot1.observation_space),
        learning_rate=0.01,
        e_greedy=0.9,
        replace_target_iter=100,
        memory_size=2000,
        e_greedy_increment=0.0008,
    )

    self.total_steps = 0
    self.rsrvl = 0.05  # to check

    self.train()
def load_DQN(epsilon_start=0.5):
    global RL_0
    global RL_1
    try:
        RL_1 = DeepQNetwork(
            env.n_action, env.n_features, 'player_1', 'player_0',
            # replace_target_iter=200,
            memory_size=4000,
            batch_size=500,
            epsilon_start=epsilon_start,
            e_greedy_increment=0.01)
    except Exception:
        # Building 'player_1' failed, presumably because the 'player_0'
        # network it references does not exist yet, so build both in order.
        RL_0 = DeepQNetwork(
            env.n_action, env.n_features, 'player_0', 'player_1',
            # replace_target_iter=200,
            memory_size=4000,
            batch_size=500,
            epsilon_start=epsilon_start,
            e_greedy_increment=0.01)
        RL_1 = DeepQNetwork(
            env.n_action, env.n_features, 'player_1', 'player_0',
            # replace_target_iter=200,
            memory_size=4000,
            batch_size=500,
            epsilon_start=epsilon_start,
            e_greedy_increment=0.01)
    else:
        RL_0 = DeepQNetwork(
            env.n_action, env.n_features, 'player_0', 'player_1',
            # replace_target_iter=200,
            memory_size=4000,
            batch_size=500,
            epsilon_start=epsilon_start,
            e_greedy_increment=0.01)
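# ----------------------------------------------------------------------------
# The try/except/else above builds the same two networks in either order,
# which obscures the intent. A minimal sketch of a deterministic equivalent,
# assuming the only requirement is that 'player_0' exists before 'player_1'
# references it (the constructor signature is taken from the snippet above):
# ----------------------------------------------------------------------------
def load_DQN_ordered(epsilon_start=0.5):
    global RL_0, RL_1
    common = dict(memory_size=4000, batch_size=500,
                  epsilon_start=epsilon_start, e_greedy_increment=0.01)
    RL_0 = DeepQNetwork(env.n_action, env.n_features,
                        'player_0', 'player_1', **common)
    RL_1 = DeepQNetwork(env.n_action, env.n_features,
                        'player_1', 'player_0', **common)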
def step():
    # Body elided in this excerpt; run_maze() below calls step(action) to
    # advance the environment and get (observation_, reward, done).
    ...


def run_maze():
    # Note: the counter is named step_count so it does not shadow step() above.
    step_count = 0
    for episode in range(300):
        # initial observation
        observation = update()

        while True:
            # refresh the (randomly generated) environment
            render()

            # RL chooses an action based on the observation
            action = RL.choose_action(observation)

            # RL takes the action and gets the next observation and reward
            observation_, reward, done = step(action)

            RL.store_transition(observation, action, reward, observation_)

            if (step_count > 200) and (step_count % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break the while loop at the end of this episode
            if done:
                break
            step_count += 1
            time.sleep(0.01)
        time.sleep(0.02)

    # end of game
    print('game over')


if __name__ == "__main__":
    # maze game
    RL = DeepQNetwork(n_actions=8, n_features=30,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=2000,
                      # output_graph=True
                      )
    # env.after(100, run_maze)
    run_maze()
    RL.plot_cost()
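# ----------------------------------------------------------------------------
# Most snippets in this section assume a DeepQNetwork class in the style of
# Morvan Zhou's RL tutorials. The stub below is a minimal sketch of the
# interface those training loops rely on; the method bodies are placeholders,
# and a real implementation builds eval/target networks in TensorFlow.
# ----------------------------------------------------------------------------
import numpy as np


class DeepQNetwork:
    def __init__(self, n_actions, n_features, learning_rate=0.01,
                 reward_decay=0.9, e_greedy=0.9, replace_target_iter=200,
                 memory_size=2000, batch_size=32, e_greedy_increment=None,
                 output_graph=False, **kwargs):
        self.n_actions = n_actions
        self.n_features = n_features
        self.memory_size = memory_size
        self.memory_counter = 0
        # Each row stores one flattened transition: (s, a, r, s_).
        self.memory = np.zeros((memory_size, n_features * 2 + 2))
        self.cost_his = []

    def choose_action(self, observation):
        # Placeholder: a real version is epsilon-greedy over the eval net's Q-values.
        return np.random.randint(self.n_actions)

    def store_transition(self, s, a, r, s_):
        index = self.memory_counter % self.memory_size
        self.memory[index, :] = np.hstack((s, [a, r], s_))
        self.memory_counter += 1

    def learn(self):
        # Placeholder: sample a batch, compute the TD target, take one gradient
        # step, and copy eval-net weights to the target net every
        # replace_target_iter steps.
        pass

    def plot_cost(self):
        import matplotlib.pyplot as plt
        plt.plot(np.arange(len(self.cost_his)), self.cost_his)
        plt.ylabel('Cost')
        plt.xlabel('training steps')
        plt.show()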
def __init__(self):
    # Define the DQN algorithm.
    tensorflow.reset_default_graph()
    self.RL1 = DeepQNetwork(
        n_actions=len(robot1.action_space),
        n_features=len(robot1.observation_space),
        learning_rate=0.0001,
        e_greedy=0.9,
        replace_target_iter=100,
        memory_size=2000,
        e_greedy_increment=0.008,  # 0.0008
    )

    self.total_steps = 0
    self.rsrvl = 0.05  # to check

    self.train()
def hello():
    # print("hello")
    from maze_env import Maze
    from RL_brain import DeepQNetwork

    global env
    env = Maze()
    global RL
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=2000,
                      # output_graph=True
                      )
    env.after(100, run_maze)
    env.mainloop()
def run_snake():
    brain = DeepQNetwork(4, "")
    snakeGame = SnakeEnv()

    # Feed an initial "move right" action to start the game.
    observation, reward, terminal, score = snakeGame.step(np.array([0, 0, 0, 1]))
    observation = pre_process(observation)
    brain.set_init_state(observation[:, :, 0])

    # Start the actual game.
    i = 1  # step counter
    while i <= 500000:
        i = i + 1
        action = brain.choose_action()
        next_observation, reward, terminal, score = snakeGame.step(action)
        # print(reward)
        next_observation = pre_process(next_observation)
        brain.learn(next_observation, action, reward, terminal)
        if i % 100 == 0:
            print(i)

    # Plot the loss and per-round step curves.
    brain.plot_cost()
    snakeGame.plot_cost()
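# ----------------------------------------------------------------------------
# pre_process() is used above but not defined in the snippet. A minimal sketch
# of a typical frame preprocessor, assuming an 80x80 binarized single-channel
# input; the exact size and threshold are assumptions, not from the original.
# ----------------------------------------------------------------------------
import cv2
import numpy as np


def pre_process(frame):
    # Resize the raw color frame to 80x80, grayscale it, then binarize it.
    gray = cv2.cvtColor(cv2.resize(frame, (80, 80)), cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
    return np.reshape(binary, (80, 80, 1))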
# Automatically creates a local docker container.
env.configure(remotes=1, fps=5,
              vnc_driver='go',
              vnc_kwargs={'encoding': 'tight', 'compress_level': 0,
                          'fine_quality_level': 100, 'subsample_level': 0})
observation_n = env.reset()
# step = 0
n_act = 3
n_features = 2
cur_y, cur_x = resetY, resetX
RL = DeepQNetwork(n_act, n_features,
                  learning_rate=0.11,
                  reward_decay=0.9,
                  e_greedy=0.9,
                  hidden_layers=[10, 10],
                  replace_target_iter=200,
                  memory_size=4000,
                  # output_graph=True
                  )

while True:
    goal_y, goal_x = centre_button(observation_n[0])
    if goal_y == -1:
        observation_n, reward_n, done_n, info = env.step(
            [[universe.spaces.PointerEvent(resetX, resetY, 0)]])
        cur_x, cur_y = resetX, resetY
        env.render()
        continue
    state = [goal_y - cur_y, goal_x - cur_x]
    # state = [int(round(n)) for n in state]
import gym
import numpy as np
import time

from RL_brain import DeepQNetwork

env = gym.make('sheep-v0')
env = env.unwrapped

print(env.action_space)
print(env.observation_space)

RL = DeepQNetwork(
    n_actions=env.DISCRETE_Action_Count,
    n_features=env.FEATURE_Count,
    learning_rate=0.01,
    e_greedy=0.9,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.001,
)

total_steps = 0
REWARD_DISTANCE = 100000
REWARD_RADIUS = 50

for i_episode in range(1000):
    # env.reset() does not work correctly for this env, so call _reset() directly.
    observation = env._reset()
    observation = np.asarray(observation)
    ep_r = 0
NUM_IOT = 50
NUM_FOG = 5
NUM_EPISODE = 1000
NUM_TIME_BASE = 100
MAX_DELAY = 10
NUM_TIME = NUM_TIME_BASE + MAX_DELAY

# GENERATE ENVIRONMENT
env = Offload(NUM_IOT, NUM_FOG, NUM_TIME, MAX_DELAY)

# GENERATE MULTIPLE CLASSES FOR RL
iot_RL_list = list()
for iot in range(NUM_IOT):
    iot_RL_list.append(
        DeepQNetwork(
            env.n_actions,
            env.n_features,
            env.n_lstm_state,
            env.n_time,
            learning_rate=0.01,
            reward_decay=0.9,
            e_greedy=0.99,
            replace_target_iter=200,  # update the target net every 200 steps
            memory_size=500,          # maximum memory size
        ))

# TRAIN THE SYSTEM
train(iot_RL_list, NUM_EPISODE)
print('Training Finished')
        'show_deck_public': ['+3', '+11', '+15', 'dog', 'DOG', '-5', '+5', '+8', '-8']
    }]
}
action = decide(inputDic)
print(action)
input("Testing Action")

if __name__ == "__main__":
    # Parse the data properly.
    # data_parser.rl_parse_raw_data()
    RL = DeepQNetwork(SELL_ACTIONS, SELL_FEATURES,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=2000,
                      output_graph=False)
    test()
    train_from_data()
    print("Displaying the cost...")
    RL.plot_cost()
index_ = '_4'
save_list = [
    10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 1000, 5000, 10000, 50000,
    100000, 200000, 300000, 400000, 500000, 600000, 700000, 800000,
    900000, 1000000
]
train = True

env = envR(show=False)
RL = DeepQNetwork(env.n_actions, env.n_features,
                  rows=env.rows,
                  cols=env.cols,
                  learning_rate=0.00001,
                  reward_decay=0.9,
                  e_greedy=0.9,
                  replace_target_iter=200,
                  memory_size=5000,
                  e_greedy_increment=0.0005,
                  output_graph=False)

step = 0
succ = 0
total_cost = 0
for episode in range(int(r)):
    pre_maps = env.reset()
    for i in range(100):
        isVisited = True
        # print('episode:', episode)
import gym
from gym import wrappers

from RL_brain import DeepQNetwork

env = gym.make('MountainCar-v0')
env = wrappers.Monitor(env, "./gym-results", force=True)

print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DeepQNetwork(n_actions=3, n_features=2,
                  learning_rate=0.01,
                  e_greedy=0.9,
                  replace_target_iter=300,
                  memory_size=3000,
                  e_greedy_increment=0.0002,)

total_steps = 0

for i_episode in range(300):
    observation = env.reset()
    ep_r = 0
    while True:
        env.render()
        action = RL.choose_action(observation)
        observation_, reward, done, info = env.step(action)
vrep.simxFinish(-1)  # clean up any previous connections
clientID = vrep.simxStart('127.0.0.1', 19997, True, True, 5000, 5)
if clientID == -1:
    print("Could not connect to server")
    sys.exit()

print(robot1.action_space)       # how many actions this environment offers
print(robot1.observation_space)  # how many observation states this environment offers

# Define the DQN algorithm.
RL1 = DeepQNetwork(
    n_actions=len(robot1.action_space),
    n_features=len(robot1.observation_space),
    learning_rate=0.01,
    e_greedy=0.9,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.0008,
)
RL2 = DeepQNetwork(
    n_actions=len(robot2.action_space),
    n_features=len(robot2.observation_space),
    learning_rate=0.01,
    e_greedy=0.9,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.0008,
)
import numpy as np
import torch as t
from gym import make

from RL_brain import DeepQNetwork

np.set_printoptions(precision=2, suppress=True)

env = make("MountainCar-v0")
# env = env.unwrapped
print("Observation_space:{}\nAction_space:{}".format(env.observation_space,
                                                     env.action_space))

RL = DeepQNetwork(env.action_space.n, 2,
                  learning_rate=0.01,
                  e_greedy_increment=0.001,
                  double_q=True,
                  prioritized=True,
                  dueling=True)

for i in range(500):
    observation = env.reset()
    while True:
        env.render()
        action = RL.choose_action(observation)
        observation_, reward, done, _ = env.step(action)
        # Shaped reward: the car's x-position, so rightward progress pays off.
        reward = observation_[0]
        RL.store_transition(observation, action, reward, observation_)
        if RL.memory_counter > RL.memory_size:
            RL.learn()
        if done:
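# ----------------------------------------------------------------------------
# The double_q flag above selects Double DQN. A worked numpy sketch of how the
# two target rules differ, with made-up Q-values and gamma = 0.9; this
# illustrates the update rule, not the library's internals.
# ----------------------------------------------------------------------------
import numpy as np

gamma = 0.9
r = 1.0
q_eval_next = np.array([1.0, 3.0, 2.0])    # eval net:   Q(s', a)
q_target_next = np.array([0.5, 1.5, 4.0])  # target net: Q(s', a)

# Vanilla DQN: the target net both selects and evaluates the next action.
y_dqn = r + gamma * q_target_next.max()           # 1 + 0.9 * 4.0 = 4.6

# Double DQN: the eval net selects the action, the target net evaluates it.
a_star = q_eval_next.argmax()                     # action 1
y_double = r + gamma * q_target_next[a_star]      # 1 + 0.9 * 1.5 = 2.35

print(y_dqn, y_double)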
                break
            step += 1  # total step count

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,  # attributes of the observation/state, e.g. length/width/height
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        prioritized=True,
        replace_target_iter=200,  # replace the target_net parameters every 200 steps
        memory_size=2000,         # memory capacity
        # output_graph=True       # whether to write a TensorBoard file
    )
    env.after(100, run_maze)  # run the RL training
    env.mainloop()

    # Save the episode/step history (the training-time curve).
    his_prioritize = np.vstack((episodes, steps))
    file = open('his_prioritize.pickle', 'wb')
    pickle.dump(his_prioritize, file)
    file.close()
            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break the while loop at the end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,  # replace the target_net parameters every 200 steps
                      memory_size=2000,         # memory capacity
                      # output_graph=True
                      )
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
def main():
    # env = grc.RemoteEnv('tmp/sock')
    # env = TrackedEnv(env)
    env = retro.make(game='SonicTheHedgehog-Genesis', state='GreenHillZone.Act1')
    total_steps = 0  # total step count
    sess = tf.Session()
    env = DeepQNetwork(n_actions=9, double_q=False, sess=sess, env=env)  # initialize the DQN wrapper
    '''
    Composite actions are needed, e.g. running right while jumping, but gym-retro
    exposes a 12-dimensional one-hot-style button array (env.action_space.sample()),
    where each 1 presses a single button; e.g. [0,0,0,0,0,0,1,0,0,0,0,0] means
    "walk right" only. To get composite actions, the following custom actions are
    defined and converted to/from retro's default buttons:
        [1,0,0,0,0,0,0]  stand still
        [0,1,0,0,0,0,0]  walk left
        [0,0,1,0,0,0,0]  walk right
        [0,0,0,0,1,0,0]  roll left
        [0,0,0,0,0,1,0]  roll right
        [0,0,0,1,0,0,0]  crouch
        [0,0,0,0,0,0,1]
    During training the action space contains only these custom actions; the
    chosen action is then converted back to retro's format.
    '''
    # Initialize all variables; initialization must be done explicitly before
    # any other op runs, so add a single init op and run it first.
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()  # saves all trained variables
    # Checkpoints go into saved_networks/ so training can resume from there.
    checkpoint = tf.train.get_checkpoint_state('saved_networks')
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)  # resume training
        print('Successfully loaded: ', checkpoint.model_checkpoint_path)
    else:
        print('loaded failed')  # no saved model, train from scratch

    # for episode in range(2000):
    action_stack = deque()  # how many repeats of the current action remain
    info_rings = deque()    # tracks the change of the ring count in info
    info_rings.append(0)    # seed with a 0
    # total_reward = 0      # total reward

    # Reset the environment to get the first observation, a color frame that
    # is not used directly.
    observation = env.reset()
    # observation, reward, done, _ = env.step(env.action_space.sample())  # random action

    # At the start of an episode there are no 4 frames yet, so stack the first
    # frame 4 times.
    # x_t_1, r_0, terminal, _ = env.step(env.action_space.sample())
    x_t_2 = cv2.cvtColor(cv2.resize(observation, (80, 80)),
                         cv2.COLOR_BGR2GRAY)  # resize to 80x80 grayscale
    ret, x_t = cv2.threshold(x_t_2, 1, 255, cv2.THRESH_BINARY)  # binarize to black/white
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)  # stack into an 80x80x4 state
    # x = cv2.cvtColor(cv2.resize(observation_, (80, 80)), cv2.COLOR_BGR2YCR_CB)

    for episode in range(3000):
        while True:
            env.render()  # render the environment

            # Four stacked frames form one state, so the same action is
            # repeated four times: pop from the stack if repeats remain,
            # otherwise choose a new action from the current state.
            if len(action_stack) > 0:
                action_execute = action_stack.pop()
            else:
                # Choose a custom composite action from the 4-frame state.
                action_button = env.choose_action(s_t)
                # Convert it to the action format the system understands.
                action_system = env.action_button_2_onehot(action_button)
                for i in range(3):
                    # Push 3 copies; with the one executed now, 4 repeats total.
                    action_stack.append(action_system)
                action_execute = action_system

            # Execute the action to get the next frame, reward, and info.
            observation_, reward, done, info = env.step(action_execute)

            # Track the ring count from info: losing rings is punished,
            # gaining rings is rewarded.
            info_rings.append(info['rings'])
            if info_rings[1] - info_rings[0] < 0:
                reward -= 50
            elif info_rings[1] - info_rings[0] > 0:
                reward += 50
            else:
                pass
            info_rings.popleft()

            # total_reward += reward
            # print(total_reward)
            # Optionally give a small bonus for moving right:
            # if np.argmax(action_system) == 7:
            #     reward = 0.1

            if done:  # dying mid-episode gets a negative reward
                reward = -100
                env.reset()

            # Convert the new frame to a binarized 80x80x1 image.
            x_t1_pre = cv2.cvtColor(cv2.resize(observation_, (80, 80)),
                                    cv2.COLOR_BGR2GRAY)
            ret, x_t1 = cv2.threshold(x_t1_pre, 1, 255, cv2.THRESH_BINARY)
            x_t1 = np.reshape(x_t1, (80, 80, 1))
            # Append it to the previous 3 frames to form the next state.
            s_t1 = np.append(s_t[:, :, :3], x_t1, axis=2)

            # Store (s_t, action, reward, s_t1, done); note that s_t and s_t1
            # are 4-frame stacks, hence the processing above. The stored action
            # is the custom composite action button.
            env.store_memory(s_t, action_button, reward, s_t1, done)

            # Start learning after 3000 steps, then learn every 100 steps.
            if (total_steps > 3000) and (total_steps % 100 == 0):
                with tf.device("/gpu:0"):
                    env.trainNet()  # train the network

            if done:
                break
            s_t = s_t1
            total_steps += 1
            if total_steps % 50000 == 0:
                saver.save(sess, './', global_step=total_steps)

    print('Game over')
    env.close()
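# ----------------------------------------------------------------------------
# env.action_button_2_onehot() is called above but not shown. Below is a
# minimal sketch of what such a conversion could look like. The Genesis button
# order and the composite-to-button mapping here are illustrative assumptions,
# not taken from the original code.
# ----------------------------------------------------------------------------
import numpy as np

BUTTONS = ['B', 'A', 'MODE', 'START', 'UP', 'DOWN',
           'LEFT', 'RIGHT', 'C', 'Y', 'X', 'Z']

# Composite-action index -> buttons held down (assumed mapping):
COMPOSITE_TO_BUTTONS = {
    0: [],                 # stand still
    1: ['LEFT'],           # walk left
    2: ['RIGHT'],          # walk right
    3: ['DOWN'],           # crouch
    4: ['DOWN', 'LEFT'],   # roll left
    5: ['DOWN', 'RIGHT'],  # roll right
    6: ['B'],              # jump
}


def action_button_2_onehot(action_index):
    """Convert a composite-action index into retro's 12-dim button array."""
    onehot = np.zeros(12, dtype=np.int8)
    for name in COMPOSITE_TO_BUTTONS[action_index]:
        onehot[BUTTONS.index(name)] = 1
    return onehot

# Example: action_button_2_onehot(5) presses DOWN + RIGHT (roll right).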
import gym
import gym_abr
import numpy as np

from RL_brain import DeepQNetwork
import os

S_INFO = 6  # bit_rate, buffer_size, next_chunk_size, bandwidth_measurement (throughput and time), chunk_til_video_end
S_LEN = 8   # how many frames of the past to take
A_DIM = 6

env = gym.make('ABR-v0')
RL = DeepQNetwork(
    n_actions=A_DIM,
    n_features=S_INFO * S_LEN,
    learning_rate=1e-4,
    e_greedy=0.99,
    replace_target_iter=100,
    memory_size=3000,
    e_greedy_increment=1e-6,
)

_file = open('test.csv', 'w')
step = 0
episode = 0
while True:
    # for episode in range(3000):
    # initial observation
    ep_r = 0.
    fetch = 0.
    observation = env.reset()
    observation = np.reshape(observation, (S_INFO * S_LEN))
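# ----------------------------------------------------------------------------
# The reshape above flattens the (S_INFO, S_LEN) state-history matrix into the
# 48-dimensional vector the fully connected DQN expects. A tiny worked example:
# ----------------------------------------------------------------------------
import numpy as np

S_INFO, S_LEN = 6, 8
obs = np.arange(S_INFO * S_LEN).reshape(S_INFO, S_LEN)  # 6x8 history matrix
flat = np.reshape(obs, (S_INFO * S_LEN))                # 48-dim DQN input
assert flat.shape == (48,)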
    slice_user_seq, max_index = env.generate_subframe_index(
        association_ues, lamda_k=env.lamda_k, data_num=data_num)

    # virtual resource allocation
    r_sk = np.ones([4, 4]) * 0.25

    # physical resource allocation
    total_subframe = max_index
    pr = PhysicalResource(TPs=TPs, user_qos=env.user_qos, env=env)
    slice_sat_ratio, slice_avg_RU = pr.allocate(
        association_ues, r_sk, total_subframe)
    print('slice_sat_ratio: ', slice_sat_ratio, ' \n slice_avg_RU: ', slice_avg_RU)

    print('-------end----')


if __name__ == '__main__':
    # main()
    RL = DeepQNetwork(
        n_actions=256,
        n_features=12,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,  # replace the target_net parameters every 200 steps
        memory_size=3000,         # memory capacity
        # output_graph=True       # whether to write a TensorBoard file
    )
    train()
            # swap observation
            observation = observation_

            # break the while loop at the end of this episode
            if done:
                break
            step += 1
            # if LastTime > 50:
            #     return LastTime

    # end of game
    print('game over')
    # env.destroy()


if __name__ == "__main__":
    # bird game
    env = game.GameState()  # define the game environment -> jump to game
    actions = 2
    features = 5
    RL = DeepQNetwork(actions, features,
                      learning_rate=10**-2,
                      reward_decay=1.0,
                      e_greedy=0.6,
                      replace_target_iter=200,
                      memory_size=50,
                      output_graph=True)
    time.sleep(0.5)
    run_bird()
    # env.mainloop()
    RL.plot_cost()
            # swap observation
            observation = observation_

            # break the while loop at the end of this episode
            if done:
                break
            step += 1

    print('game over!')
    env.destroy()


if __name__ == '__main__':
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
    )
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
Num_Training = timesteps - Num_Exploration
ratio_total_reward = 0.2
RL_set = []
graph_set = []
sess_set = []
for i in range(n_agents):
    # Give each agent its own graph and session so the networks stay isolated.
    g = tf.Graph()
    sess = tf.Session(graph=g)
    with sess.as_default():
        with g.as_default():
            RL = DeepQNetwork(n_actions=n_actions,
                              n_features=vector_obs_len,
                              sess=sess,
                              agent_id=i,
                              num_training=Num_Training,
                              learning_rate=0.00025,  # 0.002
                              reward_decay=0.99,
                              replace_target_iter=5000,
                              memory_size=Num_Exploration,
                              batch_size=32,
                              save_model_freq=10000,
                              load_model=False,
                              )
            RL_set.append(RL)

# run_this is a single function executed for all agents.
run_this(RL_set, n_episode, learn_freq, Num_Exploration, n_agents,
         ratio_total_reward)
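# ----------------------------------------------------------------------------
# Why the per-agent tf.Graph matters: identically named variables would clash
# (or silently be shared) inside one graph, but live independently in separate
# graphs, each bound to its own session. A minimal TF1-style sketch:
# ----------------------------------------------------------------------------
import tensorflow as tf

g1, g2 = tf.Graph(), tf.Graph()
with g1.as_default():
    w = tf.get_variable('w', shape=[2, 2])  # lives only in g1
with g2.as_default():
    w = tf.get_variable('w', shape=[2, 2])  # same name, no clash: it is in g2

sess1 = tf.Session(graph=g1)  # each session is bound to exactly one graph
sess2 = tf.Session(graph=g2)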
env_list = []
env_list2 = []
for file_path in file_path_list:
    df = pd.read_csv(file_path)
    df = df.sort_values('trade_date', ascending=True)
    df = df.iloc[22:].reset_index(drop=True)  # drop the first days that lack moving-average data
    env_list.append(stock(df.iloc[0:1500], init_money=1000000, window_size=60))
    print(env_list)
    env_list2.append(stock(df.iloc[1500:].reset_index(drop=True),
                           init_money=1000000, window_size=60))

RL = DeepQNetwork(env_list[0].n_actions, env_list[0].n_features,
                  learning_rate=0.002,
                  reward_decay=0.9,
                  e_greedy=0.9,
                  replace_target_iter=300,
                  memory_size=7000,
                  batch_size=256,
                  # output_graph=True
                  )
run(env_list, max_round)

# env = stock(df)
# env = BackTest(env, show_log=True)
# env.draw('trade.png', 'profit.png')

i = 0
for env in env_list2:
    BackTest(env, show_log=False)
    name1 = 'trade1_' + str(i) + '.png'
from RL_brain import DeepQNetwork
from env import Env
from visual import read_log
import matplotlib.pyplot as plt
import numpy as np
import os

if __name__ == "__main__":
    if os.path.exists("log.txt"):
        os.remove("log.txt")
    ENV = Env()
    RL = DeepQNetwork(
        4,
        2,
        learning_rate=0.01,
        reward_decay=0.75,
        e_greedy=0.8,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )
    step = 1
    reward_his = []
    for episode in range(200):
        ENV.reset()  # reset the environment
        re_hi = 0.0
        # print("episode:", episode)
        with open('log.txt', 'a+') as f:
            f.write("episode:" + str(episode) + "\n")
        while True:
            observation = ENV.refresh_env()  # get the current environment state
import gym
import time

from RL_brain import DeepQNetwork

env = gym.make('CartPole-v0')

print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DeepQNetwork(
    n_actions=env.action_space.n,
    n_features=len(env.observation_space.high),
    learning_rate=0.01,
    e_greedy=0.99,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.001,
    hidden_layers=[20, 20],
)

total_steps = 0

for i_episode in range(100):
    observation = env.reset()
    while True:
        env.render()
        action = RL.choose_action(observation)
# create world
world = scenario.make_world()
# create multiagent environment
env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                    scenario.observation, info_callback=None,
                    shared_viewer=False)

# Define the DQN algorithm.
RL = DeepQNetwork(
    n_actions=env.action_space[0].n,
    n_features=env.observation_space[0].shape[0],
    learning_rate=0.01,
    e_greedy=0.9,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.0008,
)

total_steps = 0

for i_episode in range(100):
    # Get the first observation of episode i_episode.
    observation = env.reset()
    observation = observation[0]
    ep_r = 0
    cnt = 0
    while True:
        print("i_episode: " + str(i_episode) + " cnt: " + str(cnt))
    # servo[7] = servo_max
    # else:
    #     servo[7] = servo_mid

    # Write the servo positions to the four PWM channels.
    for i in range(4):
        pwm.set_pwm(i, 0, servo[i])

action_num = 16
observation_num = 4
distance_riq = 1

RL = DeepQNetwork(n_actions=action_num,
                  n_features=observation_num,
                  learning_rate=0.01,
                  e_greedy=0.9,
                  replace_target_iter=100,
                  memory_size=2000,
                  e_greedy_increment=0.001,)

total_steps = 0
actionDrive = [0, 0, 0, 0]


def convert(action):
    # Encode the action index as 4 binary motor flags, e.g. 5 -> [0, 1, 0, 1].
    actionDrive = '{0:04b}'.format(action)
    actionDrive = list(actionDrive)
    drive(map(int, actionDrive))
    # if action == 0:
    #     actionDrive = [0, 0, 0, 0]
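# ----------------------------------------------------------------------------
# convert() above encodes the DQN's discrete action index (0-15) as four
# binary drive flags, one per motor. A small worked example of the mapping,
# with drive() replaced by print() just to show the output:
# ----------------------------------------------------------------------------
def convert_demo(action):
    bits = [int(b) for b in '{0:04b}'.format(action)]
    print(action, '->', bits)


convert_demo(0)   # 0  -> [0, 0, 0, 0]
convert_demo(5)   # 5  -> [0, 1, 0, 1]
convert_demo(15)  # 15 -> [1, 1, 1, 1]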
            # break the while loop at the end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        output_graph=True  # whether to write the graph, True or False
    )
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()

# How to launch TensorBoard directly from VS Code:
# https://devblogs.microsoft.com/python/python-in-visual-studio-code-february-2021-release/
            RL.learn()

            # update the state
            observation = observation_

            if done:
                break
            step += 1

    print("game over")
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        e_greedy_increment=0.01,
        # output_graph=True
    )
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
def topology():
    "Create a network."
    net = Mininet_wifi(controller=Controller, accessPoint=OVSKernelAP)

    print("*** Creating nodes")
    h1 = net.addHost('h1', mac='00:00:00:00:00:01', ip='10.0.0.1/8')
    sta14 = net.addStation('sta1', mac='00:00:00:00:00:02', ip='10.0.0.2/8',
                           position='70,50,0')
    ap1 = net.addAccessPoint('ap1', ssid='ssid-ap1', mode='g', channel='1',
                             position='50,50,0')
    ap2 = net.addAccessPoint('ap2', ssid='ssid-ap2', mode='g', channel='6',
                             position='70,50,0', range=30)
    ap3 = net.addAccessPoint('ap3', ssid='ssid-ap3', mode='g', channel='11',
                             position='90,50,0')
    c1 = net.addController('c1', controller=Controller)

    net.setPropagationModel(model="logDistance", exp=5)

    print("*** Configuring wifi nodes")
    net.configureWifiNodes()

    print("*** Associating and Creating links")
    net.addLink(h1, ap1)
    net.addLink(ap1, ap2)
    net.addLink(ap2, ap3)
    # net.addLink(ap1, sta14)

    """uncomment to plot graph"""
    # net.plotGraph(max_x=400, max_y=400)
    net.plotGraph(max_x=120, max_y=120)

    net.setMobilityModel(time=0, model='RandomWayPoint', max_x=120, max_y=120,
                         min_v=30, max_v=50, seed=5, ac_method='ssf')

    print("*** Starting network")
    net.build()
    c1.start()
    ap1.start([c1])
    ap2.start([c1])
    ap3.start([c1])

    print("*** Running CLI")
    # CLI_wifi(net)

    second = sleeptime(0, 0, 1)
    new_rssi = [chanFunt(ap1, sta14), chanFunt(ap2, sta14), chanFunt(ap3, sta14)]
    print(new_rssi)

    n_actions, n_APs = len(new_rssi), len(new_rssi)
    brain = DeepQNetwork(n_actions, n_APs, param_file=None)
    state = new_rssi
    print('initial observation:' + str(state))

    try:
        while True:
            time.sleep(second)
            new_rssi = [chanFunt(ap1, sta14), chanFunt(ap2, sta14),
                        chanFunt(ap3, sta14)]
            # print new_rssi, rssi_tag(sta14)
            # print _getreward(sta14, h1)
            # print iperf([sta14, h1], seconds=0.0000001)
            action, q_value = brain.choose_action(state)
            reward, nextstate = step(rssi_tag(sta14), action,
                                     sta14, ap1, ap2, ap3, h1)
            # print 'iperf' + iperf([sta14, h1])
            # print new_rssi
            brain.setPerception(state, action, reward, nextstate)
            state = nextstate
    except KeyboardInterrupt:
        print('saving replayMemory...')
        brain.saveReplayMemory()

    # print new_rssi
    # snr_dict = map(setSNR, new_rssi)
    # print "*** Stopping network"
    net.stop()