def calTime(filename):
    """Evaluate a stored Q-table and return the mean task-completion time.

    The learned policy is replayed (greedy action selection, no learning)
    for a fixed number of episodes on a freshly built task environment.

    :param filename: path of the saved Q-table loaded by ``QLearningTable``.
    :return: mean completion time over all evaluation episodes.
    """
    task = createTask()
    env = Maze(task)
    RL = QLearningTable(actions=list(range(env.n_actions)), filename=filename)

    episode_times = []
    for _ in range(10000):
        observation = env.reset()
        done = False
        # Roll out one episode under the stored policy (no RL.learn call here).
        while not done:
            action = RL.choose_action_real(str(observation))
            observation, reward, done = env.step(action)
        # findmax(task) yields the completion time of the finished schedule.
        episode_times.append(findmax(task))

    return np.mean(episode_times)
# --- Random baseline strategy ---
E1 = E
# NOTE(review): `t +=` implies `t` was initialized and accumulates across
# several runs before this point; dividing by 10 below suggests an average
# over 10 baseline runs — confirm against the surrounding (unseen) loop.
t += Baseline(user, G, E1, KE, edgeuser)
# NOTE(review): `random` here is a results list that shadows the stdlib
# `random` module — verify no stdlib usage is needed in this scope.
random.append(t / 10)

# --- Brute-force enumeration ---
H = topology()  # task graph
E2 = E
t = Brute(user, H, E2, KE, edgeuser)  # (user, application, energy, energy limit)
brute.append(t)

# --- Q-learning ---
observation = env.reset()
stade = copy.deepcopy(observation)
while True:
    # The RL agent picks an action based on the observed state.
    action = RL.choose_action_real(str(observation))
    # Apply the action in the environment; get the next state observation,
    # the reward, and the done flag (whether the episode has finished).
    observation_, reward, done = env.step(action, user)
    # Learn from the (state, action, reward, next_state) transition.
    RL.learn(str(stade), action, reward, str(observation_))
    # Carry the next state into the next loop iteration.
    stade = copy.deepcopy(observation_)
    # The episode ends when the environment reports done.
    if done:
        break
# presumably 7000 is an offset converting the final reward back into a
# cost/time metric — TODO confirm against the reward definition in the env.
q_learning.append(7000 - reward)
print(reward)

# X axis for the comparison plot; 9 points, so the enclosing (unseen) loop
# presumably appends 9 values to each of `random` / `brute` / `q_learning`.
x = [5, 10, 15, 20, 25, 30, 35, 40, 45]
plt.plot(x, random, marker='o', label="$Random$", c='b')
plt.plot(x, brute, marker='x', label="$Brute-force$", c='y')