Example #1
def calTime(filename):
    """
    Use the Q-table to compute the time spent completing all tasks
    :param filename: Q-table file passed to QLearningTable
    :return: mean task-completion time over the evaluation episodes
    """
    task = createTask()
    env = Maze(task)
    RL = QLearningTable(actions=list(range(env.n_actions)), filename=filename)
    Time1 = []
    # Time2 = []
    for i in range(10000):  # evaluate over 10000 episodes
        observation = env.reset()
        while True:
            action = RL.choose_action_real(str(observation))  # action read from the stored Q-table
            observation_, reward, done = env.step(action)
            # print(observation,action,reward)
            observation = observation_
            if done:
                time1 = findmax(task)  # completion time for this episode
                # time2 = calOmegaT(task,np.array([255])[0])
                Time1.append(time1)
                # Time2.append(time2)
                break
    # print(np.mean(Time1))
    # print(np.mean(Time2))
    return np.mean(Time1)
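
Both examples call into a QLearningTable that is not shown on this page. Below is a minimal sketch of the interface those calls appear to assume: a pandas DataFrame keyed by the string form of each observation, with one column per action, where choose_action_real reads the table greedily and learn applies the one-step Q-learning update. The hyperparameter defaults and the CSV loading for filename are illustrative assumptions, not the project's actual code.

# Minimal sketch of the QLearningTable interface used above (assumed; the
# class in dyc8818/ec-rl may differ). Backed by a pandas DataFrame indexed
# by str(observation), one column per action.
import numpy as np
import pandas as pd

class QLearningTable:
    def __init__(self, actions, filename=None,
                 learning_rate=0.01, reward_decay=0.9):
        self.actions = actions            # e.g. list(range(env.n_actions))
        self.lr = learning_rate
        self.gamma = reward_decay
        if filename is not None:
            # load a previously saved table (assumes it was written with to_csv)
            self.q_table = pd.read_csv(filename, index_col=0)
            self.q_table.columns = self.actions   # restore action labels
        else:
            self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def _check_state_exist(self, state):
        # unseen states get an all-zero row
        if state not in self.q_table.index:
            self.q_table.loc[state] = [0.0] * len(self.actions)

    def choose_action_real(self, state):
        # exploitation only: pick the best-valued action for this state,
        # breaking ties at random
        self._check_state_exist(state)
        row = self.q_table.loc[state, :]
        best = row[row == row.max()].index
        return np.random.choice(best)

    def learn(self, s, a, r, s_):
        # one-step Q-learning update:
        # Q(s, a) <- Q(s, a) + lr * (r + gamma * max_a' Q(s_, a') - Q(s, a))
        self._check_state_exist(s)
        self._check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        q_target = r + self.gamma * self.q_table.loc[s_, :].max()
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)

Example #1 only evaluates (choose_action_real plus env.step), while Example #2 below also calls learn, so the table is updated after every transition.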
Example #2
File: run_this.py  Project: dyc8818/ec-rl
            E1 = E
            t += Baseline(user, G, E1, KE, edgeuser)
        random.append(t / 10)  # average the accumulated baseline cost over 10 runs

        # brute-force method
        H = topology()  # task graph
        E2 = E
        t = Brute(user, H, E2, KE, edgeuser)  # (user, application, energy, energy limit)
        brute.append(t)

        # Q-learning
        observation = env.reset()
        stade = copy.deepcopy(observation)
        while True:
            # the RL agent picks an action based on the observed state
            action = RL.choose_action_real(str(observation))
            # the agent applies the action in the environment and gets back the next state observation, the reward, and done (whether the episode has ended)
            observation_, reward, done = env.step(action, user)
            # RL learns from this (state, action, reward, state_) transition
            RL.learn(str(stade), action, reward, str(observation_))
            # carry the next state over to the next loop iteration
            stade = copy.deepcopy(observation_)
            # if the episode has ended, this round is over
            if done:
                break
        q_learning.append(7000 - reward)
        print(reward)

    x = [5, 10, 15, 20, 25, 30, 35, 40, 45]
    plt.plot(x, random, marker='o', label="$Random$", c='b')
    plt.plot(x, brute, marker='x', label="$Brute-force$", c='y')
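
Both snippets also assume a Gym-style environment contract: reset() returns the initial observation and step() returns (observation_, reward, done). The copy.deepcopy in Example #2 suggests the environment may update the observation object in place, which would be why a snapshot of the previous state (stade) is kept for RL.learn. A minimal sketch under those assumptions follows; the project's Maze / topology environment is not shown on this page and will differ.

# Sketch of the environment contract the loops above rely on (illustrative
# only; class name, dynamics, and reward are placeholders).
class SketchEnv:
    def __init__(self, n_steps=10, n_actions=4):
        self.n_actions = n_actions       # QLearningTable needs the action count
        self.n_steps = n_steps
        self.t = 0
        self.state = [0, 0, 0]           # placeholder observation

    def reset(self):
        # begin a new episode and hand back the initial observation
        self.t = 0
        self.state = [0, 0, 0]
        return self.state

    def step(self, action, user=None):
        # apply the action and return (next observation, reward, done);
        # the observation object is updated in place here, matching the
        # deepcopy pattern used in Example #2
        self.t += 1
        self.state[self.t % len(self.state)] = int(action)
        reward = -1                      # placeholder per-step cost
        done = self.t >= self.n_steps
        return self.state, reward, done

Since str(observation) is what keys the Q-table, two states only share an entry when their string form is identical.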