Exemple #1
0
    def run_for_all_mode(self, bw, un):
        """Run the multi-user WoLF simulation and return the final local cost.

        ``un`` WoLF agents (8 actions each) play 2000 episodes.  Each episode
        the current queue values of every ``QueueRelay`` are snapshotted, a
        ``MatrixGame`` is built from them with ``BW=bw``, one step is played
        with the agents' chosen actions, and the relays and agents are
        updated from the step results.  Every 50th episode (except episode 0)
        the GPD1/GPD2 values of each relay are refreshed from
        ``self.gpdaa.gpd`` when it returns a truthy result.

        After the loop the per-episode summed reward is plotted
        (``plt.show()`` is called) and the last value is printed.

        Args:
            bw: Forwarded unchanged to ``MatrixGame`` as ``BW``.
            un: Number of users, i.e. number of agents and relays created.

        Returns:
            The summed ``cost_local`` of the last episode.
        """
        episode_count = 2000
        agent_actions = np.arange(8)
        n_users = un

        # Original note: "cycles required per bit, 70~800 cycles/bit".
        # The value assigned to a user repeats with period 5 over its index.
        lambda_cycle = (0.001, 0.01, 0.1, 0.001, 0.01)
        lambda_n = np.zeros(n_users)
        for idx in range(n_users):
            lambda_n[idx] = lambda_cycle[idx % 5]

        # Only column 0 of a row is read below (third argument of updateQ).
        action_table = ([[0, 5 * 10 ** 6, 0.4] for _ in range(4)]
                        + [[1, 0, 0.4] for _ in range(4)])

        # One WoLF agent per user, all with identical hyper-parameters.
        agents = [
            WoLFAgent(alpha=0.1,
                      actions=agent_actions,
                      high_delta=0.004,
                      low_delta=0.002)
            for _ in range(n_users)
        ]

        # One relay per user; every relay starts with GPD1=4e6 and GPD2=0.3.
        relays = [
            QueueRelay(lambda_n[idx], 4 * 10 ** 6, 0.3)
            for idx in range(n_users)
        ]

        reward_history = []
        cost_local_history = []

        # Per-user history of Q, seeded with a single 10; fed to gpd().
        queue_history = [[10] for _ in range(n_users)]

        # Second positional argument handed to gpd() on every refresh.
        gpd_scalar = 3.96 * 10 ** 7

        for episode in range(episode_count):
            print('episode for all :', episode)

            # Snapshot the relay state before anything is updated.
            Q_array, Qx_array, Qy_array = [], [], []
            Qz_array, M1_array, M2_array = [], [], []
            for relay in relays:
                Q_array.append(relay.Q)
                Qx_array.append(relay.Qx)
                Qy_array.append(relay.Qy)
                Qz_array.append(relay.Qz)
                M1_array.append(relay.M1)
                M2_array.append(relay.M2)

            for history, q_now in zip(queue_history, Q_array):
                history.append(q_now)

            # Periodic GPD refresh: every 50 episodes, never on episode 0.
            if episode and episode % 50 == 0:
                for history, relay in zip(queue_history, relays):
                    res = self.gpdaa.gpd(history, gpd_scalar)
                    if not res:
                        continue
                    relay.GPD1 = res[0][0]
                    relay.GPD2 = res[0][1]
                    relay.updateM1()
                    relay.updateM2()

            iteration_actions = [agent.act() for agent in agents]
            game = MatrixGame(actions=iteration_actions,
                              Q=Q_array,
                              Qx=Qx_array,
                              Qy=Qy_array,
                              Qz=Qz_array,
                              M1=M1_array,
                              M2=M2_array,
                              BW=bw)

            reward, cost_local, bn, lumbda, rff = game.step(
                actions=iteration_actions)

            # Push the step results into every relay's queues.
            for idx, relay in enumerate(relays):
                relay.lumbda = lumbda[idx]
                chosen = iteration_actions[idx]
                relay.updateQ(bn[idx], action_table[chosen][0], rff[idx])
                relay.updateQx()
                relay.updateQy()
                relay.updateQz()

            reward_history.append(sum(reward))
            cost_local_history.append(sum(cost_local))

            # Each agent learns from its own reward only.
            for idx, agent in enumerate(agents):
                agent.observe(reward=reward[idx])

        episodes_axis = np.arange(len(reward_history))
        plt.plot(episodes_axis, reward_history, label="")
        plt.title('all mode ')
        plt.show()
        print('reward_history[-1]:', reward_history[-1])

        return cost_local_history[-1]
Exemple #2
0
    # Two MiniMaxQLearner agents (ids 0 and 1) with two actions each and an
    # epsilon-greedy policy play a MatrixGame against each other.
    # NOTE(review): nb_episode is defined outside this fragment.
    agent1 = MiniMaxQLearner(aid=0,
                             alpha=0.1,
                             policy=EpsGreedyQPolicy(),
                             actions=np.arange(2))  # agent setup
    agent2 = MiniMaxQLearner(aid=1,
                             alpha=0.1,
                             policy=EpsGreedyQPolicy(),
                             actions=np.arange(2))  # agent setup

    game = MatrixGame()
    for episode in range(nb_episode):
        # Both agents pick an action before the game is stepped.
        action1 = agent1.act()
        action2 = agent2.act()

        # step() returns three values; the first one is not used here.
        _, r1, r2 = game.step(action1, action2)

        # Each agent observes its own reward and the other's previous action.
        # NOTE(review): agent1.observe() has already run when
        # agent1.previous_action is read for agent2 — confirm observe() does
        # not modify previous_action.
        agent1.observe(reward=r1, opponent_action=agent2.previous_action)
        agent2.observe(reward=r2, opponent_action=agent1.previous_action)
    # Final policies of both agents.
    print(agent1.pi)
    print(agent2.pi)
    # ipdb.set_trace()
    # Plot the recorded pi_history of each agent against its index; the
    # y-axis is clamped to [0, 1].
    plt.plot(np.arange(len(agent1.pi_history)),
             agent1.pi_history,
             label="agent1's pi(0)")
    plt.plot(np.arange(len(agent2.pi_history)),
             agent2.pi_history,
             label="agent2's pi(0)")
    plt.ylim(0, 1)
    plt.xlabel("episode")
    plt.ylabel("pi(0)")
Exemple #3
0
    def wolf_cal_reward(self, DL, DH):
        """Run a 10-user WoLF simulation and return the mean late reward.

        Ten WoLF agents (8 actions each) play 3000 episodes.  Each episode
        the current queue values of every ``QueueRelay`` are snapshotted, a
        ``MatrixGame`` is built from them with a fixed ``BW`` of 10e6, one
        step is played with the agents' chosen actions, and the relays and
        agents are updated from the step results.  Every 50th episode
        (except episode 0) the GPD1/GPD2 values of each relay are refreshed
        from a local ``GPD`` instance when it returns a usable result.

        The per-episode summed reward is printed as the run progresses and
        plotted at the end (``plt.show()`` is called).

        Args:
            DL: Passed to every ``WoLFAgent`` as ``low_delta``.
            DH: Passed to every ``WoLFAgent`` as ``high_delta``.

        Returns:
            ``np.mean`` of the summed reward over the last 300 episodes.
        """
        nb_episode = 3000
        user_num = 10
        actions = np.arange(8)
        gpdtemp = GPD()

        # Original note: "cycles required per bit, 70~800 cycles/bit".
        # The value assigned to a user repeats with period 5 over its index.
        lambda_cycle = (0.001, 0.01, 0.1, 0.001, 0.01)
        lambda_n = np.zeros(user_num)
        for i in range(user_num):
            lambda_n[i] = lambda_cycle[i % 5]

        # Only column 0 of a row is read below (third argument of updateQ).
        actions_set = [[0, 5 * pow(10, 6), 0.4], [0, 5 * pow(10, 6), 0.4],
                       [0, 5 * pow(10, 6), 0.4], [0, 5 * pow(10, 6), 0.4],
                       [1, 0, 0.4], [1, 0, 0.4], [1, 0, 0.4], [1, 0, 0.4]]
        GPD1_array = [4 * pow(10, 6) for _ in range(user_num)]
        GPD2_array = [0.3 for _ in range(user_num)]

        # One WoLF agent per user; the learning-rate deltas come from the
        # caller.
        wolf_agent_array = [
            WoLFAgent(alpha=0.1,
                      actions=actions,
                      high_delta=DH,
                      low_delta=DL)
            for _ in range(user_num)
        ]

        queue_relay_array = [
            QueueRelay(lambda_n[i], GPD1_array[i], GPD2_array[i])
            for i in range(user_num)
        ]

        reward_history = []

        # Per-user history of Q, seeded with a single 10; fed to gpd().
        Q_array_histroy = [[10] for _ in range(user_num)]

        for episode in range(nb_episode):
            # Snapshot the relay state before anything is updated.
            Q_array = []
            Qx_array = []
            Qy_array = []
            Qz_array = []
            M1_array = []
            M2_array = []
            for relay in queue_relay_array:
                Q_array.append(relay.Q)
                Qx_array.append(relay.Qx)
                Qy_array.append(relay.Qy)
                Qz_array.append(relay.Qz)
                M1_array.append(relay.M1)
                M2_array.append(relay.M2)

            for i in range(user_num):
                Q_array_histroy[i].append(Q_array[i])

            # Periodic GPD refresh: every 50 episodes, never on episode 0.
            if episode % 50 == 0 and episode != 0:
                for i in range(user_num):
                    res = gpdtemp.gpd(Q_array_histroy[i], 3.96 * pow(10, 6))
                    # res[0] supplies the new GPD1/GPD2 values; any further
                    # elements of the result are not consumed here.
                    if res and res[0]:
                        print(res)
                        queue_relay_array[i].GPD1 = res[0][0]
                        queue_relay_array[i].GPD2 = res[0][1]
                        queue_relay_array[i].updateM1()
                        queue_relay_array[i].updateM2()

            iteration_actions = [agent.act() for agent in wolf_agent_array]
            game = MatrixGame(actions=iteration_actions,
                              Q=Q_array,
                              Qx=Qx_array,
                              Qy=Qy_array,
                              Qz=Qz_array,
                              M1=M1_array,
                              M2=M2_array,
                              BW=10 * pow(10, 6))

            # NOTE(review): other call sites in this file unpack five values
            # from game.step() (with a cost term in second position) —
            # confirm which MatrixGame version this method runs against.
            reward, bn, lumbda, rff = game.step(actions=iteration_actions)
            episode_reward = sum(reward)
            print("episode", episode, "reward", episode_reward)

            # Push the step results into every relay's queues.
            for i in range(user_num):
                queue_relay_array[i].lumbda = lumbda[i]
                queue_relay_array[i].updateQ(
                    bn[i], actions_set[iteration_actions[i]][0], rff[i])
                queue_relay_array[i].updateQx()
                queue_relay_array[i].updateQy()
                queue_relay_array[i].updateQz()

            reward_history.append(episode_reward)

            # Each agent learns from its own reward only.
            for i in range(user_num):
                wolf_agent_array[i].observe(reward=reward[i])

        for agent in wolf_agent_array:
            print('pi_average', agent.pi_average)

        plt.plot(np.arange(len(reward_history)), reward_history, label="all")
        plt.title('wolf_dl' + str(DL) + '-dh' + str(DH))
        plt.show()

        return np.mean(reward_history[-300:])
                    queue_relay_array[i].updateM2()
        ##  TLIU

        iteration_actions = []
        for i in range(user_num):
            iteration_actions.append(wolf_agent_array[i].act())
        game = MatrixGame(actions=iteration_actions,
                          Q=Q_array,
                          Qx=Qx_array,
                          Qy=Qy_array,
                          Qz=Qz_array,
                          M1=M1_array,
                          M2=M2_array,
                          BW=10 * pow(10, 6))

        reward, _, bn, lumbda, rff = game.step(actions=iteration_actions)
        print("episode", episode, "reward", sum(reward))
        OUTPUT.append(sum(reward))

        for i in range(user_num):
            # wolf agent act
            # update_Queue_relay
            queue_relay_array[i].lumbda = lumbda[i]
            queue_relay_array[i].updateQ(bn[i],
                                         actions_set[iteration_actions[i]][0],
                                         rff[i])
            queue_relay_array[i].updateQx()
            queue_relay_array[i].updateQy()
            queue_relay_array[i].updateQz()

        # reward step
                       low_delta=0.0002)
    # Third WoLF agent of the game.
    # NOTE(review): agent1, agent2, `actions` and nb_episode are defined
    # outside this fragment.
    agent3 = WoLFAgent(alpha=0.1,
                       actions=actions,
                       high_delta=0.0004,
                       low_delta=0.0002)

    game = MatrixGame()
    for episode in range(nb_episode):
        # NOTE: this rebinds the name `actions` that was passed to the agent
        # constructor above; from here on it is the per-episode action list.
        actions = []
        action1 = agent1.act()
        action2 = agent2.act()
        action3 = agent3.act()
        actions.append(action1)
        actions.append(action2)
        actions.append(action3)
        # step() returns two values; the first one is not used here.
        _, reward = game.step(actions)

        # Each agent observes the reward entry at its own position.
        agent1.observe(reward=reward[0])
        agent2.observe(reward=reward[1])
        agent3.observe(reward=reward[2])

    # Final q_values of all three agents.
    print(agent1.q_values)
    print(agent2.q_values)
    print(agent3.q_values)
    # Disabled plotting of each agent's pi_history:
    # plt.plot(np.arange(len(agent1.pi_history)),agent1.pi_history, label="agent1's pi(0)")
    # plt.plot(np.arange(len(agent2.pi_history)),agent2.pi_history, label="agent2's pi(0)")
    # plt.plot(np.arange(len(agent3.pi_history)),agent3.pi_history, label="agent3's pi(0)")
    # plt.ylim(0, 1)
    # plt.xlabel("episode")
    # plt.ylabel("pi(0)")
    # plt.legend()