Python WoLFAgentの例、wolf_agent.WoLFAgent Pythonの例

コード例 #1

0

ファイルを表示

ファイル: draw_bandwidth.py プロジェクト: T610/MEC

    def run_for_only_mec(self, bw1, un1):
        """Simulate the MEC-only game for ``un1`` users and return the final reward.

        Runs 1000 episodes. In each episode every user's WoLF agent picks an
        action index, a ``MatrixGame_mec`` is built from the current queue
        state and stepped, the queue relays are advanced, and each agent
        observes its own reward. Every 50 episodes (episode 0 excluded) the
        GPD parameters of every relay are refitted from that user's queue
        history.

        Args:
            bw1: Value forwarded to ``MatrixGame_mec`` as ``BW``.
            un1: Number of users, i.e. number of agents and queue relays.

        Returns:
            The reward summed over all users in the last episode.
        """
        episode_count = 1000
        # Only the first field of an entry is used here (passed to ``updateQ``).
        actions_set = [
            [1, 0, 0.1],
            [1, 0, 0.5],
            [1, 0, 1],
            [1, 0, 2],
        ]
        actions = np.arange(len(actions_set))
        user_num = un1

        # Per-user lambda values repeat with period five.
        # NOTE(review): the original comment read "cycles required per bit,
        # 70~800 cycles/bit", which does not match these magnitudes — confirm
        # what lambda_n represents.
        lambda_cycle = (0.001, 0.01, 0.1, 0.001, 0.01)
        lambda_n = np.zeros(user_num)
        for idx in range(user_num):
            lambda_n[idx] = lambda_cycle[idx % 5]

        # Initial GPD parameters, identical for every user.
        gpd1_init = [4 * 10 ** 6] * user_num
        gpd2_init = [0.3] * user_num

        # One WoLF agent and one queue relay per user (agents are built first).
        agents = [
            WoLFAgent(alpha=0.1, actions=actions,
                      high_delta=0.004, low_delta=0.002)
            for _ in range(user_num)
        ]
        relays = [
            QueueRelay(lam, gpd1, gpd2)
            for lam, gpd1, gpd2 in zip(lambda_n, gpd1_init, gpd2_init)
        ]

        reward_history = []
        # Each user's queue history starts with a single seed value of 10.
        queue_history = [[10] for _ in range(user_num)]

        for episode in range(episode_count):
            # Snapshot the relay state before anything is updated this episode.
            Q_array, Qx_array, Qy_array = [], [], []
            Qz_array, M1_array, M2_array = [], [], []
            for relay in relays:
                Q_array.append(relay.Q)
                Qx_array.append(relay.Qx)
                Qy_array.append(relay.Qy)
                Qz_array.append(relay.Qz)
                M1_array.append(relay.M1)
                M2_array.append(relay.M2)

            for history, q_now in zip(queue_history, Q_array):
                history.append(q_now)

            # Periodic GPD refit; a falsy fit result leaves the relay untouched.
            if episode > 0 and episode % 50 == 0:
                for relay, history in zip(relays, queue_history):
                    fit = GPD().gpd(history, 3.96 * 10 ** 6)
                    if not fit:
                        continue
                    relay.GPD1 = fit[0][0]
                    relay.GPD2 = fit[0][1]
                    relay.updateM1()
                    relay.updateM2()

            iteration_actions = [agent.act() for agent in agents]
            game = MatrixGame_mec(
                actions=iteration_actions,
                Q=Q_array,
                Qx=Qx_array,
                Qy=Qy_array,
                Qz=Qz_array,
                M1=M1_array,
                M2=M2_array,
                BW=bw1,
            )
            reward, bn, lumbda, rff = game.step(actions=iteration_actions)

            # Advance every relay with the outcome of this step.
            for idx, relay in enumerate(relays):
                relay.lumbda = lumbda[idx]
                chosen = actions_set[iteration_actions[idx]]
                relay.updateQ(bn[idx], chosen[0], rff[idx])
                relay.updateQx()
                relay.updateQy()
                relay.updateQz()

            reward_history.append(sum(reward))
            for idx, agent in enumerate(agents):
                agent.observe(reward=reward[idx])

        return reward_history[-1]

コード例 #2

0

ファイルを表示

    def run_for_all_mode(self, bw, un):
        """Simulate the all-mode game for ``un`` users and return the last local cost.

        Runs 2000 episodes. In each episode every user's WoLF agent picks one
        of eight action indices, a ``MatrixGame`` is built from the current
        queue state and stepped, the queue relays are advanced, and each agent
        observes its own reward. Every 50 episodes (episode 0 excluded) the
        GPD parameters of every relay are refitted via ``self.gpdaa``. After
        training, the per-episode total reward is plotted and the last total
        is printed.

        Args:
            bw: Value forwarded to ``MatrixGame`` as ``BW``.
            un: Number of users, i.e. number of agents and queue relays.

        Returns:
            The local cost summed over all users in the last episode.
        """
        episode_count = 2000
        actions = np.arange(8)
        user_num = un

        # Per-user lambda values repeat with period five.
        # NOTE(review): the original comment read "cycles required per bit,
        # 70~800 cycles/bit", which does not match these magnitudes — confirm
        # what lambda_n represents.
        lambda_cycle = (0.001, 0.01, 0.1, 0.001, 0.01)
        lambda_n = np.zeros(user_num)
        for idx in range(user_num):
            lambda_n[idx] = lambda_cycle[idx % 5]

        # Four entries starting with 0 followed by four starting with 1; only
        # the first field is used here (passed to ``updateQ``).
        actions_set = (
            [[0, 5 * 10 ** 6, 0.4] for _ in range(4)]
            + [[1, 0, 0.4] for _ in range(4)]
        )

        # Initial GPD parameters, identical for every user.
        gpd1_init = [4 * 10 ** 6] * user_num
        gpd2_init = [0.3] * user_num

        # One WoLF agent and one queue relay per user (agents are built first).
        agents = [
            WoLFAgent(alpha=0.1, actions=actions,
                      high_delta=0.004, low_delta=0.002)
            for _ in range(user_num)
        ]
        relays = [
            QueueRelay(lam, gpd1, gpd2)
            for lam, gpd1, gpd2 in zip(lambda_n, gpd1_init, gpd2_init)
        ]

        reward_history = []
        cost_local_history = []
        # Each user's queue history starts with a single seed value of 10.
        queue_history = [[10] for _ in range(user_num)]

        for episode in range(episode_count):
            print('episode for all :', episode)

            # Snapshot the relay state before anything is updated this episode.
            Q_array, Qx_array, Qy_array = [], [], []
            Qz_array, M1_array, M2_array = [], [], []
            for relay in relays:
                Q_array.append(relay.Q)
                Qx_array.append(relay.Qx)
                Qy_array.append(relay.Qy)
                Qz_array.append(relay.Qz)
                M1_array.append(relay.M1)
                M2_array.append(relay.M2)

            for history, q_now in zip(queue_history, Q_array):
                history.append(q_now)

            # Periodic GPD refit; a falsy fit result leaves the relay untouched.
            if episode > 0 and episode % 50 == 0:
                for relay, history in zip(relays, queue_history):
                    fit = self.gpdaa.gpd(history, 3.96 * 10 ** 7)
                    if not fit:
                        continue
                    relay.GPD1 = fit[0][0]
                    relay.GPD2 = fit[0][1]
                    relay.updateM1()
                    relay.updateM2()

            iteration_actions = [agent.act() for agent in agents]
            game = MatrixGame(
                actions=iteration_actions,
                Q=Q_array,
                Qx=Qx_array,
                Qy=Qy_array,
                Qz=Qz_array,
                M1=M1_array,
                M2=M2_array,
                BW=bw,
            )
            step_result = game.step(actions=iteration_actions)
            reward, cost_local, bn, lumbda, rff = step_result

            # Advance every relay with the outcome of this step.
            for idx, relay in enumerate(relays):
                relay.lumbda = lumbda[idx]
                chosen = actions_set[iteration_actions[idx]]
                relay.updateQ(bn[idx], chosen[0], rff[idx])
                relay.updateQx()
                relay.updateQy()
                relay.updateQz()

            reward_history.append(sum(reward))
            cost_local_history.append(sum(cost_local))

            for idx, agent in enumerate(agents):
                agent.observe(reward=reward[idx])

        episodes_axis = np.arange(len(reward_history))
        plt.plot(episodes_axis, reward_history, label="")
        plt.title('all mode ')
        plt.show()
        print('reward_history[-1]:', reward_history[-1])

        return cost_local_history[-1]

コード例 #3

0

ファイルを表示

ファイル: new_run_for_different_dl1_dh4.py プロジェクト: T610/MEC

        if i % 5 == 3:
            lambda_n[i] = 0.001
        if i % 5 == 4:
            lambda_n[i] = 0.01
    # Eight action entries: the first four start with 0, the last four with 1.
    # NOTE(review): the meaning of the three fields is not visible in this
    # excerpt — confirm against the code that consumes actions_set.
    actions_set = [[0, 5 * pow(10, 6), 0.4], [0, 5 * pow(10, 6), 0.4],
                   [0, 5 * pow(10, 6), 0.4], [0, 5 * pow(10, 6), 0.4],
                   [1, 0, 0.4], [1, 0, 0.4], [1, 0, 0.4], [1, 0, 0.4]]
    # Initial GPD parameters, one identical entry per user; handed to
    # QueueRelay below.
    GPD1_array = [4 * pow(10, 6) for _ in range(user_num)]
    GPD2_array = [0.3 for _ in range(user_num)]

    # Create one WoLF agent per user, all with the same action set and
    # hyper-parameters.
    wolf_agent_array = []
    for i in range(user_num):
        wolf_agent_array.append(
            WoLFAgent(alpha=0.1,
                      actions=actions,
                      high_delta=0.006,
                      low_delta=0.002))

    # Create one QueueRelay per user from that user's lambda value and
    # initial GPD parameters.
    queue_relay_array = []

    for i in range(user_num):
        queue_relay_array.append(
            QueueRelay(lambda_n[i], GPD1_array[i], GPD2_array[i]))

    # Reward bookkeeping (a dedicated Reward object is not used).

    # reward = Reward()
    # Starts empty; how it is filled lies outside this excerpt.
    reward_history = []

    # Per-user queue history; every user's list starts with the single
    # seed value 10.
    Q_array_histroy = [[10] for i in range(user_num)]  # author tag: TLIU

コード例 #4

0

ファイルを表示

ファイル: draw_wolf_phc_dh_dl.py プロジェクト: T610/MEC

    def wolf_cal_reward(self, DL, DH):
        """Train ten WoLF agents with the given deltas and report the late reward.

        Runs 3000 episodes of ``MatrixGame`` with ``BW`` fixed at
        ``10 * 10**6``. In each episode every agent picks one of eight action
        indices, the game is stepped, the queue relays are advanced, and each
        agent observes its own reward. Every 50 episodes (episode 0 excluded)
        the GPD parameters of every relay are refitted from that user's queue
        history. Progress is printed every episode; afterwards each agent's
        ``pi_average`` is printed and the total-reward curve is plotted.

        Args:
            DL: Passed to every ``WoLFAgent`` as ``low_delta``.
            DH: Passed to every ``WoLFAgent`` as ``high_delta``.

        Returns:
            Mean of the per-episode total reward over the last 300 episodes.
        """
        nb_episode = 3000
        actions = np.arange(8)
        user_num = 10
        # A single GPD fitter is reused for every refit.
        gpdtemp = GPD()

        # Per-user lambda values repeat with period five.
        # NOTE(review): the original comment read "cycles required per bit,
        # 70~800 cycles/bit", which does not match these magnitudes — confirm
        # what lambda_n represents.
        lambda_cycle = (0.001, 0.01, 0.1, 0.001, 0.01)
        lambda_n = np.zeros(user_num)
        for i in range(user_num):
            lambda_n[i] = lambda_cycle[i % 5]

        # Four entries starting with 0 followed by four starting with 1; only
        # the first field is used here (passed to ``updateQ``).
        actions_set = (
            [[0, 5 * pow(10, 6), 0.4] for _ in range(4)]
            + [[1, 0, 0.4] for _ in range(4)]
        )

        # Initial GPD parameters, identical for every user.
        GPD1_array = [4 * pow(10, 6) for _ in range(user_num)]
        GPD2_array = [0.3 for _ in range(user_num)]

        # One WoLF agent and one queue relay per user (agents are built first).
        wolf_agent_array = [
            WoLFAgent(alpha=0.1, actions=actions, high_delta=DH, low_delta=DL)
            for _ in range(user_num)
        ]
        queue_relay_array = [
            QueueRelay(lambda_n[i], GPD1_array[i], GPD2_array[i])
            for i in range(user_num)
        ]

        reward_history = []
        # Each user's queue history starts with a single seed value of 10.
        Q_array_histroy = [[10] for _ in range(user_num)]

        for episode in range(nb_episode):
            # Snapshot the relay state before anything is updated this episode.
            Q_array = []
            Qx_array = []
            Qy_array = []
            Qz_array = []
            M1_array = []
            M2_array = []
            for relay in queue_relay_array:
                Q_array.append(relay.Q)
                Qx_array.append(relay.Qx)
                Qy_array.append(relay.Qy)
                Qz_array.append(relay.Qz)
                M1_array.append(relay.M1)
                M2_array.append(relay.M2)

            for i in range(user_num):
                Q_array_histroy[i].append(Q_array[i])

            # Periodic GPD refit: only a truthy result whose first element is
            # also truthy updates the relay.
            if episode % 50 == 0 and episode != 0:
                for i, relay in enumerate(queue_relay_array):
                    res = gpdtemp.gpd(Q_array_histroy[i], 3.96 * pow(10, 6))
                    if res and res[0]:
                        print(res)
                        relay.GPD1 = res[0][0]
                        relay.GPD2 = res[0][1]
                        relay.updateM1()
                        relay.updateM2()

            iteration_actions = [agent.act() for agent in wolf_agent_array]
            game = MatrixGame(actions=iteration_actions,
                              Q=Q_array,
                              Qx=Qx_array,
                              Qy=Qy_array,
                              Qz=Qz_array,
                              M1=M1_array,
                              M2=M2_array,
                              BW=10 * pow(10, 6))

            reward, bn, lumbda, rff = game.step(actions=iteration_actions)
            # Total reward over all users; computed once and reused below.
            total_reward = sum(reward)
            print("episode", episode, "reward", total_reward)

            # Advance every relay with the outcome of this step.
            for i, relay in enumerate(queue_relay_array):
                relay.lumbda = lumbda[i]
                relay.updateQ(
                    bn[i], actions_set[iteration_actions[i]][0], rff[i])
                relay.updateQx()
                relay.updateQy()
                relay.updateQz()

            reward_history.append(total_reward)
            for i, agent in enumerate(wolf_agent_array):
                agent.observe(reward=reward[i])

        for agent in wolf_agent_array:
            print('pi_average', agent.pi_average)

        plt.plot(np.arange(len(reward_history)), reward_history, label="all")
        plt.title('wolf_dl' + str(DL) + '-dh' + str(DH))
        plt.show()

        return np.mean(reward_history[-300:])

コード例 #5

0

ファイルを表示

ファイル: run.py プロジェクト: ipangkang/WOLF-PHC-for-multiagent

import numpy as np
import matplotlib.pyplot as plt
from wolf_agent import WoLFAgent
from matrix_game import MatrixGame
import pandas as pd

if __name__ == '__main__':
    # Number of training episodes.
    nb_episode = 1000

    # Three WoLF agents share one action set of three action indices
    # (0, 1, 2) and identical hyper-parameters.
    actions = np.arange(3)
    agent1 = WoLFAgent(alpha=0.1,
                       actions=actions,
                       high_delta=0.0004,
                       low_delta=0.0002)
    agent2 = WoLFAgent(alpha=0.1,
                       actions=actions,
                       high_delta=0.0004,
                       low_delta=0.0002)
    agent3 = WoLFAgent(alpha=0.1,
                       actions=actions,
                       high_delta=0.0004,
                       low_delta=0.0002)

    game = MatrixGame()
    for episode in range(nb_episode):
        # NOTE: this rebinds ``actions`` (the array given to the agents above)
        # to a fresh list that collects this episode's chosen actions.
        actions = []
        action1 = agent1.act()
        action2 = agent2.act()
        action3 = agent3.act()
        actions.append(action1)
        actions.append(action2)
        # NOTE(review): the excerpt ends here; action3 is presumably appended
        # next — confirm against the full file.

コード例 #6

0

ファイルを表示

    # actions_set = [[0, 5 * pow(10, 6), 0.4],
    #      [0, 5 * pow(10, 6), 0.4],
    #      [0, 5 * pow(10, 6), 0.4],
    #      [0, 5 * pow(10, 6), 0.4],
    #      [1, 0, 0.4],
    #      [1,0, 0.4],
    #      [1, 0, 0.4],
    #      [1, 0, 0.4]]

    # Initial GPD parameters, one identical entry per user; handed to
    # QueueRelay below.
    GPD1_array = [4 * pow(10, 6) for _ in range(user_num)]
    GPD2_array = [0.3 for _ in range(user_num)]

    # Create one WoLF agent per user, all with the same action set and
    # hyper-parameters.
    wolf_agent_array = []
    for i in range(user_num):
        wolf_agent_array.append(WoLFAgent(alpha=0.1, actions=actions, high_delta=0.004, low_delta=0.002))

    # Create one QueueRelay per user from that user's lambda value and
    # initial GPD parameters.
    queue_relay_array = []

    for i in range(user_num):
        queue_relay_array.append(QueueRelay(lambda_n[i], GPD1_array[i], GPD2_array[i]))

    # Reward bookkeeping (a dedicated Reward object is not used).

    # reward = Reward()
    # Starts empty; how it is filled lies outside this excerpt.
    reward_history  = []

    # Per-user queue history; every user's list starts with the single
    # seed value 10.
    Q_array_histroy = [  [10] for i in range(user_num)  ]     # author tag: TLIU

コード例 #7

0

ファイルを表示

ファイル: run.py プロジェクト: tocom242242/wolf_phc

import numpy as np
import matplotlib.pyplot as plt
from wolf_agent import WoLFAgent
from matrix_game import MatrixGame
import pandas as pd

if __name__ == '__main__':
    # Train two WoLF agents against each other in a matrix game, then print
    # the policy (``pi``) each agent ends up with.
    nb_episode = 1000
    actions = np.arange(2)

    # Both agents are built with the same action set and hyper-parameters.
    agent1, agent2 = (
        WoLFAgent(alpha=0.1, actions=actions,
                  high_delta=0.0004, low_delta=0.0002)
        for _ in range(2)
    )

    game = MatrixGame()
    for episode in range(nb_episode):
        # Agent 1 always acts before agent 2.
        action1, action2 = agent1.act(), agent2.act()

        # The first value returned by ``step`` is not used.
        _, r1, r2 = game.step(action1, action2)

        for learner, payoff in ((agent1, r1), (agent2, r2)):
            learner.observe(reward=payoff)

    for learner in (agent1, agent2):
        print(learner.pi)