Code example #1

# Shared imports, inferred from usage, assumed by every example in this file.
import collections
import random
import sys

import numpy as np
import tensorflow as tf


class Game:
    def __init__(self):
        # Internet topology
        self.generator = MiniGenerator(10, 2)
        self.generator.build_topology()

        # RLs and Ns contain the same node objects in different orders: RLs is
        # iterated when creating learners, Ns when stepping every node
        self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
        self.N = 12  # total number of nodes in the topology
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs

        self.MAX = 100000  # number of training epochs
        self.ids = self.generator.ids  # node id -> node object lookup

        self.TF = collections.defaultdict(dict)
        self.tf_num = 10
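        # the ten (src, dst) unit-demand flows routed in this game: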
        self.TF[6][9] = 1
        self.TF[6][10] = 1
        self.TF[7][4] = 1
        self.TF[7][5] = 1
        self.TF[8][10] = 1
        self.TF[8][11] = 1
        self.TF[0][9] = 1
        self.TF[1][11] = 1
        self.TF[2][5] = 1
        self.TF[3][4] = 1

        self.global_optimal = 600  # reference optimum of total throughput
        self.episode = 0.8  # exploration probability (epsilon), despite the name

        # experience replay pool: experience_pool[flow][agent] stores
        # [s, a, r, s'] transitions, capped at pool_size
        self.experience_pool = collections.defaultdict(dict)
        self.pool_size = 50
        self.sample_size = 10
        for i in range(self.tf_num):
            for j in range(self.N):
                self.experience_pool[i][j] = []
Code example #2

class Game:
    def __init__(self):
        # Internet topology
        self.generator = MiniGenerator(10, 2)
        self.generator.build_topology()
        self.generator.build_directed_matrix()

        self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
        self.N = 12
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs

        self.MAX = 100000
        self.ids = self.generator.ids

        # experience replay pool: one list of [s, a, r, s'] transitions per
        # agent, capped at pool_size
        self.experience_pool = collections.defaultdict(list)
        self.pool_size = 10

        # TODO, define TF, Matrix, Linprog
        self.TF = collections.defaultdict(dict)
        self.TF[6][9] = 1
        self.TF[6][10] = 1
        self.TF[7][4] = 1
        self.TF[7][5] = 1
        self.TF[8][10] = 1
        self.TF[8][11] = 1
        self.TF[0][9] = 1
        self.TF[1][11] = 1
        self.TF[2][5] = 1
        self.TF[3][4] = 1
Code example #3

class Game:
    def __init__(self):
        # Internet topology
        self.generator = MiniGenerator(10, 2)
        self.generator.build_topology()
        self.generator.build_directed_matrix()

        self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
        self.N = 12
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs

        self.MAX = 100000
        self.ids = self.generator.ids

        self.TF = collections.defaultdict(dict)
        self.tf_num = 10
        self.TF[6][9] = 1
        self.TF[6][10] = 1
        self.TF[7][4] = 1
        self.TF[7][5] = 1
        self.TF[8][10] = 1
        self.TF[8][11] = 1
        self.TF[0][9] = 1
        self.TF[1][11] = 1
        self.TF[2][5] = 1
        self.TF[3][4] = 1

        # self.TF[9][6] = 1
        # self.TF[10][6] = 1
        # self.TF[4][7] = 1
        # self.TF[5][7] = 1
        # self.TF[10][8] = 1
        # self.TF[11][8] = 1
        # self.TF[9][0] = 1
        # self.TF[11][1] = 1
        # self.TF[5][2] = 1
        # self.TF[4][3] = 1

        # experience replay pool: experience_pool[flow][agent] stores
        # [s, a, r, s'] transitions, capped at pool_size
        self.experience_pool = collections.defaultdict(dict)
        self.pool_size = 10
        for i in range(self.tf_num):
            for j in range(self.N):
                self.experience_pool[i][j] = []
Code example #4

class Game:
    def __init__(self):
        # Internet topology
        self.generator = MiniGenerator(10, 2)
        self.generator.build_topology()
        self.generator.build_directed_matrix()

        self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs

        self.N = 12
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs

        self.MAX = 100000
        self.ids = self.generator.ids

        # experience replay pool: one list per agent; each entry is
        # [s, r, a, s'] (see the learning loop below), capped at pool_size
        self.experience_pool = collections.defaultdict(list)
        self.pool_size = 10
        self.global_optimal = 0
        self.int_optimal = 0

        # TODO, define TF, Matrix, Linprog
        self.TF = collections.defaultdict(dict)
        self.TF[6][11] = 1
        self.TF[8][9] = 1
        self.TF[7][10] = 1
        self.TF[2][5] = 1
        self.TF[3][4] = 1
        self.TF[4][3] = 1
        self.TF[5][2] = 1
        self.TF[10][7] = 1
        self.TF[9][8] = 1
        self.TF[11][6] = 1
        self.TF[8][0] = 1
        self.TF[9][1] = 1
        self.TF[7][5] = 1
        self.TF[10][3] = 1

        # self.TF[6][9] = 1
        # self.TF[6][10] = 1
        # self.TF[7][0] = 1
        # self.TF[7][1] = 1
        # self.TF[8][10] = 1
        # self.TF[8][11] = 1
        # self.TF[0][9] = 1
        # self.TF[1][11] = 1
        # self.TF[2][5] = 1
        # self.TF[3][4] = 1


        # intlp = IntLp(self.generator.matrix, self.TF)
        # intlp.solve_ilp()
        #
        # print("===========================================")
        #
        # linear = Linprog(self.generator.matrix, self.TF)
        # self.global_optimal = linear.solve_linprog()

    def play_game(self):
        print("play")
        sess = tf.Session()
        print('sess')

        """
            basic states for every node
        """
        states = collections.defaultdict(list)
        for i in range(self.N):
            # add neighbor
            for j in range(len(self.generator.matrix[i])):
                if self.generator.matrix[i][j] == 1:
                    states[i].append(100)

            # reachable end-to-end throughput (all advertised are considered here)
            node = self.ids[i]
            for d in node.table:
                states[i].append(0)
            for d in node.table_peer:
                states[i].append(0)
            for d in node.table_provider:
                states[i].append(0)

        """
            create RL module
        """
        # basic state
        for i in self.RLs:
            print("create mode for: " + str(i.id) + ", version -1")
            n_features = len(states[i.id])
            actor = Actor(sess, n_features, i.n_actions, i.id, -1)
            critic = Critic(sess, n_features, i.id, -1)
            i.set_rl_setting(actor, critic)
            sess.run(tf.global_variables_initializer())  # note: re-initializes all variables on each iteration; harmless before training, but could be hoisted out of the loop

        print("model created")
        '''
            loop time as time epoch
        '''

        sums = []
        sums_random = []

        TF = self.TF
        for time in range(self.MAX):
            print("begin time epoch: " + str(time))

            """
                choose an action
                    id : action label
            """
            # basic
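            # epsilon-greedy: with probability 0.9 follow the actor's policy,
            # otherwise pick a uniformly random action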
            actions = {}
            for i in self.Ns:
                if i in self.RLs:
                    s = np.array(states[i.id])
                    pro = random.random()
                    if pro > 0.1:
                        actions[i.id] = i.actor.choose_action(s)
                    else:
                        actions[i.id] = random.randint(0, i.n_actions - 1)
                else:
                    actions[i.id] = 0

            # random
            actions_random = {}
            for i in self.Ns:
                # node i
                if i in self.RLs:
                    actions_random[i.id] = random.randint(0, i.n_actions - 1)
                else:
                    actions_random[i.id] = 0

            """
                actual flow
                    id : id : path
            """
            # basic
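            # walk each flow hop by hop under the chosen actions; flag == 0 marks
            # a delivered path, flag == 1 a dead end or a path longer than 10 hops
            # (such flows are dropped for this epoch)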
            actual_flow = collections.defaultdict(dict)
            for i in TF.keys():
                for j in TF[i].keys():
                    hop_path = []
                    cur = i
                    hop_path.append(self.ids[cur])
                    flag = -1
                    count = 0
                    while cur != j:
                        count += 1
                        if count > 10:
                            flag = 1
                            break
                        flag = 0
                        action = self.ids[cur].action_labels[actions[cur]]
                        if action.get(j) is not None:
                            cur = action[j]
                            hop_path.append(self.ids[cur])
                        else:
                            flag = 1
                            break
                    if flag == 0:
                        actual_flow[i][j] = hop_path

            num = 0
            if time == 0:
                for i in actual_flow.keys():
                    for j in actual_flow[i].keys():
                        num += 1
                print('actual flow: ' + str(num))

            # random
            actual_flow_random = collections.defaultdict(dict)
            for i in TF.keys():
                for j in TF[i].keys():
                    hop_path = []
                    cur = i
                    hop_path.append(self.ids[cur])
                    flag = -1
                    count = 0
                    while cur != j:
                        count += 1
                        if count > 10:
                            flag = 1
                            break
                        flag = 0
                        action = self.ids[cur].action_labels[actions_random[cur]]
                        if action.get(j) is not None:
                            cur = action[j]
                            hop_path.append(self.ids[cur])
                        else:
                            flag = 1
                            break
                    if flag == 0:
                        actual_flow_random[i][j] = hop_path

            """
                link load 
                    id : id : V
            """
            # basic
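            # accumulate each delivered flow's demand on every link of its path,
            # recorded symmetrically in both directions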
            link_load = np.zeros([self.N, self.N])
            for i in actual_flow.keys():
                for j in actual_flow[i].keys():
                    path = actual_flow[i][j]
                    for k in range(len(path) - 1):
                        e1 = path[k]
                        e2 = path[k + 1]
                        link_load[e1.id][e2.id] += TF[i][j]
                        link_load[e2.id][e1.id] += TF[i][j]

            # random
            link_load_random = np.zeros([self.N, self.N])
            for i in actual_flow_random.keys():
                for j in actual_flow_random[i].keys():
                    path = actual_flow_random[i][j]
                    for k in range(len(path) - 1):
                        e1 = path[k]
                        e2 = path[k + 1]
                        link_load_random[e1.id][e2.id] += TF[i][j]
                        link_load_random[e2.id][e1.id] += TF[i][j]

            """
                ee throughput
                    id : id : T
            """
            # basic
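            # a flow's end-to-end throughput is the equal share of link capacity
            # (100) on its most loaded (bottleneck) link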
            ee_throughput = np.zeros([self.N, self.N])
            for i in actual_flow.keys():
                # input node i
                for j in actual_flow[i].keys():
                    path = actual_flow[i][j]
                    temp_min = 9999
                    for k in range(len(path) - 1):
                        node1 = path[k]
                        node2 = path[k + 1]
                        # TODO, enlarge link capacity of TT
                        if link_load[node1.id][node2.id] == 0:
                            ee = 100
                        else:
                            ee = 100 / link_load[node1.id][node2.id]
                        if ee < temp_min:
                            temp_min = ee
                    ee_throughput[i][j] = temp_min

            # random
            ee_throughput_random = np.zeros([self.N, self.N])
            for i in actual_flow_random.keys():
                # input node i
                for j in actual_flow_random[i].keys():
                    path = actual_flow_random[i][j]
                    temp_min = 9999
                    for k in range(len(path) - 1):
                        node1 = path[k]
                        node2 = path[k + 1]
                        # TODO, modify here, and the state
                        ee = 100 / link_load_random[node1.id][node2.id]  # safe: a delivered path loads each of its links with at least this flow's own demand
                        if ee < temp_min:
                            temp_min = ee
                    ee_throughput_random[i][j] = temp_min

            """
                next basic states for every node, neighbor part
            """
            states_ = collections.defaultdict(list)
            for i in range(self.N):
                for j in range(len(self.generator.matrix[i])):
                    if self.generator.matrix[i][j] == 1:
                        if link_load[i][j] == 0:
                            states_[i].append(100)
                        else:
                            states_[i].append(100 / link_load[i][j])

            """
                reward, 
                basic states, ee part
            """
            # basic
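            # an agent's reward is the mean throughput of the delivered flows it
            # forwards (flows where it is the destination are excluded); the
            # per-destination averages also extend the next state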
            rewards = {}
            for agent in self.RLs:
                temp_table = collections.defaultdict(list)
                for des in agent.table:
                    temp_table[des].append(0)
                for des in agent.table_peer:
                    temp_table[des].append(0)
                for des in agent.table_provider:
                    temp_table[des].append(0)

                sum_flow = 0
                sum_ee = 0
                for i in actual_flow.keys():
                    for j in actual_flow[i].keys():
                        path = actual_flow[i][j]
                        if agent in path and agent is not path[-1]:
                            sum_flow += 1
                            sum_ee += ee_throughput[i][j]
                            temp_table[j].append(ee_throughput[i][j])
                if sum_flow == 0:
                    rewards[agent.id] = 0
                else:
                    rewards[agent.id] = sum_ee / sum_flow

                for i in temp_table:
                    avg = sum(temp_table[i]) / len(temp_table[i])
                    states_[agent.id].append(avg)

            """
                system throughput
            """
            # basic
            sum_all = 0
            for i in range(self.N):
                for j in range(self.N):
                    sum_all += ee_throughput[i][j]

            # random
            sum_all_random = 0
            for i in range(self.N):
                for j in range(self.N):
                    sum_all_random += ee_throughput_random[i][j]

            """
                agent learns through s, a, r, s_
            """
            # basic
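            # actor-critic update from a single transition sampled uniformly out
            # of a sliding-window replay pool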
            for agent in self.RLs:
                s = np.array(states[agent.id])
                r = rewards[agent.id]
                s_ = np.array(states_[agent.id])
                a = actions[agent.id]
                exp = [s, r, a, s_]  # stored order: state, reward, action, next state
                if len(self.experience_pool[agent.id]) >= self.pool_size:
                    self.experience_pool[agent.id].pop(0)  # drop the oldest transition
                self.experience_pool[agent.id].append(exp)
                experience = random.choice(self.experience_pool[agent.id])
                s, r, a, s_ = experience
                td_error = agent.critic.learn(s, r, s_)
                agent.actor.learn(s, a, td_error)

            states = states_

            sums.append(sum_all)
            sums_random.append(sum_all_random)
            # 'global_optimal' tracks the best total throughput observed so far
            if sum_all_random > self.global_optimal:
                self.global_optimal = sum_all_random
            if sum_all > self.global_optimal:
                self.global_optimal = sum_all
            print('mini-rl: ' + str(sum_all))
            print('random: ' + str(sum_all_random))
            print('global optimal: ' + str(self.global_optimal))

            if time % 3000 == 0 and time != 0:
                with open('basic-mini-sums' + str(time) + '.txt', 'w') as f:
                    f.write(str(sums))
                with open('basic-mini-sums_random' + str(time) + '.txt', 'w') as f:
                    f.write(str(sums_random))
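
The link_load and ee_throughput loops above implement a simple bottleneck fair-share model. Below is a minimal standalone sketch of that model, assuming unit demands and the uniform link capacity of 100 used throughout; the function name and dict-based inputs are illustrative, not from the original code:

import collections

def bottleneck_shares(paths, demands, capacity=100):
    """Equal bottleneck share per flow; paths are assumed to have >= 2 nodes."""
    # per-link load: each flow adds its demand to every link of its path,
    # recorded in both directions, as in the link_load loops above
    load = collections.defaultdict(int)
    for f, path in paths.items():
        for a, b in zip(path, path[1:]):
            load[(a, b)] += demands[f]
            load[(b, a)] += demands[f]
    # a flow's throughput is the equal share of capacity on its bottleneck link
    return {
        f: min(capacity / load[(a, b)] for a, b in zip(path, path[1:]))
        for f, path in paths.items()
    }

# e.g. two unit flows sharing link (1, 2):
# bottleneck_shares({'f1': [0, 1, 2], 'f2': [3, 1, 2]}, {'f1': 1, 'f2': 1})
# -> {'f1': 50.0, 'f2': 50.0}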
Code example #5

class Game:
    def __init__(self):
        # Internet topology
        self.generator = MiniGenerator(10, 2)
        self.generator.build_topology()

        self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
        self.N = 12
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs

        self.MAX = 100000
        self.ids = self.generator.ids

        self.TF = collections.defaultdict(dict)
        self.tf_num = 10
        self.TF[6][9] = 1
        self.TF[6][10] = 1
        self.TF[7][4] = 1
        self.TF[7][5] = 1
        self.TF[8][10] = 1
        self.TF[8][11] = 1
        self.TF[0][9] = 1
        self.TF[1][11] = 1
        self.TF[2][5] = 1
        self.TF[3][4] = 1

        self.global_optimal = 600
        self.episode = 0.8

        # experience replay pool: experience_pool[flow][agent] stores
        # [s, a, r, s'] transitions, capped at pool_size
        self.experience_pool = collections.defaultdict(dict)
        self.pool_size = 50
        self.sample_size = 10
        for i in range(self.tf_num):
            for j in range(self.N):
                self.experience_pool[i][j] = []

    def play_game(self):
        print("play")
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))  # assumes a module-level gpu_options, e.g. tf.GPUOptions(allow_growth=True)
        print('sess')
        """
            init states for every node:
                src, dst, neighbor, e-e, others' actions
        """
        # basic states
        states = collections.defaultdict(list)
        for i in range(self.N):
            # add neighbor
            for j in range(len(self.generator.matrix[i])):
                if self.generator.matrix[i][j] == 1:
                    states[i].append(0)

            # reachable end-to-end throughput (all advertised are considered here)
            node = self.ids[i]
            for d in node.table:
                states[i].append(0)
            for d in node.table_peer:
                states[i].append(0)
            for d in node.table_provider:
                states[i].append(0)

        # combine others' states and last actions for each flow
        states_g = collections.defaultdict(list)
        for i in range(self.N):
            # add flow num
            states_g[i] = [0]
            # all agents' basic states
            for j in range(self.N):
                states_g[i] += states[j]
            # actions of all agents for all flows
            for k in range(self.tf_num):
                for q in range(self.N):
                    states_g[i].append(0)
        """
            create RL module
        """
        # TODO, give the action_labels
        for i in self.RLs:
            print("create mode for: " + str(i.id) + ", version -1")
            n_features_critic = len(states_g[i.id])  # len(states[i.id]) + 1
            n_features_actor = len(states[i.id]) + 1  # computed but unused in this DQN variant
            dqn = DeepQNetwork(sess, n_features_critic, i.n_actions, i.id,
                               i.action_labels)
            i.set_dqn(dqn)
            sess.run(tf.global_variables_initializer())

        print("model created")
        """
            loop time as time epoch
        """
        TF = self.TF
        # keep updating
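        # seed every (flow, node) action with a valid next hop so the first
        # rollout cannot dead-end; filter[j] lists the next hops that reach
        # destination j (a list containing -1 apparently marks "no valid route")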
        actions = collections.defaultdict(dict)
        f_num = 0
        for i in TF.keys():
            for j in TF[i].keys():
                for k in range(self.N):
                    # TODO, init without path error
                    valida = self.ids[k].filter[j]
                    if -1 in valida:
                        actions[f_num][k] = 0
                    else:
                        actions[f_num][k] = self.ids[k].action_labels.index(
                            random.choice(valida))
                f_num += 1

        sums = []
        for time in range(self.MAX):
            if time % 3000 == 0 and time != 0:
                self.episode /= 2  # halve the exploration rate every 3000 epochs
            # if time % 100 == 0 and time != 0:
            #     states = collections.defaultdict(list)
            #     for i in range(self.N):
            #         # add neighbor
            #         for j in range(len(self.generator.matrix[i])):
            #             if self.generator.matrix[i][j] == 1:
            #                 states[i].append(0)
            #
            #         # reachable end-to-end throughput (all advertised are considered here)
            #         node = self.ids[i]
            #         for d in node.table:    states[i].append(0)
            #         for d in node.table_peer:   states[i].append(0)
            #         for d in node.table_provider:   states[i].append(0)
            #
            #     # combine others' states and last actions for each flow
            #     states_g = collections.defaultdict(list)
            #     for i in range(self.N):
            #         # add flow num
            #         states_g[i] = [0]
            #         # all agents' basic states
            #         for j in range(self.N):
            #             states_g[i] += states[j]
            #         # actions of all agents for all flows
            #         for k in range(self.tf_num):
            #             for q in range(self.N):
            #                 states_g[i].append(0)
            #
            #     actions = collections.defaultdict(dict)
            #     f_num = 0
            #     for i in TF.keys():
            #         for j in TF[i].keys():
            #             for k in range(self.N):
            #                 # TODO, init without path error
            #                 valida = self.ids[k].filter[j]
            #                 if -1 in valida:
            #                     actions[f_num][k] = 0
            #                 else:
            #                     actions[f_num][k] = self.ids[k].action_labels.index(random.choice(valida))
            #             f_num += 1

            print("time: " + str(time))
            pro = random.random()
            train_state_pool = collections.defaultdict(dict)
            train_local_view = collections.defaultdict(dict)
            flow_num = 0
            sum_all = 0
            flow_actual_path = collections.defaultdict(list)
            rewards = {}
            for i in TF.keys():
                for j in TF[i].keys():
                    for agent in self.Ns:
                        # store state and state'
                        train_state_pool[flow_num][agent.id] = []
                        # specific one node to one flow
                        states_g[agent.id][0] = flow_num
                        ss = np.array(states_g[agent.id])

                        local_view = np.array([flow_num] + states[agent.id])
                        if pro > self.episode:
                            # TODO, use filter process, give the valid next-hops
                            valida = agent.filter[j]
                            # exploit: let the DQN pick among the valid next hops
                            actions[flow_num][agent.id] = agent.dqn.choose_action(ss, valida)

                        # TODO, random need filter
                        else:
                            valida = agent.filter[j]
                            if -1 in valida:
                                actions[flow_num][agent.id] = 0
                            else:
                                actions[flow_num][
                                    agent.id] = agent.action_labels.index(
                                        random.choice(valida))

                        train_state_pool[flow_num][agent.id].append(ss)
                        train_local_view[flow_num][agent.id] = local_view

                    # update states to ss_
                    states_, rewards, sum_all, hh = self.update_state(
                        flow_num, actions)
                    flow_actual_path[flow_num] = hh

                    states_g_ = collections.defaultdict(list)
                    for k in range(self.N):
                        states_g_[k] = [flow_num]
                        for q in range(self.N):
                            states_g_[k] += states_[q]
                        for z in actions.keys():
                            for x in actions[z].keys():
                                states_g_[k].append(actions[z][x])
                    flow_num += 1
                    states_g = states_g_
                    states = states_

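            # second pass: only agents that actually forwarded the flow (its
            # destination excluded) store a transition and run a DQN update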
            flow_num = 0
            for i in TF.keys():
                for j in TF[i].keys():
                    for agent in self.Ns:
                        # TODO, only make useful agents to learn
                        if agent not in flow_actual_path[flow_num][:-1]:
                            continue
                        ss = train_state_pool[flow_num][agent.id][0]
                        ss_ = states_g[agent.id]  # states[agent.id]  #
                        ss_[0] = flow_num
                        ss_ = np.array(ss_)
                        # ss_ = np.array([flow_num] + ss_)
                        view = train_local_view[flow_num][agent.id]

                        cur_exp = [
                            ss, ss_, actions[flow_num][agent.id],
                            rewards[agent.id], view
                        ]  # view (the local state) is kept but not passed to the DQN
                        agent.dqn.store_transition(cur_exp[0], cur_exp[2],
                                                   cur_exp[3], cur_exp[1])
                        agent.dqn.learn()
                    flow_num += 1

            sums.append(sum_all)
            print('game12-dqn-glb: ' + str(sum_all))
            if time % 3000 == 0 and time != 0:
                with open('game12-dqn-glb' + str(time) + '.txt', 'w') as f:
                    f.write(str(sums))

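    # update_state: replay all flows under the current action table and return
    # next states, per-agent rewards, total throughput, and cur_flow's path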
    def update_state(self, cur_flow, actions):
        TF = self.TF
        actual_flow = collections.defaultdict(dict)
        flow_num = 0
        path_return = []
        for i in TF.keys():
            for j in TF[i].keys():
                hop_path = []
                cur = i
                hop_path.append(self.ids[cur])
                flag = -1
                count = 0
                while cur != j:
                    count += 1
                    if count > 10:
                        flag = 1
                        break
                    flag = 0
                    # TODO, action type changed
                    action = self.ids[cur].action_labels[actions[flow_num]
                                                         [cur]]
                    cur = action
                    hop_path.append(self.ids[cur])

                if flag == 0:
                    actual_flow[i][j] = hop_path
                    if flow_num == cur_flow:
                        path_return = hop_path
                else:
                    print("error in hop path")
                    print("==================from " + str(i) + ' to ' + str(j))
                    for node in hop_path:
                        print(node.id)
                    print(actions)
                    sys.exit(0)
                flow_num += 1

        link_load = np.zeros([self.N, self.N])
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                for k in range(len(path) - 1):
                    e1 = path[k]
                    e2 = path[k + 1]
                    link_load[e1.id][e2.id] += TF[i][j]
                    link_load[e2.id][e1.id] += TF[i][j]

        # store flow information on each link
        # node1:node2:[[src,dst],[src,dst]]
        # node2:node1:[[src,dst],...]
        link_flow_records = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    link_flow_records[i][j] = []
                    link_flow_records[j][i] = []

        ee_throughput = np.zeros([self.N, self.N])
        for i in actual_flow.keys():
            # input node i
            for j in actual_flow[i].keys():
                flow = [i, j]
                path = actual_flow[i][j]
                temp_min = 9999
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    # record flow 'i j' on link 'node1 node2'
                    link_flow_records[node1.id][node2.id].append(flow)
                    link_flow_records[node2.id][node1.id].append(flow)

                    ee = 100 / link_load[node1.id][node2.id]
                    if ee < temp_min:
                        temp_min = ee
                ee_throughput[i][j] = temp_min
        # ee is the basic (equal bottleneck share) throughput of each flow;
        # the residual-capacity pass below tops it up

        link_residue = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    # for link 'i j'
                    remain = 100
                    for flow in link_flow_records[i][j]:
                        remain -= ee_throughput[flow[0]][flow[1]]
                    link_residue[i][j] = remain
                    link_residue[j][i] = remain

        # increase each flow throughput, update link_residue
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                temp_min = 9999
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    if link_residue[node1.id][node2.id] < temp_min:
                        temp_min = link_residue[node1.id][node2.id]
                # increase
                if temp_min == 0:
                    continue
                ee_throughput[i][j] += temp_min
                # update
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    link_residue[node1.id][node2.id] -= temp_min
                    link_residue[node2.id][node1.id] -= temp_min

        states_ = collections.defaultdict(list)
        for i in range(self.N):
            for j in range(len(self.generator.matrix[i])):
                if self.generator.matrix[i][j] == 1:
                    states_[i].append(link_load[i][j])
                    # if link_load[i][j] == 0:
                    #     states_[i].append(100)
                    # else:
                    #     states_[i].append(100 / link_load[i][j])

        rewards = {}
        for agent in self.RLs:
            temp_table = collections.defaultdict(list)
            for des in agent.table:
                temp_table[des].append(0)
            for des in agent.table_peer:
                temp_table[des].append(0)
            for des in agent.table_provider:
                temp_table[des].append(0)

            sum_flow = 0
            sum_ee = 0
            for i in actual_flow.keys():
                for j in actual_flow[i].keys():
                    path = actual_flow[i][j]
                    if agent in path and agent is not path[-1]:
                        sum_flow += 1
                        sum_ee += ee_throughput[i][j]
                        temp_table[j].append(ee_throughput[i][j])
            if sum_flow == 0:
                rewards[agent.id] = 0
            else:
                rewards[agent.id] = sum_ee / sum_flow

            for i in temp_table:
                avg = sum(temp_table[i]) / len(temp_table[i])
                states_[agent.id].append(avg)

        sum_all = 0
        for i in range(self.N):
            for j in range(self.N):
                sum_all += ee_throughput[i][j]

        return states_, rewards, sum_all, path_return
Code example #6

class Game:
    def __init__(self):
        # Internet topology
        self.generator = MiniGenerator(10, 2)
        self.generator.build_topology()
        self.generator.build_directed_matrix()

        self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
        self.N = 12
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs

        self.MAX = 100000
        self.ids = self.generator.ids

        self.TF = collections.defaultdict(dict)
        self.tf_num = 10
        self.TF[6][9] = 1
        self.TF[6][10] = 1
        self.TF[7][4] = 1
        self.TF[7][5] = 1
        self.TF[8][10] = 1
        self.TF[8][11] = 1
        self.TF[0][9] = 1
        self.TF[1][11] = 1
        self.TF[2][5] = 1
        self.TF[3][4] = 1

        # self.TF[9][6] = 1
        # self.TF[10][6] = 1
        # self.TF[4][7] = 1
        # self.TF[5][7] = 1
        # self.TF[10][8] = 1
        # self.TF[11][8] = 1
        # self.TF[9][0] = 1
        # self.TF[11][1] = 1
        # self.TF[5][2] = 1
        # self.TF[4][3] = 1

        # experience replay pool: experience_pool[flow][agent] stores
        # [s, r, a, s'] transitions, capped at pool_size
        self.experience_pool = collections.defaultdict(dict)
        self.pool_size = 10
        for i in range(self.tf_num):
            for j in range(self.N):
                self.experience_pool[i][j] = []

    def play_game(self):
        print("play")
        sess = tf.Session()
        print('sess')
        """
            init states for every node:
                src, dst, neighbor, e-e, others' actions
        """
        states = collections.defaultdict(list)
        for i in range(self.N):
            # add flow number
            states[i].append(0)

            # add neighbor
            for j in range(len(self.generator.matrix[i])):
                if self.generator.matrix[i][j] == 1:
                    states[i].append(100)

            # reachable end-to-end throughput (all advertised are considered here)
            node = self.ids[i]
            for d in node.table:
                states[i].append(0)
            for d in node.table_peer:
                states[i].append(0)
            for d in node.table_provider:
                states[i].append(0)
            # TODO, consider how to combine actions
        """
            create RL module
        """
        for i in self.RLs:
            print("create mode for: " + str(i.id) + ", version -1")
            n_features = len(states[i.id])
            actor = Actor(sess, n_features, i.n_actions, i.id, -1)
            critic = Critic(sess, n_features, i.id, -1)
            i.set_rl_setting(actor, critic)
            sess.run(tf.global_variables_initializer())

        print("model created")
        """
            loop time as time epoch
        """
        TF = self.TF
        # keep updating
        actions = collections.defaultdict(dict)
        for i in range(self.tf_num):
            for j in range(self.N):
                actions[i][j] = 0

        sums = []
        for time in range(self.MAX):
            print("begin time epoch: " + str(time))
            train_state_pool = collections.defaultdict(dict)
            flow_num = 0
            sum_all = 0
            rewards = {}
            for i in TF.keys():
                for j in TF[i].keys():
                    for agent in self.Ns:
                        # store state and state'
                        train_state_pool[flow_num][agent.id] = []
                        # specific one node to one flow
                        states[agent.id][0] = flow_num
                        ss = np.array(states[agent.id])
                        pro = random.random()
                        if pro > 0.1:
                            actions[flow_num][
                                agent.id] = agent.actor.choose_action(ss)
                        else:
                            actions[flow_num][agent.id] = random.randint(
                                0, agent.n_actions - 1)

                        train_state_pool[flow_num][agent.id].append(ss)

                    # update states to ss_
                    states_, rewards, sum_all = self.update_state(
                        flow_num, actions)

                    flow_num += 1
                    states = states_

            # TODO, add experience replay
            flow_num = 0
            for i in TF.keys():
                for j in TF[i].keys():
                    for agent in self.Ns:
                        ss = train_state_pool[flow_num][agent.id][0]
                        ss_ = states[agent.id]
                        ss_[0] = flow_num
                        ss_ = np.array(ss_)

                        exp = []
                        exp.append(ss)
                        exp.append(rewards[agent.id])
                        exp.append(actions[flow_num][agent.id])
                        exp.append(ss_)

                        pool = self.experience_pool[flow_num][agent.id]
                        if len(pool) >= self.pool_size:
                            pool.pop(0)  # drop the oldest transition
                        pool.append(exp)

                        # sample one stored transition uniformly at random
                        experience = random.choice(pool)

                        s, r, a, s_ = experience  # state, reward, action, next state

                        td_error = agent.critic.learn(s, r, s_)
                        agent.actor.learn(s, a, td_error)
                    flow_num += 1

            sums.append(sum_all)
            print('cut-rl-tf10-exp: ' + str(sum_all))
            if time % 500 == 0 and time != 0:
                with open('cut-final-tf10-exp-sums' + str(time) + '.txt', 'w') as f:
                    f.write(str(sums))

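    # update_state: replay all flows under the current action table, compute
    # per-link loads and fair-share flow throughputs (with a residual-capacity
    # top-up), and return next states, per-agent rewards, and total throughput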
    def update_state(self, cur_flow, actions):
        TF = self.TF
        actual_flow = collections.defaultdict(dict)
        flow_num = 0
        for i in TF.keys():
            for j in TF[i].keys():
                hop_path = []
                cur = i
                hop_path.append(self.ids[cur])
                flag = -1
                count = 0
                while cur != j:
                    count += 1
                    if count > 10:
                        print("error in hop path")
                        flag = 1
                        break
                    flag = 0
                    action = self.ids[cur].action_labels[actions[flow_num]
                                                         [cur]]
                    if action.get(j) is not None:
                        cur = action[j]
                        hop_path.append(self.ids[cur])
                    else:
                        flag = 1
                        print("error in hop path")
                        break
                if flag == 0:
                    actual_flow[i][j] = hop_path
                flow_num += 1

        link_load = np.zeros([self.N, self.N])
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                for k in range(len(path) - 1):
                    e1 = path[k]
                    e2 = path[k + 1]
                    link_load[e1.id][e2.id] += TF[i][j]
                    link_load[e2.id][e1.id] += TF[i][j]

        # store flow information on each link
        # node1:node2:[[src,dst],[src,dst]]
        # node2:node1:[[src,dst],...]
        link_flow_records = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    link_flow_records[i][j] = []
                    link_flow_records[j][i] = []

        ee_throughput = np.zeros([self.N, self.N])
        for i in actual_flow.keys():
            # input node i
            for j in actual_flow[i].keys():
                flow = [i, j]
                path = actual_flow[i][j]
                temp_min = 9999
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    # record flow 'i j' on link 'node1 node2'
                    link_flow_records[node1.id][node2.id].append(flow)
                    link_flow_records[node2.id][node1.id].append(flow)

                    ee = 100 / link_load[node1.id][node2.id]
                    if ee < temp_min:
                        temp_min = ee
                ee_throughput[i][j] = temp_min
        # ee is the basic (equal bottleneck share) throughput of each flow;
        # the residual-capacity pass below tops it up

        link_residue = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    # for link 'i j'
                    remain = 100
                    for flow in link_flow_records[i][j]:
                        remain -= ee_throughput[flow[0]][flow[1]]
                    link_residue[i][j] = remain
                    link_residue[j][i] = remain

        # TODO, increase each flow throughput, update link_residue
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                temp_min = 9999
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    if link_residue[node1.id][node2.id] < temp_min:
                        temp_min = link_residue[node1.id][node2.id]
                # increase
                if temp_min == 0:
                    continue
                ee_throughput[i][j] += temp_min
                # update
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    link_residue[node1.id][node2.id] -= temp_min
                    link_residue[node2.id][node1.id] -= temp_min

        states_ = collections.defaultdict(list)
        for i in range(self.N):
            states_[i].append(cur_flow)
            for j in range(len(self.generator.matrix[i])):
                if self.generator.matrix[i][j] == 1:
                    if link_load[i][j] == 0:
                        states_[i].append(100)
                    else:
                        states_[i].append(100 / link_load[i][j])

        rewards = {}
        for agent in self.RLs:
            temp_table = collections.defaultdict(list)
            for des in agent.table:
                temp_table[des].append(0)
            for des in agent.table_peer:
                temp_table[des].append(0)
            for des in agent.table_provider:
                temp_table[des].append(0)

            sum_flow = 0
            sum_ee = 0
            for i in actual_flow.keys():
                for j in actual_flow[i].keys():
                    path = actual_flow[i][j]
                    if agent in path and agent is not path[-1]:
                        sum_flow += 1
                        sum_ee += ee_throughput[i][j]
                        temp_table[j].append(ee_throughput[i][j])
            if sum_flow == 0:
                rewards[agent.id] = 0
            else:
                rewards[agent.id] = sum_ee / sum_flow

            for i in temp_table:
                avg = sum(temp_table[i]) / len(temp_table[i])
                states_[agent.id].append(avg)

        sum_all = 0
        for i in range(self.N):
            for j in range(self.N):
                sum_all += ee_throughput[i][j]

        return states_, rewards, sum_all
Code example #7

class Game:
    def __init__(self):
        # Internet topology
        self.generator = MiniGenerator(10, 2)
        self.generator.build_topology()
        self.generator.build_directed_matrix()

        self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
        self.N = 12
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs

        self.MAX = 100000
        self.ids = self.generator.ids

        # for each agent, add [s,a,r,s'] as element. size
        self.experience_pool = collections.defaultdict(list)
        self.pool_size = 10

        # TODO, define TF, Matrix, Linprog
        self.TF = collections.defaultdict(dict)
        self.TF[6][9] = 1
        self.TF[6][10] = 1
        self.TF[7][4] = 1
        self.TF[7][5] = 1
        self.TF[8][10] = 1
        self.TF[8][11] = 1
        self.TF[0][9] = 1
        self.TF[1][11] = 1
        self.TF[2][5] = 1
        self.TF[3][4] = 1

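    # random-routing baseline: every node picks a uniformly random action for
    # every flow at every epoch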
    def play_game(self):
        """
            loop time as time epoch
        """
        TF = self.TF
        # keep updating
        actions = collections.defaultdict(dict)
        for i in range(10):  # 10 = number of (src, dst) flows defined in TF
            for j in range(self.N):
                actions[i][j] = 0

        sums = []
        for time in range(self.MAX):
            print("begin time epoch: " + str(time))
            train_state_pool = collections.defaultdict(dict)
            flow_num = 0
            sum_all = 0
            for i in TF.keys():
                for j in TF[i].keys():
                    for agent in self.Ns:
                        actions[flow_num][agent.id] = random.randint(
                            0, agent.n_actions - 1)

                    # update states to ss_
                    sum_all = self.update_state(flow_num, actions)

                    flow_num += 1

            sums.append(sum_all)
            print('cut-random: ' + str(sum_all))
            if time % 10000 == 0 and time != 0:
                with open('cut-mini-random' + str(time) + '.txt', 'w') as f:
                    f.write(str(sums))

    def update_state(self, cur_flow, actions):
        TF = self.TF
        actual_flow = collections.defaultdict(dict)
        flow_num = 0
        for i in TF.keys():
            for j in TF[i].keys():
                hop_path = []
                cur = i
                hop_path.append(self.ids[cur])
                flag = -1
                count = 0
                while cur != j:
                    count += 1
                    if count > 10:
                        print("error in hop path")
                        flag = 1
                        break
                    flag = 0
                    action = self.ids[cur].action_labels[actions[flow_num]
                                                         [cur]]
                    if action.get(j) is not None:
                        cur = action[j]
                        hop_path.append(self.ids[cur])
                    else:
                        flag = 1
                        print("error in hop path")
                        break
                if flag == 0:
                    actual_flow[i][j] = hop_path
                flow_num += 1

        link_load = np.zeros([self.N, self.N])
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                for k in range(len(path) - 1):
                    e1 = path[k]
                    e2 = path[k + 1]
                    link_load[e1.id][e2.id] += TF[i][j]
                    link_load[e2.id][e1.id] += TF[i][j]

        # store flow information on each link
        # node1:node2:[[src,dst],[src,dst]]
        # node2:node1:[[src,dst],...]
        link_flow_records = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    link_flow_records[i][j] = []
                    link_flow_records[j][i] = []

        ee_throughput = np.zeros([self.N, self.N])
        for i in actual_flow.keys():
            # input node i
            for j in actual_flow[i].keys():
                flow = [i, j]
                path = actual_flow[i][j]
                temp_min = 9999
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    # record flow 'i j' on link 'node1 node2'
                    link_flow_records[node1.id][node2.id].append(flow)
                    link_flow_records[node2.id][node1.id].append(flow)

                    ee = 100 / link_load[node1.id][node2.id]
                    if ee < temp_min:
                        temp_min = ee
                ee_throughput[i][j] = temp_min
        # ee is the basic (equal bottleneck share) throughput of each flow;
        # the residual-capacity pass below tops it up

        link_residue = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    # for link 'i j'
                    remain = 100
                    for flow in link_flow_records[i][j]:
                        remain -= ee_throughput[flow[0]][flow[1]]
                    link_residue[i][j] = remain
                    link_residue[j][i] = remain

        # TODO, increase each flow throughput, update link_residue
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                temp_min = 9999
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    if link_residue[node1.id][node2.id] < temp_min:
                        temp_min = link_residue[node1.id][node2.id]
                # increase
                if temp_min == 0:
                    continue
                ee_throughput[i][j] += temp_min
                # update
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    link_residue[node1.id][node2.id] -= temp_min
                    link_residue[node2.id][node1.id] -= temp_min

        sum_all = 0
        for i in range(self.N):
            for j in range(self.N):
                sum_all += ee_throughput[i][j]

        return sum_all
Code example #8

class Game:
    def __init__(self):
        # Internet topology
        self.generator = MiniGenerator(10, 2)
        self.generator.build_topology()
        self.generator.build_directed_matrix()

        self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
        self.N = 12
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs

        self.MAX = 100000
        self.ids = self.generator.ids

        # define time epoch
        self.tf_num = 20
        self.TF_time = collections.defaultdict(dict)
        self.TF_time[6][9] = 3
        self.TF_time[6][10] = 2
        self.TF_time[7][4] = 1
        self.TF_time[7][5] = 3
        self.TF_time[8][10] = 2
        self.TF_time[8][11] = 1
        self.TF_time[0][9] = 1
        self.TF_time[1][11] = 3
        self.TF_time[2][5] = 2
        self.TF_time[3][4] = 3

        self.TF_time[6][11] = 2
        self.TF_time[7][10] = 2
        self.TF_time[8][4] = 1
        self.TF_time[2][4] = 3
        self.TF_time[3][5] = 2
        self.TF_time[0][10] = 1
        self.TF_time[1][9] = 1
        self.TF_time[6][11] = 3  # note: overwrites the value 2 assigned to (6, 11) above
        self.TF_time[8][5] = 2
        self.TF_time[3][10] = 1

        self.TF_id = []
        self.TF_id.append([6, 9])
        self.TF_id.append([6, 10])
        self.TF_id.append([7, 4])
        self.TF_id.append([7, 5])
        self.TF_id.append([8, 10])
        self.TF_id.append([8, 11])
        self.TF_id.append([0, 9])
        self.TF_id.append([1, 11])
        self.TF_id.append([2, 5])
        self.TF_id.append([3, 4])

        self.TF_id.append([6, 11])
        self.TF_id.append([7, 10])
        self.TF_id.append([8, 4])
        self.TF_id.append([2, 4])
        self.TF_id.append([3, 5])
        self.TF_id.append([0, 10])
        self.TF_id.append([1, 9])
        self.TF_id.append([6, 11])  # duplicate of index 10; list.index([6, 11]) always returns the first occurrence
        self.TF_id.append([8, 5])
        self.TF_id.append([3, 10])

        self.global_optimal = 600
        self.episode = 0.2

        # experience replay pool: experience_pool[flow][agent] stores
        # [s, a, r, s'] transitions, capped at pool_size
        self.experience_pool = collections.defaultdict(dict)
        self.pool_size = 200
        self.sample_size = 16
Code example #9

class Game:
    def __init__(self):
        # Internet topology
        self.generator = MiniGenerator(10, 2)
        self.generator.build_topology()
        self.generator.build_directed_matrix()

        self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
        self.N = 12
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs

        self.MAX = 100000
        self.ids = self.generator.ids

        # define time epoch
        self.tf_num = 20
        self.TF_time = collections.defaultdict(dict)
        self.TF_time[6][9] = 3
        self.TF_time[6][10] = 2
        self.TF_time[7][4] = 1
        self.TF_time[7][5] = 3
        self.TF_time[8][10] = 2
        self.TF_time[8][11] = 1
        self.TF_time[0][9] = 1
        self.TF_time[1][11] = 3
        self.TF_time[2][5] = 2
        self.TF_time[3][4] = 3

        self.TF_time[6][11] = 2
        self.TF_time[7][10] = 2
        self.TF_time[8][4] = 1
        self.TF_time[2][4] = 3
        self.TF_time[3][5] = 2
        self.TF_time[0][10] = 1
        self.TF_time[1][9] = 1
        self.TF_time[6][11] = 3  # note: overwrites the value 2 assigned to (6, 11) above
        self.TF_time[8][5] = 2
        self.TF_time[3][10] = 1

        self.TF_id = []
        self.TF_id.append([6, 9])
        self.TF_id.append([6, 10])
        self.TF_id.append([7, 4])
        self.TF_id.append([7, 5])
        self.TF_id.append([8, 10])
        self.TF_id.append([8, 11])
        self.TF_id.append([0, 9])
        self.TF_id.append([1, 11])
        self.TF_id.append([2, 5])
        self.TF_id.append([3, 4])

        self.TF_id.append([6, 11])
        self.TF_id.append([7, 10])
        self.TF_id.append([8, 4])
        self.TF_id.append([2, 4])
        self.TF_id.append([3, 5])
        self.TF_id.append([0, 10])
        self.TF_id.append([1, 9])
        self.TF_id.append([6, 11])  # duplicate of index 10; list.index([6, 11]) always returns the first occurrence
        self.TF_id.append([8, 5])
        self.TF_id.append([3, 10])

        self.global_optimal = 600
        self.episode = 0.2

        # experience replay pool: experience_pool[flow][agent] stores
        # [s, a, r, s'] transitions, capped at pool_size
        self.experience_pool = collections.defaultdict(dict)
        self.pool_size = 200
        self.sample_size = 16

        # self.randgame = RandomGame()

    def play_game(self):
        print("play")
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))  # gpu_options is again assumed to be defined at module level
        print('sess')
        """
            init states for every node:
                src, dst, neighbor, e-e, others' actions
        """
        # basic states
        states = collections.defaultdict(list)
        for i in range(self.N):
            # add neighbor
            for j in range(len(self.generator.matrix[i])):
                if self.generator.matrix[i][j] == 1:
                    states[i].append(0)

            # reachable end-to-end throughput (all advertised are considered here)
            node = self.ids[i]
            for d in node.table:
                states[i].append(0)
            for d in node.table_peer:
                states[i].append(0)
            for d in node.table_provider:
                states[i].append(0)

        # TODO, combine others' states and last actions for each flow
        states_g = collections.defaultdict(list)
        for i in range(self.N):
            # add flow num
            states_g[i] = [0]
            # all agents' basic states
            for j in range(self.N):
                states_g[i] += states[j]
            # actions of all agents for all flows
            for k in range(self.tf_num):
                for q in range(self.N):
                    states_g[i].append(0)
        """
            create RL module
        """
        for i in self.RLs:
            print("create mode for: " + str(i.id) + ", version -1")
            n_features_critic = len(states_g[i.id])  # len(states[i.id]) + 1
            n_features_actor = len(states[i.id]) + 1
            actor = Actor(sess, n_features_actor, i.n_actions, i.id, -1,
                          i.action_labels)
            critic = Critic(sess, n_features_critic, i.id, -1)
            i.set_rl_setting(actor, critic)
        # initialize all variables once, after every agent's graph exists
        # (the original re-ran the initializer inside the loop, resetting all
        # previously created variables on each pass)
        sess.run(tf.global_variables_initializer())

        print("model created")
        """
            loop time as time epoch
        """
        # actions[flow_index][node_id] = index into that node's action_labels
        actions = collections.defaultdict(dict)
        for i in self.TF_time.keys():
            for j in self.TF_time[i].keys():
                f_num = self.TF_id.index([i, j])
                for k in range(self.N):
                    # TODO, init without path error
                    # valida: valid next hops toward j; -1 marks "no route"
                    valida = self.ids[k].filter[j]
                    if -1 in valida:
                        actions[f_num][k] = 0
                    else:
                        actions[f_num][k] = self.ids[k].action_labels.index(
                            random.choice(valida))

        sums = []
        sums_rand = []

        # initially active flows; TF[src][dst] = flow age in epochs
        TF = collections.defaultdict(dict)
        TF[6][9] = 0
        TF[6][10] = 0
        TF[7][4] = 0
        TF[7][5] = 0
        TF[8][10] = 0
        TF[8][11] = 0
        TF[0][9] = 0
        TF[1][11] = 0
        TF[2][5] = 0
        TF[3][4] = 0

        queue = collections.deque([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])  # flow indices waiting for injection
        for time in range(self.MAX):
            # collect active flows that have outlived their TF_time
            expire = []
            for i in TF.keys():
                for j in TF[i].keys():
                    if TF[i][j] > self.TF_time[i][j]:
                        # inject new flow from queue
                        # store old flow to queue
                        expire.append([i, j])

            for f in expire:
                i = f[0]
                j = f[1]
                TF[i].pop(j)
                queue.append(self.TF_id.index([i, j]))
                for k in actions[self.TF_id.index([i, j])].keys():
                    actions[self.TF_id.index([i, j])][k] = 0

                new_flow = queue.popleft()
                TF[self.TF_id[new_flow][0]][self.TF_id[new_flow][1]] = 0

            for rnd in range(500):  # 500 training rounds per flow epoch
                # rand_value = self.randgame.play_game(TF, self.tf_num)
                # NOTE: the original computed time * 10 + round, which makes
                # the counter overlap across epochs now that an epoch has 500
                # rounds; also renamed `round`, which shadowed the builtin
                whole_time = time * 500 + rnd
                print("time: " + str(whole_time))
                if whole_time % 3000 == 0 and whole_time != 0:
                    self.episode /= 2  # decay exploration
                pro = random.random()
                train_state_pool = collections.defaultdict(dict)
                train_local_view = collections.defaultdict(dict)
                flow_actual_path = collections.defaultdict(list)
                sum_all = 0
                rewards = {}
                for i in TF.keys():
                    for j in TF[i].keys():
                        for agent in self.Ns:
                            flow_num = self.TF_id.index([i, j])
                            # store state and state'
                            train_state_pool[flow_num][agent.id] = []
                            # specific one node to one flow
                            states_g[agent.id][0] = flow_num
                            ss = np.array(states_g[agent.id])

                            local_view = np.array([flow_num] +
                                                  states[agent.id])
                            if pro > self.episode:
                                # exploit: sample an action from the actor's policy
                                valida = agent.filter[j]
                                act = agent.actor.choose_action(local_view,
                                                                valida,
                                                                method='prob')
                                actions[flow_num][agent.id] = act
                            else:
                                # explore: pick a random valid next hop
                                valida = agent.filter[j]
                                if -1 in valida:
                                    actions[flow_num][agent.id] = 0
                                else:
                                    actions[flow_num][
                                        agent.id] = agent.action_labels.index(
                                            random.choice(valida))

                            train_state_pool[flow_num][agent.id].append(ss)
                            train_local_view[flow_num][agent.id] = local_view

                        flow_num = self.TF_id.index([i, j])
                        # update states to ss_
                        states_, rewards, sum_all, hh = self.update_state(
                            TF, flow_num, actions)
                        flow_actual_path[flow_num] = hh
                        states_g_ = collections.defaultdict(list)
                        for k in range(self.N):
                            states_g_[k] = [flow_num]
                            for q in range(self.N):
                                states_g_[k] += states_[q]
                            for z in actions.keys():
                                for x in actions[z].keys():
                                    states_g_[k].append(actions[z][x])
                        states_g = states_g_
                        states = states_

                for i in TF.keys():
                    for j in TF[i].keys():
                        flow_num = self.TF_id.index([i, j])
                        for agent in self.Ns:
                            # skip agents not on the flow's forwarding path
                            # (the destination itself takes no action)
                            if agent not in flow_actual_path[flow_num][:-1]:
                                continue
                            ss = train_state_pool[flow_num][agent.id][0]
                            ss_ = states_g[agent.id]  # states[agent.id]  #
                            # mutates the shared list; harmless here, since
                            # slot 0 is rewritten before every later use
                            ss_[0] = flow_num
                            ss_ = np.array(ss_)
                            # ss_ = np.array([flow_num] + ss_)
                            view = train_local_view[flow_num][agent.id]

                            # experience tuple [s, s', a, r, local_view]; it is
                            # trained on immediately (the experience_pool from
                            # __init__ is not used in this loop)
                            cur_exp = [
                                ss, ss_, actions[flow_num][agent.id],
                                rewards[agent.id], view
                            ]
                            td_error = agent.critic.learn(
                                cur_exp[0], cur_exp[3], cur_exp[1])
                            agent.actor.learn(cur_exp[4], cur_exp[2], td_error)

                sums.append(sum_all)
                # sums_rand.append(rand_value)
                print('game12-dtf-glb: ' + str(sum_all))
                # print('random: ' + str(rand_value))
                if whole_time % 3000 == 0 and whole_time != 0:
                    with open('game12-dtf-glb' + str(time) + '.txt', 'w') as f:
                        f.write(str(sums))
                    with open('game12-dtf-rand' + str(time) + '.txt', 'w') as f:
                        f.write(str(sums_rand))

            # advance every active flow's age by one epoch
            for i in TF.keys():
                for j in TF[i].keys():
                    TF[i][j] += 1

    def update_state(self, tf_active, cur_flow, actions):
        # `tf_active` holds the currently active flows (parameter renamed from
        # `tf`, which shadowed the tensorflow import; the call site passes it
        # positionally, so the rename is safe)
        TF = tf_active
        path_return = []  # hop path of cur_flow, filled in below
        actual_flow = collections.defaultdict(dict)
        for i in TF.keys():
            for j in TF[i].keys():
                flow_num = self.TF_id.index([i, j])
                hop_path = []
                cur = i
                hop_path.append(self.ids[cur])
                flag = -1
                count = 0
                while cur != j:
                    count += 1
                    if count > 10:
                        # forwarding loop or unreachable destination
                        print("error in hop path")
                        flag = 1
                        break
                    flag = 0
                    # next hop: the node id selected by cur's action for this flow
                    action = self.ids[cur].action_labels[actions[flow_num]
                                                         [cur]]
                    cur = action
                    hop_path.append(self.ids[cur])

                if flag == 0:
                    actual_flow[i][j] = hop_path
                    if flow_num == cur_flow:
                        path_return = hop_path
                else:
                    print("error in hop path")
                    print("==================from " + str(i) + ' to ' + str(j))
                    # the original iterated `for i in hop_path`, shadowing the
                    # outer loop variable; renamed for clarity
                    for node in hop_path:
                        print(node.id)
                    print(actions)
                    print(hop_path[-1].actor.oldp)
                    print(hop_path[-1].actor.newp)
                    sys.exit(0)

        # per-link flow counts; links are undirected, so both directions are kept
        link_load = np.zeros([self.N, self.N])
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                for k in range(len(path) - 1):
                    e1 = path[k]
                    e2 = path[k + 1]
                    link_load[e1.id][e2.id] += 1
                    link_load[e2.id][e1.id] += 1

        # flows carried by each link, stored symmetrically:
        #   link_flow_records[node1][node2] = [[src, dst], ...]
        #   link_flow_records[node2][node1] = [[src, dst], ...]
        link_flow_records = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    link_flow_records[i][j] = []
                    link_flow_records[j][i] = []

        ee_throughput = np.zeros([self.N, self.N])
        for i in actual_flow.keys():
            # input node i
            for j in actual_flow[i].keys():
                flow = [i, j]
                path = actual_flow[i][j]
                temp_min = 9999  # bottleneck (minimum) fair share along the path
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    # record flow 'i j' on link 'node1 node2'
                    link_flow_records[node1.id][node2.id].append(flow)
                    link_flow_records[node2.id][node1.id].append(flow)

                    ee = 100 / link_load[node1.id][node2.id]  # equal split of the assumed 100-unit link capacity
                    if ee < temp_min:
                        temp_min = ee
                ee_throughput[i][j] = temp_min
        # ee_throughput now holds each flow's bottleneck fair share; the pass
        # below hands any residual link capacity back to the flows

        # residual capacity left on each link after the fair-share pass
        link_residue = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    # for link 'i j'
                    remain = 100
                    for flow in link_flow_records[i][j]:
                        remain -= ee_throughput[flow[0]][flow[1]]
                    link_residue[i][j] = remain
                    link_residue[j][i] = remain

        # increase each flow throughput, update link_residue
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                temp_min = 9999
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    if link_residue[node1.id][node2.id] < temp_min:
                        temp_min = link_residue[node1.id][node2.id]
                # increase
                if temp_min <= 0:  # no residual capacity anywhere on the path
                    continue
                ee_throughput[i][j] += temp_min
                # update
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    link_residue[node1.id][node2.id] -= temp_min
                    link_residue[node2.id][node1.id] -= temp_min

        states_ = collections.defaultdict(list)
        for i in range(self.N):
            for j in range(len(self.generator.matrix[i])):
                if self.generator.matrix[i][j] == 1:
                    states_[i].append(link_load[i][j])
                    # if link_load[i][j] == 0:
                    #     states_[i].append(100)
                    # else:
                    #     states_[i].append(100 / link_load[i][j])

        rewards = {}
        # reward per agent: average end-to-end throughput over the flows it forwards
        for agent in self.RLs:
            temp_table = collections.defaultdict(list)
            for des in agent.table:
                temp_table[des].append(0)
            for des in agent.table_peer:
                temp_table[des].append(0)
            for des in agent.table_provider:
                temp_table[des].append(0)

            sum_flow = 0
            sum_ee = 0
            for i in actual_flow.keys():
                for j in actual_flow[i].keys():
                    path = actual_flow[i][j]
                    # credit the agent only for flows it forwards, not terminates
                    if agent in path and agent is not path[-1]:
                        sum_flow += 1
                        sum_ee += ee_throughput[i][j]
                        temp_table[j].append(ee_throughput[i][j])
            if sum_flow == 0:
                rewards[agent.id] = 0
            else:
                rewards[agent.id] = sum_ee / sum_flow

            for i in temp_table:
                avg = sum(temp_table[i]) / len(temp_table[i])
                states_[agent.id].append(avg)

        sum_all = 0
        for i in range(self.N):
            for j in range(self.N):
                sum_all += ee_throughput[i][j]

        return states_, rewards, sum_all, path_return
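
# --------------------------------------------------------------------------
# Standalone sketch (not part of the original code) of the two-pass
# throughput allocation used in update_state: pass 1 gives every flow the
# fair share of its most loaded link, pass 2 greedily hands back residual
# capacity. A 100-unit link capacity is assumed, matching the constant above;
# `allocate` and `paths` are illustrative names.
import collections


def allocate(paths, capacity=100):
    # count how many flows cross each undirected link
    load = collections.Counter()
    for path in paths.values():
        for a, b in zip(path, path[1:]):
            load[frozenset((a, b))] += 1

    # pass 1: each flow is capped by the fair share of its busiest link
    rate = {}
    for flow, path in paths.items():
        rate[flow] = min(capacity / load[frozenset((a, b))]
                         for a, b in zip(path, path[1:]))

    # residual capacity per link after the fair-share pass
    residue = dict.fromkeys(load, capacity)
    for flow, path in paths.items():
        for a, b in zip(path, path[1:]):
            residue[frozenset((a, b))] -= rate[flow]

    # pass 2: one greedy augmentation round, mirroring update_state --
    # each flow absorbs the bottleneck residue along its own path
    for flow, path in paths.items():
        links = [frozenset((a, b)) for a, b in zip(path, path[1:])]
        extra = min(residue[l] for l in links)
        if extra <= 0:
            continue
        rate[flow] += extra
        for l in links:
            residue[l] -= extra
    return rate


# two flows contending for link (1, 2): each ends up with 50 units
print(allocate({(0, 3): [0, 1, 2, 3], (1, 2): [1, 2]}))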