import collections
import random

import numpy as np
import tensorflow as tf

# Shared GPU settings for the per-process sessions created in agent_action
# and agent_learn. The original module never defines this; allow_growth is an
# assumed, conservative default.
gpu_options = tf.GPUOptions(allow_growth=True)

class Game:
    def __init__(self):
        """
            All these vars are unchanged during game
        """
        self.generator = HugeGenerator(91, 1)
        self.generator.build_topology()

        # TODO: modify this by dividing the node set differently.
        self.RLs = self.generator.Ms[8:]
        self.N = 92
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs

        self.MAX = 100000
        self.ids = self.generator.ids

        self.TF = collections.defaultdict(dict)
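        # Traffic matrix: TF[src][dst] = demand. Each of the 13 six-node
        # regions gets unit flows from node 0 to its header and tail
        # connection points, plus unit flows from several of its connection
        # points (header, header + 2, header + 3, tail) to the matching
        # points of the next region, wrapping from region 13 back to 1.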
        for region in range(1, 14):
            header_cp = 14 + 6 * (region - 1)
            tail_cp = 19 + 6 * (region - 1)
            self.TF[0][header_cp] = 1
            self.TF[0][tail_cp] = 1

            nei_region = region % 13 + 1  # successor region, wrapping 13 -> 1
            header_cp_nei = 14 + 6 * (nei_region - 1)
            tail_cp_nei = 19 + 6 * (nei_region - 1)
            self.TF[header_cp + 2][header_cp_nei + 2] = 1
            self.TF[header_cp + 3][header_cp_nei + 3] = 1
            self.TF[header_cp][header_cp_nei] = 1
            self.TF[tail_cp][tail_cp_nei] = 1

        self.tf_num = sum(len(dsts) for dsts in self.TF.values())
        print("Injected " + str(self.tf_num) + " flows")

        self.global_optimal = 600
        self.episode = 0
        self.memory = None
        self.agents_gpu = collections.defaultdict(list)
        # Pre-create one bucket of agent ids per GPU.
        self.agents_gpu[0] = []
        self.agents_gpu[1] = []
        self.agents_gpu[2] = []
        self.agents_gpu[3] = []

        # Experience replay: each agent stores [s, s', a, r, local_view]
        # transitions; pool_size caps the buffer, sample_size is the batch.
        self.pool_size = 16
        self.sample_size = 16
        # self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    def set_global_memory(self, m):
        self.memory = m

    def set_rl_agents(self):
        for i in self.RLs:
            print("create mode for: " + str(i.id) + ", version -1")
            n_features_critic = len(self.memory['states_g'][i.id])
            n_features_actor = len(self.memory['states'][i.id]) + 1
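            # The actor's extra feature (+1) is the flow index that
            # agent_action prepends to the local state; the critic
            # consumes the global state vector.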

            # "0 1" 1,0
            gpu_id = (i.id + 1) % 2

            with tf.device('/gpu:%d' % gpu_id):
                actor = Actor(n_features_actor, i.n_actions, i.id, -1, i.action_labels)
                critic = Critic(n_features_critic, i.id, -1)
            self.agents_gpu[gpu_id].append(i.id)
            i.set_rl_setting(actor, critic)

        # self.sess.run(tf.global_variables_initializer())
        print("model created")

    def agent_action(self, gpu_id, s_e, e_e):
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        for aid in self.agents_gpu[gpu_id]:
            self.ids[aid].actor.set_session(sess)
        sess.run(tf.global_variables_initializer())

        # Worker loop: wait for the start event, choose an action for every
        # flow with each agent on this GPU, publish the results, then signal
        # the end event and re-arm for the next round.
        while True:
            s_e.wait()
            print("action process")
            f_num = 0
            actions_new = self.memory['actions']
            train_local_view_new = self.memory['train_local_view_%d' % gpu_id]
            train_state_pool_new = self.memory['train_state_pool_%d' % gpu_id]
            states_old = self.memory['states']
            states_g_old = self.memory['states_g']
            pro = random.random()
            for i in self.TF.keys():
                for j in self.TF[i].keys():
                    for aid in self.agents_gpu[gpu_id]:
                        local_view = np.array([f_num] + states_old[aid])
                        valida = self.ids[aid].filter[j]  # valid action labels toward destination j

                        states_g_old[aid][0] = f_num
                        ss = np.array(states_g_old[aid])

                        # One exploration coin per round: follow the actor's
                        # policy when pro > episode, otherwise take a random
                        # valid action (-1 marks "no valid choice" -> action 0).
                        if pro > self.episode:
                            action = self.ids[aid].actor.choose_action(local_view, valida, 'prob')
                        else:
                            if -1 in valida:
                                action = 0
                            else:
                                action = self.ids[aid].action_labels.index(random.choice(valida))
                        actions_new[f_num][aid] = action

                        train_local_view_new[f_num][aid] = local_view
                        train_state_pool_new[f_num][aid] = ss
                    f_num += 1

            self.memory['actions'] = actions_new
            self.memory['train_local_view_%d' % gpu_id] = train_local_view_new
            self.memory['train_state_pool_%d' % gpu_id] = train_state_pool_new
            e_e.set()
            s_e.clear()

    def agent_learn(self, gpu_id, s_e, e_e):
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        for aid in self.agents_gpu[gpu_id]:
            self.ids[aid].actor.set_session(sess)
            self.ids[aid].critic.set_session(sess)
        sess.run(tf.global_variables_initializer())

        # Learner loop: wait for the start event, fold the latest transitions
        # into each agent's replay pool, run actor/critic updates, then signal
        # completion.
        while True:
            s_e.wait()
            print("learn process")
            f_num = 0
            states_old = self.memory['states']
            states_g_old = self.memory['states_g']
            train_local_view_old = self.memory['train_local_view_%d' % gpu_id]
            train_state_pool_old = self.memory['train_state_pool_%d' % gpu_id]
            actions_old = self.memory['actions']
            rewards_old = self.memory['rewards']
            f_a_p = self.memory['flow_actual_path']
            experience_pool_new = self.memory['experience_pool_%d' % gpu_id]

            for i in self.TF.keys():
                for j in self.TF[i].keys():
                    for aid in self.agents_gpu[gpu_id]:
                        # Only learn for agents on this flow's actual path
                        # (the final entry is excluded).
                        if aid not in f_a_p[f_num][:-1]:
                            continue
                        # ss (global state) feeds the critic; the actor uses the local view.
                        ss = train_state_pool_old[f_num][aid]
                        ss_ = states_g_old[aid]
                        ss_[0] = f_num
                        ss_ = np.array(ss_)

                        ac = actions_old[f_num][aid]

                        view = train_local_view_old[f_num][aid]

                        # Transition layout: [s, s', action, reward, local_view].
                        cur_exp = [ss, ss_, ac, rewards_old[aid], view]

                        if len(experience_pool_new[aid]) < self.pool_size:
                            experience_pool_new[aid].append(cur_exp)
                        else:
                            # Pool is full: evict the oldest transition, append
                            # the new one, then sample a training minibatch
                            # (indices drawn with replacement).
                            experience_pool_new[aid] = experience_pool_new[aid][1:]
                            experience_pool_new[aid].append(cur_exp)
                            indexs_learn = np.random.choice(self.pool_size, self.sample_size)
                            # time.sleep(0.0072463)
                            for k in indexs_learn:
                                exp = experience_pool_new[aid][k]
                                # Critic TD update on (s, reward, s'); the actor
                                # then learns from (local_view, action, td_error).
                                td_error = self.ids[aid].critic.learn(exp[0], exp[3], exp[1])
                                self.ids[aid].actor.learn(exp[4], exp[2], td_error)
                    f_num += 1
            self.memory['experience_pool_%d' % gpu_id] = experience_pool_new
            e_e.set()
            s_e.clear()
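
# A minimal driver sketch (an assumption for illustration, not part of the
# original module): a coordinator fills the shared memory, then steps the
# per-GPU agent_action / agent_learn workers via paired start/end Events.
# The Manager dict stands in for whatever shared store backs `memory`.
#
#     import multiprocessing as mp
#
#     game = Game()
#     manager = mp.Manager()
#     game.set_global_memory(manager.dict())  # coordinator populates the keys first
#     game.set_rl_agents()
#
#     s_act, e_act = mp.Event(), mp.Event()
#     mp.Process(target=game.agent_action, args=(0, s_act, e_act), daemon=True).start()
#
#     s_act.set()   # kick off one round of action selection on GPU 0
#     e_act.wait()  # block until the worker publishes its actions
#     e_act.clear()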