import collections
import random

import numpy as np
import tensorflow as tf

# HugeGenerator, Actor, Critic and gpu_options are assumed to be provided by
# other modules of this project; their defining files are not shown here.


class Game:
    def __init__(self):
        """All of these variables stay unchanged during the game."""
        self.generator = HugeGenerator(91, 1)
        self.generator.build_topology()
        # TODO: modify this by dividing
        self.RLs = self.generator.Ms[8:]
        self.N = 92
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs
        self.MAX = 100000
        self.ids = self.generator.ids

        # Traffic matrix: TF[src][dst] = 1 for every injected flow.
        # Each of the 13 regions owns a block of 6 CP ids starting at id 14,
        # so region r has header CP 14 + 6*(r-1) and tail CP 19 + 6*(r-1).
        self.TF = collections.defaultdict(dict)
        for region in range(1, 14):
            header_cp = 14 + 6 * (region - 1)
            tail_cp = 19 + 6 * (region - 1)
            # flows from node 0 to each region's header and tail CPs
            self.TF[0][header_cp] = 1
            self.TF[0][tail_cp] = 1
            # flows to the next region on the ring (region 13 wraps to 1)
            nei_region = (region + 1) % 13
            if nei_region == 0:
                nei_region = 13
            header_cp_nei = 14 + 6 * (nei_region - 1)
            tail_cp_nei = 19 + 6 * (nei_region - 1)
            self.TF[header_cp + 2][header_cp_nei + 2] = 1
            self.TF[header_cp + 3][header_cp_nei + 3] = 1
            self.TF[header_cp][header_cp_nei] = 1
            self.TF[tail_cp][tail_cp_nei] = 1
        self.tf_num = sum(len(dsts) for dsts in self.TF.values())
        print("Inject " + str(self.tf_num) + " flows")

        self.global_optimal = 600
        self.episode = 0  # exploration probability used in agent_action(); 0 means always exploit
        self.memory = None
        # agents_gpu[g] lists the agent ids assigned to GPU g
        self.agents_gpu = collections.defaultdict(list)
        for gpu in range(4):
            self.agents_gpu[gpu] = []
        # Per-agent replay pool; each element is [s, s', a, r, local_view].
        self.pool_size = 16
        self.sample_size = 16
        # self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    def set_global_memory(self, m):
        self.memory = m

    def set_rl_agents(self):
        for i in self.RLs:
            print("create model for: " + str(i.id) + ", version -1")
            n_features_critic = len(self.memory['states_g'][i.id])
            n_features_actor = len(self.memory['states'][i.id]) + 1
            # alternate agents between GPU 0 and GPU 1 by parity of the agent id
            gpu_id = (i.id + 1) % 2
            with tf.device('/gpu:%d' % gpu_id):
                actor = Actor(n_features_actor, i.n_actions, i.id, -1, i.action_labels)
                critic = Critic(n_features_critic, i.id, -1)
            self.agents_gpu[gpu_id].append(i.id)
            i.set_rl_setting(actor, critic)
        # self.sess.run(tf.global_variables_initializer())
        print("model created")

    def agent_action(self, gpu_id, s_e, e_e):
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        for aid in self.agents_gpu[gpu_id]:
            self.ids[aid].actor.set_session(sess)
        sess.run(tf.global_variables_initializer())
        while True:
            s_e.wait()
            # Clear the start event right after waking so the coordinator can
            # safely re-arm it once we signal e_e at the end of this round.
            s_e.clear()
            print("action process")
            f_num = 0
            actions_new = self.memory['actions']
            train_local_view_new = self.memory['train_local_view_%d' % gpu_id]
            train_state_pool_new = self.memory['train_state_pool_%d' % gpu_id]
            states_old = self.memory['states']
            states_g_old = self.memory['states_g']
            pro = random.random()
            for i in self.TF:
                for j in self.TF[i]:
                    for aid in self.agents_gpu[gpu_id]:
                        local_view = np.array([f_num] + states_old[aid])
                        valid_actions = self.ids[aid].filter[j]
                        states_g_old[aid][0] = f_num
                        ss = np.array(states_g_old[aid])
                        if pro > self.episode:
                            # exploit: sample from the actor's policy
                            action = self.ids[aid].actor.choose_action(local_view, valid_actions, 'prob')
                        else:
                            # explore: pick a random valid action
                            if -1 in valid_actions:
                                action = 0
                            else:
                                action = self.ids[aid].action_labels.index(random.choice(valid_actions))
                        actions_new[f_num][aid] = action
                        train_local_view_new[f_num][aid] = local_view
                        train_state_pool_new[f_num][aid] = ss
                    f_num += 1
            self.memory['actions'] = actions_new
            self.memory['train_local_view_%d' % gpu_id] = train_local_view_new
            self.memory['train_state_pool_%d' % gpu_id] = train_state_pool_new
            e_e.set()

    def agent_learn(self, gpu_id, s_e, e_e):
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        for aid in self.agents_gpu[gpu_id]:
            self.ids[aid].actor.set_session(sess)
            self.ids[aid].critic.set_session(sess)
        sess.run(tf.global_variables_initializer())
        while True:
            s_e.wait()
            s_e.clear()
            print("learn process")
            f_num = 0
            states_old = self.memory['states']
            states_g_old = self.memory['states_g']
            train_local_view_old = self.memory['train_local_view_%d' % gpu_id]
            train_state_pool_old = self.memory['train_state_pool_%d' % gpu_id]
            actions_old = self.memory['actions']
            rewards_old = self.memory['rewards']
            f_a_p = self.memory['flow_actual_path']
            experience_pool_new = self.memory['experience_pool_%d' % gpu_id]
            for i in self.TF:
                for j in self.TF[i]:
                    for aid in self.agents_gpu[gpu_id]:
                        # skip agents that are not on this flow's actual path
                        # (the last hop is the destination, so it is excluded)
                        if aid not in f_a_p[f_num][:-1]:
                            continue
                        # ss is not used for the local (actor) view; it is the
                        # global state fed to the critic
                        ss = train_state_pool_old[f_num][aid]
                        ss_ = states_g_old[aid]
                        ss_[0] = f_num
                        ss_ = np.array(ss_)
                        ac = actions_old[f_num][aid]
                        view = train_local_view_old[f_num][aid]
                        cur_exp = [ss, ss_, ac, rewards_old[aid], view]
                        if len(experience_pool_new[aid]) < self.pool_size:
                            # still filling the replay pool; no learning yet
                            experience_pool_new[aid].append(cur_exp)
                        else:
                            # pool is full: evict the oldest experience, then
                            # learn from a random sample (with replacement)
                            experience_pool_new[aid] = experience_pool_new[aid][1:]
                            experience_pool_new[aid].append(cur_exp)
                            indexes_learn = np.random.choice(self.pool_size, self.sample_size)
                            # time.sleep(0.0072463)
                            for k in indexes_learn:
                                # exp = [s, s', a, r, local_view]
                                exp = experience_pool_new[aid][k]
                                td_error = self.ids[aid].critic.learn(exp[0], exp[3], exp[1])
                                self.ids[aid].actor.learn(exp[4], exp[2], td_error)
                    f_num += 1
            self.memory['experience_pool_%d' % gpu_id] = experience_pool_new
            e_e.set()
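
# --------------------------------------------------------------------------
# Minimal, self-contained sketch (not part of the original file) of the
# start/end Event handshake that agent_action() and agent_learn() rely on:
# the worker blocks on s_e, clears it, does one round of work, then signals
# e_e back to the coordinator. All names below (_handshake_worker, shared,
# the coordinator loop) are illustrative assumptions; the real coordinator
# lives elsewhere in this project.
import multiprocessing


def _handshake_worker(s_e, e_e, shared):
    """Stand-in for agent_action/agent_learn: one round of work per trigger."""
    while True:
        s_e.wait()                 # block until the coordinator releases us
        s_e.clear()                # re-arm our trigger before signalling back
        shared['step'] = shared.get('step', 0) + 1  # placeholder "work"
        e_e.set()                  # tell the coordinator this round is done


if __name__ == '__main__':
    manager = multiprocessing.Manager()
    shared = manager.dict()        # stands in for the global memory dict
    s_e = multiprocessing.Event()
    e_e = multiprocessing.Event()
    p = multiprocessing.Process(target=_handshake_worker,
                                args=(s_e, e_e, shared), daemon=True)
    p.start()
    for _ in range(3):
        s_e.set()                  # release the worker for one round
        e_e.wait()                 # wait until it finishes
        e_e.clear()
    print(shared['step'])          # -> 3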