def __init__(self):
    """Build the mini Internet topology, the fixed traffic matrix, and per-flow/per-agent experience pools."""
    # Internet topology
    self.generator = MiniGenerator(10, 2)
    self.generator.build_topology()
    # RLs and Ns contain the same nodes, only concatenated in a different order
    self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
    self.N = 12  # total node count
    self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs
    self.MAX = 100000  # number of training epochs
    self.ids = self.generator.ids  # node-id -> node object lookup
    # traffic demands: TF[src][dst] = volume
    self.TF = collections.defaultdict(dict)
    self.tf_num = 10  # number of flows below
    self.TF[6][9] = 1
    self.TF[6][10] = 1
    self.TF[7][4] = 1
    self.TF[7][5] = 1
    self.TF[8][10] = 1
    self.TF[8][11] = 1
    self.TF[0][9] = 1
    self.TF[1][11] = 1
    self.TF[2][5] = 1
    self.TF[3][4] = 1
    self.global_optimal = 600  # hard-coded benchmark value — TODO confirm source
    self.episode = 0.8  # exploration probability (epsilon), despite the name
    # for each agent, add [s,a,r,s'] as element. size
    self.experience_pool = collections.defaultdict(dict)
    self.pool_size = 50
    self.sample_size = 10
    # one empty replay list per (flow, agent) pair
    for i in range(self.tf_num):
        for j in range(self.N):
            self.experience_pool[i][j] = []
def __init__(self):
    """Build the topology (with directed matrix) and the fixed 10-flow traffic matrix."""
    # Internet topology
    self.generator = MiniGenerator(10, 2)
    self.generator.build_topology()
    self.generator.build_directed_matrix()
    # RLs and Ns contain the same nodes, only concatenated in a different order
    self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
    self.N = 12  # total node count
    self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs
    self.MAX = 100000  # number of training epochs
    self.ids = self.generator.ids  # node-id -> node object lookup
    # for each agent, add [s,a,r,s'] as element. size
    self.experience_pool = collections.defaultdict(list)
    self.pool_size = 10
    # TODO, define TF, Matrix, Linprog
    # traffic demands: TF[src][dst] = volume
    self.TF = collections.defaultdict(dict)
    self.TF[6][9] = 1
    self.TF[6][10] = 1
    self.TF[7][4] = 1
    self.TF[7][5] = 1
    self.TF[8][10] = 1
    self.TF[8][11] = 1
    self.TF[0][9] = 1
    self.TF[1][11] = 1
    self.TF[2][5] = 1
    self.TF[3][4] = 1
def __init__(self):
    """Build the topology and traffic matrix; allocate a replay pool per (flow, agent)."""
    # Internet topology
    self.generator = MiniGenerator(10, 2)
    self.generator.build_topology()
    self.generator.build_directed_matrix()
    # RLs and Ns contain the same nodes, only concatenated in a different order
    self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
    self.N = 12  # total node count
    self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs
    self.MAX = 100000  # number of training epochs
    self.ids = self.generator.ids  # node-id -> node object lookup
    # traffic demands: TF[src][dst] = volume
    self.TF = collections.defaultdict(dict)
    self.tf_num = 10  # number of flows below
    self.TF[6][9] = 1
    self.TF[6][10] = 1
    self.TF[7][4] = 1
    self.TF[7][5] = 1
    self.TF[8][10] = 1
    self.TF[8][11] = 1
    self.TF[0][9] = 1
    self.TF[1][11] = 1
    self.TF[2][5] = 1
    self.TF[3][4] = 1
    # reverse-direction demands, currently disabled:
    # self.TF[9][6] = 1
    # self.TF[10][6] = 1
    # self.TF[4][7] = 1
    # self.TF[5][7] = 1
    # self.TF[10][8] = 1
    # self.TF[11][8] = 1
    # self.TF[9][0] = 1
    # self.TF[11][1] = 1
    # self.TF[5][2] = 1
    # self.TF[4][3] = 1
    # for each agent, add [s,a,r,s'] as element. size
    self.experience_pool = collections.defaultdict(dict)
    self.pool_size = 10
    # one empty replay list per (flow, agent) pair
    for i in range(self.tf_num):
        for j in range(self.N):
            self.experience_pool[i][j] = []
class Game:
    """Actor/Critic routing game on a 12-node mini topology, compared per epoch
    against a uniform-random policy. All flows are routed each epoch, link loads
    and end-to-end throughputs are computed, and each agent learns from a
    randomly replayed (s, a, r, s') sample."""

    def __init__(self):
        # Internet topology
        self.generator = MiniGenerator(10, 2)
        self.generator.build_topology()
        self.generator.build_directed_matrix()
        # RLs and Ns contain the same nodes, only concatenated in a different order
        self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
        self.N = 12  # total node count
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs
        self.MAX = 100000  # number of training epochs
        self.ids = self.generator.ids  # node-id -> node object lookup
        # for each agent, add [s,a,r,s'] as element. size
        self.experience_pool = collections.defaultdict(list)
        self.pool_size = 10
        # running best observed system throughput (updated inside play_game)
        self.global_optimal = 0
        self.int_optimal = 0
        # TODO, define TF, Matrix, Linprog
        # traffic demands: TF[src][dst] = volume (bidirectional pairs listed explicitly)
        self.TF = collections.defaultdict(dict)
        self.TF[6][11] = 1
        self.TF[8][9] = 1
        self.TF[7][10] = 1
        self.TF[2][5] = 1
        self.TF[3][4] = 1
        self.TF[4][3] = 1
        self.TF[5][2] = 1
        self.TF[10][7] = 1
        self.TF[9][8] = 1
        self.TF[11][6] = 1
        self.TF[8][0] = 1
        self.TF[9][1] = 1
        self.TF[7][5] = 1
        self.TF[10][3] = 1
        # alternative demand set, currently disabled:
        # self.TF[6][9] = 1
        # self.TF[6][10] = 1
        # self.TF[7][0] = 1
        # self.TF[7][1] = 1
        # self.TF[8][10] = 1
        # self.TF[8][11] = 1
        # self.TF[0][9] = 1
        # self.TF[1][11] = 1
        # self.TF[2][5] = 1
        # self.TF[3][4] = 1
        # disabled exact solvers that computed the true optimum for comparison:
        # intlp = IntLp(self.generator.matrix, self.TF)
        # intlp.solve_ilp()
        # # print("===========================================")
        # #
        # linear = Linprog(self.generator.matrix, self.TF)
        # self.global_optimal = linear.solve_linprog()

    def play_game(self):
        """Run the training loop: route flows, score throughput, train agents."""
        print("play")
        sess = tf.Session()
        print('sess')
        """
        basic states for every node
        """
        # per-node state vector: one slot per neighbor link, then one slot per
        # reachable destination (customer / peer / provider routing tables)
        states = collections.defaultdict(list)
        for i in range(self.N):
            # add neighbor
            for j in range(len(self.generator.matrix[i])):
                if self.generator.matrix[i][j] == 1:
                    states[i].append(100)
            # reachable end-to-end throughput (all advertised are considered here)
            node = self.ids[i]
            for d in node.table:
                states[i].append(0)
            for d in node.table_peer:
                states[i].append(0)
            for d in node.table_provider:
                states[i].append(0)
        """
        create RL module
        """
        # basic state: one Actor/Critic pair per learning agent
        for i in self.RLs:
            print("create mode for: " + str(i.id) + ", version -1")
            n_features = len(states[i.id])
            actor = Actor(sess, n_features, i.n_actions, i.id, -1)
            critic = Critic(sess, n_features, i.id, -1)
            i.set_rl_setting(actor, critic)
        sess.run(tf.global_variables_initializer())
        print("model created")
        '''
        loop time as time epoch
        '''
        sums = []          # RL system throughput per epoch
        sums_random = []   # random-policy throughput per epoch
        TF = self.TF
        for time in range(self.MAX):
            print("begin time epoch: " + str(time))
            """
            choose an action
            id : action label
            """
            # basic: epsilon-greedy with fixed epsilon = 0.1
            actions = {}
            for i in self.Ns:
                if i in self.RLs:
                    s = np.array(states[i.id])
                    pro = random.random()
                    if pro > 0.1:
                        actions[i.id] = i.actor.choose_action(s)
                    else:
                        actions[i.id] = random.randint(0, i.n_actions - 1)
                else:
                    actions[i.id] = 0
            # random baseline actions
            actions_random = {}
            for i in self.Ns:
                # node i
                if i in self.RLs:
                    actions_random[i.id] = random.randint(0, i.n_actions - 1)
                else:
                    actions_random[i.id] = 0
            """
            actual flow
            id : id : path
            """
            # basic: walk each demand hop by hop; drop the flow on a loop
            # (>10 hops) or when the current node has no next hop for dst j
            actual_flow = collections.defaultdict(dict)
            for i in TF.keys():
                for j in TF[i].keys():
                    hop_path = []
                    cur = i
                    hop_path.append(self.ids[cur])
                    flag = -1
                    count = 0
                    while cur != j:
                        count += 1
                        if count > 10:
                            flag = 1
                            break
                        flag = 0
                        action = self.ids[cur].action_labels[actions[cur]]
                        if action.get(j) is not None:
                            cur = action[j]
                            hop_path.append(self.ids[cur])
                        else:
                            flag = 1
                            break
                    if flag == 0:
                        actual_flow[i][j] = hop_path
            num = 0
            if time == 0:
                for i in actual_flow.keys():
                    for j in actual_flow[i].keys():
                        num += 1
                print('actual flow: ' + str(num))
            # random baseline routing (same walk, random actions)
            actual_flow_random = collections.defaultdict(dict)
            for i in TF.keys():
                for j in TF[i].keys():
                    hop_path = []
                    cur = i
                    hop_path.append(self.ids[cur])
                    flag = -1
                    count = 0
                    while cur != j:
                        count += 1
                        if count > 10:
                            flag = 1
                            break
                        flag = 0
                        action = self.ids[cur].action_labels[actions_random[cur]]
                        if action.get(j) is not None:
                            cur = action[j]
                            hop_path.append(self.ids[cur])
                        else:
                            flag = 1
                            break
                    if flag == 0:
                        actual_flow_random[i][j] = hop_path
            """
            link load
            id : id : V
            """
            # basic: sum demand over every link of every delivered path (both directions)
            link_load = np.zeros([self.N, self.N])
            for i in actual_flow.keys():
                for j in actual_flow[i].keys():
                    path = actual_flow[i][j]
                    for k in range(len(path) - 1):
                        e1 = path[k]
                        e2 = path[k + 1]
                        link_load[e1.id][e2.id] += TF[i][j]
                        link_load[e2.id][e1.id] += TF[i][j]
            # random
            link_load_random = np.zeros([self.N, self.N])
            for i in actual_flow_random.keys():
                for j in actual_flow_random[i].keys():
                    path = actual_flow_random[i][j]
                    for k in range(len(path) - 1):
                        e1 = path[k]
                        e2 = path[k + 1]
                        link_load_random[e1.id][e2.id] += TF[i][j]
                        link_load_random[e2.id][e1.id] += TF[i][j]
            """
            ee throughput
            id : id : T
            """
            # basic: per-flow throughput = min over path links of capacity(100)/load
            ee_throughput = np.zeros([self.N, self.N])
            for i in actual_flow.keys():
                # input node i
                for j in actual_flow[i].keys():
                    path = actual_flow[i][j]
                    temp_min = 9999
                    for k in range(len(path) - 1):
                        node1 = path[k]
                        node2 = path[k + 1]
                        # TODO, enlarge link capacity of TT
                        # NOTE(review): links on a delivered path always carry this
                        # flow, so load > 0 here; the zero branch looks unreachable
                        if link_load[node1.id][node2.id] == 0:
                            ee = 100
                        else:
                            ee = 100 / link_load[node1.id][node2.id]
                        if ee < temp_min:
                            temp_min = ee
                    ee_throughput[i][j] = temp_min
            # random (same computation, no zero-load branch)
            ee_throughput_random = np.zeros([self.N, self.N])
            for i in actual_flow_random.keys():
                # input node i
                for j in actual_flow_random[i].keys():
                    path = actual_flow_random[i][j]
                    temp_min = 9999
                    for k in range(len(path) - 1):
                        node1 = path[k]
                        node2 = path[k + 1]
                        # TODO, modify here, and the state
                        ee = 100 / link_load_random[node1.id][node2.id]
                        if ee < temp_min:
                            temp_min = ee
                    ee_throughput_random[i][j] = temp_min
            """
            next basic states for every node, neighbor part
            """
            states_ = collections.defaultdict(list)
            for i in range(self.N):
                for j in range(len(self.generator.matrix[i])):
                    if self.generator.matrix[i][j] == 1:
                        if link_load[i][j] == 0:
                            states_[i].append(100)
                        else:
                            states_[i].append(100 / link_load[i][j])
            """
            reward, basic states, ee part
            """
            # basic: reward = mean throughput over flows this agent forwards
            # (agent must be on the path and not be the destination itself)
            rewards = {}
            for agent in self.RLs:
                temp_table = collections.defaultdict(list)
                for des in agent.table:
                    temp_table[des].append(0)
                for des in agent.table_peer:
                    temp_table[des].append(0)
                for des in agent.table_provider:
                    temp_table[des].append(0)
                sum_flow = 0
                sum_ee = 0
                for i in actual_flow.keys():
                    for j in actual_flow[i].keys():
                        path = actual_flow[i][j]
                        if agent in path and agent is not path[-1]:
                            sum_flow += 1
                            sum_ee += ee_throughput[i][j]
                            temp_table[j].append(ee_throughput[i][j])
                if sum_flow == 0:
                    rewards[agent.id] = 0
                else:
                    rewards[agent.id] = sum_ee / sum_flow
                # append per-destination average throughput to the next state
                for i in temp_table:
                    avg = sum(temp_table[i]) / len(temp_table[i])
                    states_[agent.id].append(avg)
            """
            system throughput
            """
            # basic
            sum_all = 0
            for i in range(self.N):
                for j in range(self.N):
                    sum_all += ee_throughput[i][j]
            # random
            sum_all_random = 0
            for i in range(self.N):
                for j in range(self.N):
                    sum_all_random += ee_throughput_random[i][j]
            """
            agent learns through s, a, r, s_
            """
            # basic: push this transition into a bounded FIFO pool, then train on
            # one uniformly sampled past transition (simple experience replay)
            for agent in self.RLs:
                s = np.array(states[agent.id])
                r = rewards[agent.id]
                s_ = np.array(states_[agent.id])
                a = actions[agent.id]
                exp = []
                exp.append(s)
                exp.append(r)
                exp.append(a)
                exp.append(s_)
                if len(self.experience_pool[agent.id]) < self.pool_size:
                    self.experience_pool[agent.id].append(exp)
                else:
                    # drop oldest, keep pool at pool_size
                    self.experience_pool[agent.id] = self.experience_pool[agent.id][1:]
                    self.experience_pool[agent.id].append(exp)
                experience = random.choice(self.experience_pool[agent.id])
                s = experience[0]
                r = experience[1]
                a = experience[2]
                s_ = experience[3]
                td_error = agent.critic.learn(s, r, s_)
                agent.actor.learn(s, a, td_error)
            states = states_
            sums.append(sum_all)
            sums_random.append(sum_all_random)
            # track the best throughput seen by either policy
            if sum_all_random > self.global_optimal:
                self.global_optimal = sum_all_random
            if sum_all > self.global_optimal:
                self.global_optimal = sum_all
            print('mini-rl: ' + str(sum_all))
            print('random: ' + str(sum_all_random))
            print('global optimal: ' + str(self.global_optimal))
            # periodic checkpoint of the throughput curves to text files
            if time % 3000 == 0 and time != 0:
                str1 = 'basic-mini-sums' + str(time) + '.txt'
                file = open(str1, 'w')
                file.write(str(sums))
                file.close()
                str2 = 'basic-mini-sums_random' + str(time) + '.txt'
                file = open(str2, 'w')
                file.write(str(sums_random))
                file.close()
class Game:
    """DQN routing game: each agent picks a next hop per flow from a global
    state (all nodes' local states plus all agents' last actions), with epsilon
    decayed every 3000 epochs. update_state() re-routes all flows and returns
    the next states, rewards, and system throughput."""

    def __init__(self):
        # Internet topology
        self.generator = MiniGenerator(10, 2)
        self.generator.build_topology()
        # RLs and Ns contain the same nodes, only concatenated in a different order
        self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
        self.N = 12  # total node count
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs
        self.MAX = 100000  # number of training epochs
        self.ids = self.generator.ids  # node-id -> node object lookup
        # traffic demands: TF[src][dst] = volume
        self.TF = collections.defaultdict(dict)
        self.tf_num = 10  # number of flows below
        self.TF[6][9] = 1
        self.TF[6][10] = 1
        self.TF[7][4] = 1
        self.TF[7][5] = 1
        self.TF[8][10] = 1
        self.TF[8][11] = 1
        self.TF[0][9] = 1
        self.TF[1][11] = 1
        self.TF[2][5] = 1
        self.TF[3][4] = 1
        self.global_optimal = 600  # hard-coded benchmark value — TODO confirm source
        self.episode = 0.8  # exploration probability (epsilon), halved every 3000 epochs
        # for each agent, add [s,a,r,s'] as element. size
        self.experience_pool = collections.defaultdict(dict)
        self.pool_size = 50
        self.sample_size = 10
        # one empty replay list per (flow, agent) pair
        for i in range(self.tf_num):
            for j in range(self.N):
                self.experience_pool[i][j] = []

    def play_game(self):
        """Run the DQN training loop over flows and epochs."""
        print("play")
        # NOTE(review): relies on a module-level `gpu_options` — confirm it is defined
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        print('sess')
        """
        init states for every node: src, dst, neighbor, e-e, others' actions
        """
        # basic states: one slot per neighbor link, then one per reachable destination
        states = collections.defaultdict(list)
        for i in range(self.N):
            # add neighbor
            for j in range(len(self.generator.matrix[i])):
                if self.generator.matrix[i][j] == 1:
                    states[i].append(0)
            # reachable end-to-end throughput (all advertised are considered here)
            node = self.ids[i]
            for d in node.table:
                states[i].append(0)
            for d in node.table_peer:
                states[i].append(0)
            for d in node.table_provider:
                states[i].append(0)
        # combine others' states and last actions for each flow
        states_g = collections.defaultdict(list)
        for i in range(self.N):
            # add flow num
            states_g[i] = [0]
            # all agents' basic states
            for j in range(self.N):
                states_g[i] += states[j]
            # actions of all agents for all flows
            for k in range(self.tf_num):
                for q in range(self.N):
                    states_g[i].append(0)
        """
        create RL module
        """
        # TODO, give the action_labels
        for i in self.RLs:
            print("create mode for: " + str(i.id) + ", version -1")
            n_features_critic = len(states_g[i.id])  # len(states[i.id]) + 1
            n_features_actor = len(states[i.id]) + 1
            dqn = DeepQNetwork(sess, n_features_critic, i.n_actions, i.id, i.action_labels)
            i.set_dqn(dqn)
        sess.run(tf.global_variables_initializer())
        print("model created")
        """
        loop time as time epoch
        """
        TF = self.TF
        # keep updating: initial next-hop action per (flow, node), restricted to
        # each node's valid next hops for the flow's destination (its filter)
        actions = collections.defaultdict(dict)
        f_num = 0
        for i in TF.keys():
            for j in TF[i].keys():
                for k in range(self.N):
                    # TODO, init without path error
                    valida = self.ids[k].filter[j]
                    if -1 in valida:
                        # -1 marks "no valid next hop": fall back to action 0
                        actions[f_num][k] = 0
                    else:
                        actions[f_num][k] = self.ids[k].action_labels.index(
                            random.choice(valida))
                f_num += 1
        sums = []
        for time in range(self.MAX):
            # decay exploration every 3000 epochs
            if time % 3000 == 0 and time != 0:
                self.episode /= 2
            # NOTE(review): a large commented-out block that re-initialized
            # states/states_g/actions every 100 epochs was here; removed as dead code.
            print("time: " + str(time))
            pro = random.random()
            train_state_pool = collections.defaultdict(dict)
            train_local_view = collections.defaultdict(dict)
            flow_num = 0
            sum_all = 0
            flow_actual_path = collections.defaultdict(list)
            rewards = {}
            # phase 1: every agent picks a next hop for every flow, then the
            # environment (update_state) is advanced once per flow
            for i in TF.keys():
                for j in TF[i].keys():
                    for agent in self.Ns:
                        # store state and state'
                        train_state_pool[flow_num][agent.id] = []
                        # specific one node to one flow
                        states_g[agent.id][0] = flow_num
                        ss = np.array(states_g[agent.id])
                        local_view = np.array([flow_num] + states[agent.id])
                        if pro > self.episode:
                            # TODO, use filter process, give the valid next-hops
                            valida = agent.filter[j]
                            # NOTE(review): both branches are identical — the
                            # time >= 20000 switch is dead code, verify intent
                            if time >= 20000:
                                cnm = agent.dqn.choose_action(ss, valida)
                            else:
                                cnm = agent.dqn.choose_action(ss, valida)
                            actions[flow_num][agent.id] = cnm
                            # TODO, random need filter
                        else:
                            valida = agent.filter[j]
                            if -1 in valida:
                                actions[flow_num][agent.id] = 0
                            else:
                                actions[flow_num][
                                    agent.id] = agent.action_labels.index(
                                        random.choice(valida))
                        train_state_pool[flow_num][agent.id].append(ss)
                        train_local_view[flow_num][agent.id] = local_view
                    # update states to ss_
                    states_, rewards, sum_all, hh = self.update_state(
                        flow_num, actions)
                    flow_actual_path[flow_num] = hh
                    # rebuild the global state from the new local states + actions
                    states_g_ = collections.defaultdict(list)
                    for k in range(self.N):
                        states_g_[k] = [flow_num]
                        for q in range(self.N):
                            states_g_[k] += states_[q]
                        for z in actions.keys():
                            for x in actions[z].keys():
                                states_g_[k].append(actions[z][x])
                    flow_num += 1
                    states_g = states_g_
                    states = states_
            # phase 2: store transitions and train, but only for agents that
            # actually forwarded the flow (on the path, excluding the destination)
            flow_num = 0
            for i in TF.keys():
                for j in TF[i].keys():
                    for agent in self.Ns:
                        # TODO, only make useful agents to learn
                        if agent not in flow_actual_path[
                                flow_num][:len(flow_actual_path[flow_num]) - 1]:
                            continue
                        ss = train_state_pool[flow_num][agent.id][0]
                        ss_ = states_g[agent.id]  # states[agent.id]
                        # ss_[0] = flow_num
                        ss_ = np.array(ss_)
                        # ss_ = np.array([flow_num] + ss_)
                        view = train_local_view[flow_num][agent.id]
                        cur_exp = [
                            ss, ss_, actions[flow_num][agent.id], rewards[agent.id], view
                        ]
                        agent.dqn.store_transition(cur_exp[0], cur_exp[2],
                                                   cur_exp[3], cur_exp[1])
                        agent.dqn.learn()
                    flow_num += 1
            sums.append(sum_all)
            print('game12-dqn-glb: ' + str(sum_all))
            # periodic checkpoint of the throughput curve
            if time % 3000 == 0 and time != 0:
                str1 = 'game12-dqn-glb' + str(time) + '.txt'
                file = open(str1, 'w')
                file.write(str(sums))
                file.close()

    def update_state(self, cur_flow, actions):
        """Re-route every flow under `actions`; return (next local states,
        rewards, total system throughput, path of flow `cur_flow`).

        Exits the process if any flow fails to reach its destination within
        10 hops (routing loop), since the filtered action space is expected
        to always produce valid paths."""
        TF = self.TF
        actual_flow = collections.defaultdict(dict)
        flow_num = 0
        path_return = []
        for i in TF.keys():
            for j in TF[i].keys():
                hop_path = []
                cur = i
                hop_path.append(self.ids[cur])
                flag = -1
                count = 0
                while cur != j:
                    count += 1
                    if count > 10:
                        flag = 1
                        break
                    flag = 0
                    # TODO, action type changed
                    # here the action label IS the next-hop node id
                    action = self.ids[cur].action_labels[actions[flow_num][cur]]
                    cur = action
                    hop_path.append(self.ids[cur])
                if flag == 0:
                    actual_flow[i][j] = hop_path
                    if flow_num == cur_flow:
                        path_return = hop_path
                else:
                    # unreachable destination: dump diagnostics and abort
                    print("error in hop path")
                    print("==================from " + str(i) + ' to ' + str(j))
                    # NOTE(review): this loop variable shadows the outer `i`;
                    # harmless only because sys.exit follows
                    for i in hop_path:
                        print(i.id)
                    print(actions)
                    sys.exit(0)
                flow_num += 1
        # per-link load: sum demand over every link of every path, both directions
        link_load = np.zeros([self.N, self.N])
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                for k in range(len(path) - 1):
                    e1 = path[k]
                    e2 = path[k + 1]
                    link_load[e1.id][e2.id] += TF[i][j]
                    link_load[e2.id][e1.id] += TF[i][j]
        # store flow information on each link
        # node1:node2:[[src,dst],[src,dst]]
        # node2:node1:[[src,dst],...]
        link_flow_records = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    link_flow_records[i][j] = []
                    link_flow_records[j][i] = []
        # per-flow bottleneck throughput: min over path links of capacity(100)/load
        ee_throughput = np.zeros([self.N, self.N])
        for i in actual_flow.keys():
            # input node i
            for j in actual_flow[i].keys():
                flow = [i, j]
                path = actual_flow[i][j]
                temp_min = 9999
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    # record flow 'i j' on link 'node1 node2'
                    link_flow_records[node1.id][node2.id].append(flow)
                    link_flow_records[node2.id][node1.id].append(flow)
                    ee = 100 / link_load[node1.id][node2.id]
                    if ee < temp_min:
                        temp_min = ee
                ee_throughput[i][j] = temp_min
        # ee is basic throughput for each flow, need to be increased
        # residual capacity per link after granting each flow its bottleneck share
        link_residue = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    # for link 'i j'
                    remain = 100
                    for flow in link_flow_records[i][j]:
                        remain -= ee_throughput[flow[0]][flow[1]]
                    link_residue[i][j] = remain
                    link_residue[j][i] = remain
        # increase each flow throughput, update link_residue
        # (single greedy pass in flow order)
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                temp_min = 9999
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    if link_residue[node1.id][node2.id] < temp_min:
                        temp_min = link_residue[node1.id][node2.id]
                # increase
                if temp_min == 0:
                    continue
                ee_throughput[i][j] += temp_min
                # update
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    link_residue[node1.id][node2.id] -= temp_min
                    link_residue[node2.id][node1.id] -= temp_min
        # next local state: raw load per neighbor link (not 100/load as elsewhere)
        states_ = collections.defaultdict(list)
        for i in range(self.N):
            for j in range(len(self.generator.matrix[i])):
                if self.generator.matrix[i][j] == 1:
                    states_[i].append(link_load[i][j])
                    # if link_load[i][j] == 0:
                    #     states_[i].append(100)
                    # else:
                    #     states_[i].append(100 / link_load[i][j])
        # reward = mean throughput over flows the agent forwards (not as destination)
        rewards = {}
        for agent in self.RLs:
            temp_table = collections.defaultdict(list)
            for des in agent.table:
                temp_table[des].append(0)
            for des in agent.table_peer:
                temp_table[des].append(0)
            for des in agent.table_provider:
                temp_table[des].append(0)
            sum_flow = 0
            sum_ee = 0
            for i in actual_flow.keys():
                for j in actual_flow[i].keys():
                    path = actual_flow[i][j]
                    if agent in path and agent is not path[-1]:
                        sum_flow += 1
                        sum_ee += ee_throughput[i][j]
                        temp_table[j].append(ee_throughput[i][j])
            if sum_flow == 0:
                rewards[agent.id] = 0
            else:
                rewards[agent.id] = sum_ee / sum_flow
            # append per-destination average throughput to the next state
            for i in temp_table:
                avg = sum(temp_table[i]) / len(temp_table[i])
                states_[agent.id].append(avg)
        # total system throughput
        sum_all = 0
        for i in range(self.N):
            for j in range(self.N):
                sum_all += ee_throughput[i][j]
        return states_, rewards, sum_all, path_return
class Game:
    """Per-flow Actor/Critic routing game with experience replay: agents pick
    actions per flow, the environment is advanced once per flow via
    update_state(), and transitions are replayed from per-(flow, agent) pools."""

    def __init__(self):
        # Internet topology
        self.generator = MiniGenerator(10, 2)
        self.generator.build_topology()
        self.generator.build_directed_matrix()
        # RLs and Ns contain the same nodes, only concatenated in a different order
        self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
        self.N = 12  # total node count
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs
        self.MAX = 100000  # number of training epochs
        self.ids = self.generator.ids  # node-id -> node object lookup
        # traffic demands: TF[src][dst] = volume
        self.TF = collections.defaultdict(dict)
        self.tf_num = 10  # number of flows below
        self.TF[6][9] = 1
        self.TF[6][10] = 1
        self.TF[7][4] = 1
        self.TF[7][5] = 1
        self.TF[8][10] = 1
        self.TF[8][11] = 1
        self.TF[0][9] = 1
        self.TF[1][11] = 1
        self.TF[2][5] = 1
        self.TF[3][4] = 1
        # reverse-direction demands, currently disabled:
        # self.TF[9][6] = 1
        # self.TF[10][6] = 1
        # self.TF[4][7] = 1
        # self.TF[5][7] = 1
        # self.TF[10][8] = 1
        # self.TF[11][8] = 1
        # self.TF[9][0] = 1
        # self.TF[11][1] = 1
        # self.TF[5][2] = 1
        # self.TF[4][3] = 1
        # for each agent, add [s,a,r,s'] as element. size
        self.experience_pool = collections.defaultdict(dict)
        self.pool_size = 10
        # one empty replay list per (flow, agent) pair
        for i in range(self.tf_num):
            for j in range(self.N):
                self.experience_pool[i][j] = []

    def play_game(self):
        """Run the per-flow Actor/Critic training loop."""
        print("play")
        sess = tf.Session()
        print('sess')
        """
        init states for every node: src, dst, neighbor, e-e, others' actions
        """
        # state layout: [current flow number, one slot per neighbor link,
        # one slot per reachable destination]
        states = collections.defaultdict(list)
        for i in range(self.N):
            # add flow number
            states[i].append(0)
            # add neighbor
            for j in range(len(self.generator.matrix[i])):
                if self.generator.matrix[i][j] == 1:
                    states[i].append(100)
            # reachable end-to-end throughput (all advertised are considered here)
            node = self.ids[i]
            for d in node.table:
                states[i].append(0)
            for d in node.table_peer:
                states[i].append(0)
            for d in node.table_provider:
                states[i].append(0)
        # TODO, consider how to combine actions
        """
        create RL module
        """
        for i in self.RLs:
            print("create mode for: " + str(i.id) + ", version -1")
            n_features = len(states[i.id])
            actor = Actor(sess, n_features, i.n_actions, i.id, -1)
            critic = Critic(sess, n_features, i.id, -1)
            i.set_rl_setting(actor, critic)
        sess.run(tf.global_variables_initializer())
        print("model created")
        """
        loop time as time epoch
        """
        TF = self.TF
        # keep updating: actions[flow][node] = action index, all zero-initialized
        actions = collections.defaultdict(dict)
        for i in range(self.tf_num):
            for j in range(self.N):
                actions[i][j] = 0
        sums = []
        for time in range(self.MAX):
            print("begin time epoch: " + str(time))
            train_state_pool = collections.defaultdict(dict)
            flow_num = 0
            sum_all = 0
            rewards = {}
            # phase 1: choose actions per flow (epsilon-greedy, epsilon = 0.1),
            # then advance the environment once per flow
            for i in TF.keys():
                for j in TF[i].keys():
                    for agent in self.Ns:
                        # store state and state'
                        train_state_pool[flow_num][agent.id] = []
                        # specific one node to one flow
                        states[agent.id][0] = flow_num
                        ss = np.array(states[agent.id])
                        pro = random.random()
                        if pro > 0.1:
                            actions[flow_num][
                                agent.id] = agent.actor.choose_action(ss)
                        else:
                            actions[flow_num][agent.id] = random.randint(
                                0, agent.n_actions - 1)
                        train_state_pool[flow_num][agent.id].append(ss)
                    # update states to ss_
                    states_, rewards, sum_all = self.update_state(
                        flow_num, actions)
                    flow_num += 1
                    states = states_
            # TODO, add experience replay
            # phase 2: store transitions and train from per-(flow, agent) pools
            flow_num = 0
            for i in TF.keys():
                for j in TF[i].keys():
                    for agent in self.Ns:
                        ss = train_state_pool[flow_num][agent.id][0]
                        ss_ = states[agent.id]
                        ss_[0] = flow_num
                        ss_ = np.array(ss_)
                        exp = []
                        exp.append(ss)
                        exp.append(rewards[agent.id])
                        exp.append(actions[flow_num][agent.id])
                        exp.append(ss_)
                        # bounded FIFO pool per (flow, agent)
                        if len(self.experience_pool[flow_num][
                                agent.id]) < self.pool_size:
                            self.experience_pool[flow_num][agent.id].append(
                                exp)
                        else:
                            self.experience_pool[flow_num][
                                agent.id] = self.experience_pool[flow_num][
                                    agent.id][1:]
                            self.experience_pool[flow_num][agent.id].append(
                                exp)
                        # train on one uniformly sampled past transition
                        # NOTE(review): exp stores [s, r, a, s_] so index 1 is the
                        # reward and index 2 the action — unpacked consistently below
                        experience = random.choice(
                            self.experience_pool[flow_num][agent.id])
                        s = experience[0]
                        r = experience[1]
                        a = experience[2]
                        s_ = experience[3]
                        td_error = agent.critic.learn(s, r, s_)
                        agent.actor.learn(s, a, td_error)
                    flow_num += 1
            sums.append(sum_all)
            print('cut-rl-tf10-exp: ' + str(sum_all))
            # periodic checkpoint of the throughput curve
            if time % 500 == 0 and time != 0:
                str1 = 'cut-final-tf10-exp-sums' + str(time) + '.txt'
                file = open(str1, 'w')
                file.write(str(sums))
                file.close()

    def update_state(self, cur_flow, actions):
        """Re-route every flow under `actions`; return (next states, rewards,
        total system throughput). Flows that loop (>10 hops) or hit a node
        with no next hop toward the destination are dropped, not delivered."""
        TF = self.TF
        actual_flow = collections.defaultdict(dict)
        flow_num = 0
        for i in TF.keys():
            for j in TF[i].keys():
                hop_path = []
                cur = i
                hop_path.append(self.ids[cur])
                flag = -1
                count = 0
                while cur != j:
                    count += 1
                    if count > 10:
                        print("error in hop path")
                        flag = 1
                        break
                    flag = 0
                    # action is a dict: destination -> next-hop node id
                    action = self.ids[cur].action_labels[actions[flow_num][cur]]
                    if action.get(j) is not None:
                        cur = action[j]
                        hop_path.append(self.ids[cur])
                    else:
                        flag = 1
                        print("error in hop path")
                        break
                if flag == 0:
                    actual_flow[i][j] = hop_path
                flow_num += 1
        # per-link load: sum demand over every link of every delivered path
        link_load = np.zeros([self.N, self.N])
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                for k in range(len(path) - 1):
                    e1 = path[k]
                    e2 = path[k + 1]
                    link_load[e1.id][e2.id] += TF[i][j]
                    link_load[e2.id][e1.id] += TF[i][j]
        # store flow information on each link
        # node1:node2:[[src,dst],[src,dst]]
        # node2:node1:[[src,dst],...]
        link_flow_records = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    link_flow_records[i][j] = []
                    link_flow_records[j][i] = []
        # per-flow bottleneck throughput: min over path links of capacity(100)/load
        ee_throughput = np.zeros([self.N, self.N])
        for i in actual_flow.keys():
            # input node i
            for j in actual_flow[i].keys():
                flow = [i, j]
                path = actual_flow[i][j]
                temp_min = 9999
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    # record flow 'i j' on link 'node1 node2'
                    link_flow_records[node1.id][node2.id].append(flow)
                    link_flow_records[node2.id][node1.id].append(flow)
                    ee = 100 / link_load[node1.id][node2.id]
                    if ee < temp_min:
                        temp_min = ee
                ee_throughput[i][j] = temp_min
        # ee is basic throughput for each flow, need to be increased
        # residual capacity per link after granting each flow its bottleneck share
        link_residue = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    # for link 'i j'
                    remain = 100
                    for flow in link_flow_records[i][j]:
                        remain -= ee_throughput[flow[0]][flow[1]]
                    link_residue[i][j] = remain
                    link_residue[j][i] = remain
        # TODO, increase each flow throughput, update link_residue
        # (single greedy pass in flow order)
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                temp_min = 9999
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    if link_residue[node1.id][node2.id] < temp_min:
                        temp_min = link_residue[node1.id][node2.id]
                # increase
                if temp_min == 0:
                    continue
                ee_throughput[i][j] += temp_min
                # update
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    link_residue[node1.id][node2.id] -= temp_min
                    link_residue[node2.id][node1.id] -= temp_min
        # next state: [cur_flow, 100/load per neighbor link, per-dest averages below]
        states_ = collections.defaultdict(list)
        for i in range(self.N):
            states_[i].append(cur_flow)
            for j in range(len(self.generator.matrix[i])):
                if self.generator.matrix[i][j] == 1:
                    if link_load[i][j] == 0:
                        states_[i].append(100)
                    else:
                        states_[i].append(100 / link_load[i][j])
        # reward = mean throughput over flows the agent forwards (not as destination)
        rewards = {}
        for agent in self.RLs:
            temp_table = collections.defaultdict(list)
            for des in agent.table:
                temp_table[des].append(0)
            for des in agent.table_peer:
                temp_table[des].append(0)
            for des in agent.table_provider:
                temp_table[des].append(0)
            sum_flow = 0
            sum_ee = 0
            for i in actual_flow.keys():
                for j in actual_flow[i].keys():
                    path = actual_flow[i][j]
                    if agent in path and agent is not path[-1]:
                        sum_flow += 1
                        sum_ee += ee_throughput[i][j]
                        temp_table[j].append(ee_throughput[i][j])
            if sum_flow == 0:
                rewards[agent.id] = 0
            else:
                rewards[agent.id] = sum_ee / sum_flow
            # append per-destination average throughput to the next state
            for i in temp_table:
                avg = sum(temp_table[i]) / len(temp_table[i])
                states_[agent.id].append(avg)
        # total system throughput
        sum_all = 0
        for i in range(self.N):
            for j in range(self.N):
                sum_all += ee_throughput[i][j]
        return states_, rewards, sum_all
class Game:
    """Uniform-random routing baseline: every epoch each node picks a random
    action per flow; update_state() routes the flows and returns only the
    total system throughput (no learning)."""

    def __init__(self):
        # Internet topology
        self.generator = MiniGenerator(10, 2)
        self.generator.build_topology()
        self.generator.build_directed_matrix()
        # RLs and Ns contain the same nodes, only concatenated in a different order
        self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
        self.N = 12  # total node count
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs
        self.MAX = 100000  # number of epochs
        self.ids = self.generator.ids  # node-id -> node object lookup
        # for each agent, add [s,a,r,s'] as element. size
        self.experience_pool = collections.defaultdict(list)
        self.pool_size = 10
        # TODO, define TF, Matrix, Linprog
        # traffic demands: TF[src][dst] = volume
        self.TF = collections.defaultdict(dict)
        self.TF[6][9] = 1
        self.TF[6][10] = 1
        self.TF[7][4] = 1
        self.TF[7][5] = 1
        self.TF[8][10] = 1
        self.TF[8][11] = 1
        self.TF[0][9] = 1
        self.TF[1][11] = 1
        self.TF[2][5] = 1
        self.TF[3][4] = 1

    def play_game(self):
        """
        loop time as time epoch
        """
        TF = self.TF
        # keep updating: actions[flow][node] = action index, zero-initialized
        # (10 hard-coded here = number of flows in self.TF)
        actions = collections.defaultdict(dict)
        for i in range(10):
            for j in range(self.N):
                actions[i][j] = 0
        sums = []
        for time in range(self.MAX):
            print("begin time epoch: " + str(time))
            # NOTE(review): train_state_pool is never used in this baseline
            train_state_pool = collections.defaultdict(dict)
            flow_num = 0
            sum_all = 0
            for i in TF.keys():
                for j in TF[i].keys():
                    # every node draws a uniformly random action for this flow
                    for agent in self.Ns:
                        actions[flow_num][agent.id] = random.randint(
                            0, agent.n_actions - 1)
                    # update states to ss_
                    sum_all = self.update_state(flow_num, actions)
                    flow_num += 1
            sums.append(sum_all)
            print('cut-random: ' + str(sum_all))
            # periodic checkpoint of the throughput curve
            if time % 10000 == 0 and time != 0:
                str1 = 'cut-mini-random' + str(time) + '.txt'
                file = open(str1, 'w')
                file.write(str(sums))
                file.close()

    def update_state(self, cur_flow, actions):
        """Route every flow under `actions` and return the total system
        throughput. Flows that loop (>10 hops) or have no next hop toward
        the destination are dropped, not delivered."""
        TF = self.TF
        actual_flow = collections.defaultdict(dict)
        flow_num = 0
        for i in TF.keys():
            for j in TF[i].keys():
                hop_path = []
                cur = i
                hop_path.append(self.ids[cur])
                flag = -1
                count = 0
                while cur != j:
                    count += 1
                    if count > 10:
                        print("error in hop path")
                        flag = 1
                        break
                    flag = 0
                    # action is a dict: destination -> next-hop node id
                    action = self.ids[cur].action_labels[actions[flow_num][cur]]
                    if action.get(j) is not None:
                        cur = action[j]
                        hop_path.append(self.ids[cur])
                    else:
                        flag = 1
                        print("error in hop path")
                        break
                if flag == 0:
                    actual_flow[i][j] = hop_path
                flow_num += 1
        # per-link load: sum demand over every link of every delivered path
        link_load = np.zeros([self.N, self.N])
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                for k in range(len(path) - 1):
                    e1 = path[k]
                    e2 = path[k + 1]
                    link_load[e1.id][e2.id] += TF[i][j]
                    link_load[e2.id][e1.id] += TF[i][j]
        # store flow information on each link
        # node1:node2:[[src,dst],[src,dst]]
        # node2:node1:[[src,dst],...]
        link_flow_records = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    link_flow_records[i][j] = []
                    link_flow_records[j][i] = []
        # per-flow bottleneck throughput: min over path links of capacity(100)/load
        ee_throughput = np.zeros([self.N, self.N])
        for i in actual_flow.keys():
            # input node i
            for j in actual_flow[i].keys():
                flow = [i, j]
                path = actual_flow[i][j]
                temp_min = 9999
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    # record flow 'i j' on link 'node1 node2'
                    link_flow_records[node1.id][node2.id].append(flow)
                    link_flow_records[node2.id][node1.id].append(flow)
                    ee = 100 / link_load[node1.id][node2.id]
                    if ee < temp_min:
                        temp_min = ee
                ee_throughput[i][j] = temp_min
        # ee is basic throughput for each flow, need to be increased
        # residual capacity per link after granting each flow its bottleneck share
        link_residue = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    # for link 'i j'
                    remain = 100
                    for flow in link_flow_records[i][j]:
                        remain -= ee_throughput[flow[0]][flow[1]]
                    link_residue[i][j] = remain
                    link_residue[j][i] = remain
        # TODO, increase each flow throughput, update link_residue
        # (single greedy pass in flow order)
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                temp_min = 9999
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    if link_residue[node1.id][node2.id] < temp_min:
                        temp_min = link_residue[node1.id][node2.id]
                # increase
                if temp_min == 0:
                    continue
                ee_throughput[i][j] += temp_min
                # update
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    link_residue[node1.id][node2.id] -= temp_min
                    link_residue[node2.id][node1.id] -= temp_min
        # total system throughput
        sum_all = 0
        for i in range(self.N):
            for j in range(self.N):
                sum_all += ee_throughput[i][j]
        return sum_all
def __init__(self):
    """Build the 12-node topology and the 20-flow traffic schedule."""
    # Internet topology: MiniGenerator(10, 2) graph with a directed
    # routing matrix.
    self.generator = MiniGenerator(10, 2)
    self.generator.build_topology()
    self.generator.build_directed_matrix()
    gen = self.generator
    # RL-capable agents and the full node set (same members, the node
    # categories are concatenated in a different order).
    self.RLs = gen.Ts + gen.Cs + gen.Ms + gen.CPs
    self.N = 12
    self.Ns = gen.Ts + gen.Ms + gen.CPs + gen.Cs
    self.MAX = 100000
    self.ids = gen.ids

    # Traffic schedule: one (src, dst, lifetime-in-epochs) triple per
    # flow, listed in flow-id order.
    # NOTE(review): (6, 11) appears twice (flow ids 10 and 17), so
    # TF_id.index([6, 11]) always resolves to 10 and the first lifetime
    # (2) is overwritten by 3. Looks like a typo in the schedule —
    # confirm the intended 18th flow.
    schedule = [
        (6, 9, 3), (6, 10, 2), (7, 4, 1), (7, 5, 3), (8, 10, 2),
        (8, 11, 1), (0, 9, 1), (1, 11, 3), (2, 5, 2), (3, 4, 3),
        (6, 11, 2), (7, 10, 2), (8, 4, 1), (2, 4, 3), (3, 5, 2),
        (0, 10, 1), (1, 9, 1), (6, 11, 3), (8, 5, 2), (3, 10, 1),
    ]
    self.tf_num = 20
    # Per-flow lifetime in epochs, keyed TF_time[src][dst].
    self.TF_time = collections.defaultdict(dict)
    for src, dst, lifetime in schedule:
        self.TF_time[src][dst] = lifetime
    # Flow id -> [src, dst]; list position is the flow id.
    self.TF_id = [[src, dst] for src, dst, _ in schedule]

    self.global_optimal = 600
    self.episode = 0.2  # exploration threshold (epsilon-greedy style)
    # Per-agent experience pool; each entry is an [s, a, r, s'] record.
    self.experience_pool = collections.defaultdict(dict)
    self.pool_size = 200
    self.sample_size = 16
class Game:
    """Multi-agent actor-critic routing game on a 12-node topology.

    A rotating window of 10 active flows (drawn from a 20-flow schedule)
    is routed hop-by-hop by per-node RL agents; `update_state` evaluates
    the resulting end-to-end throughput and per-agent rewards.
    """

    def __init__(self):
        # Internet topology: MiniGenerator(10, 2) graph with a directed
        # routing matrix.
        self.generator = MiniGenerator(10, 2)
        self.generator.build_topology()
        self.generator.build_directed_matrix()
        # RL-capable agents and the full node set (same members,
        # concatenated in a different category order).
        self.RLs = self.generator.Ts + self.generator.Cs + self.generator.Ms + self.generator.CPs
        self.N = 12
        self.Ns = self.generator.Ts + self.generator.Ms + self.generator.CPs + self.generator.Cs
        self.MAX = 100000
        self.ids = self.generator.ids
        # Traffic schedule: lifetime (in epochs) of each flow, keyed
        # TF_time[src][dst].
        self.tf_num = 20
        self.TF_time = collections.defaultdict(dict)
        self.TF_time[6][9] = 3
        self.TF_time[6][10] = 2
        self.TF_time[7][4] = 1
        self.TF_time[7][5] = 3
        self.TF_time[8][10] = 2
        self.TF_time[8][11] = 1
        self.TF_time[0][9] = 1
        self.TF_time[1][11] = 3
        self.TF_time[2][5] = 2
        self.TF_time[3][4] = 3
        self.TF_time[6][11] = 2
        self.TF_time[7][10] = 2
        self.TF_time[8][4] = 1
        self.TF_time[2][4] = 3
        self.TF_time[3][5] = 2
        self.TF_time[0][10] = 1
        self.TF_time[1][9] = 1
        # NOTE(review): TF_time[6][11] is assigned twice (2 above, 3
        # here) and [6, 11] appears twice in TF_id (indices 10 and 17).
        # Since list.index returns the first match, flow 17 can never be
        # addressed — looks like a schedule typo; confirm.
        self.TF_time[6][11] = 3
        self.TF_time[8][5] = 2
        self.TF_time[3][10] = 1
        # Flow id -> [src, dst]; list position is the flow id.
        self.TF_id = []
        self.TF_id.append([6, 9])
        self.TF_id.append([6, 10])
        self.TF_id.append([7, 4])
        self.TF_id.append([7, 5])
        self.TF_id.append([8, 10])
        self.TF_id.append([8, 11])
        self.TF_id.append([0, 9])
        self.TF_id.append([1, 11])
        self.TF_id.append([2, 5])
        self.TF_id.append([3, 4])
        self.TF_id.append([6, 11])
        self.TF_id.append([7, 10])
        self.TF_id.append([8, 4])
        self.TF_id.append([2, 4])
        self.TF_id.append([3, 5])
        self.TF_id.append([0, 10])
        self.TF_id.append([1, 9])
        self.TF_id.append([6, 11])
        self.TF_id.append([8, 5])
        self.TF_id.append([3, 10])
        self.global_optimal = 600
        self.episode = 0.2  # exploration threshold (epsilon-greedy style)
        # Per-agent experience pool; each entry is an [s, a, r, s'] record.
        self.experience_pool = collections.defaultdict(dict)
        self.pool_size = 200
        self.sample_size = 16
        # self.randgame = RandomGame()

    def play_game(self):
        """Run the training loop: build states, create one Actor/Critic
        per RL node, then alternate action selection, environment update
        and actor-critic learning over rotating flow epochs.

        Side effects: opens a TF session, prints progress, and
        periodically dumps `sums` / `sums_rand` to text files.
        """
        print("play")
        # `gpu_options` is a module-level global — presumably a
        # tf.GPUOptions instance; confirm where it is defined.
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        print('sess')
        """
        init states for every node: src, dst, neighbor, e-e, others' actions
        """
        # Basic (local) state per node: one slot per neighbor link plus
        # one slot per reachable destination (customer/peer/provider
        # tables), all initialised to 0.
        states = collections.defaultdict(list)
        for i in range(self.N):
            # add neighbor slots
            for j in range(len(self.generator.matrix[i])):
                if self.generator.matrix[i][j] == 1:
                    states[i].append(0)
            # reachable end-to-end throughput (all advertised are considered here)
            node = self.ids[i]
            for d in node.table:
                states[i].append(0)
            for d in node.table_peer:
                states[i].append(0)
            for d in node.table_provider:
                states[i].append(0)
        # Global state per node: [flow id] + every node's basic state +
        # one action slot per (flow, node) pair.
        # TODO, combine others' states and last actions for each flow
        states_g = collections.defaultdict(list)
        for i in range(self.N):
            # add flow num
            states_g[i] = [0]
            # all agents' basic states
            for j in range(self.N):
                states_g[i] += states[j]
            # actions of all agents for all flows
            for k in range(self.tf_num):
                for q in range(self.N):
                    states_g[i].append(0)
        """
        create RL module
        """
        # One Actor (local view) + one Critic (global view) per RL node.
        for i in self.RLs:
            print("create mode for: " + str(i.id) + ", version -1")
            n_features_critic = len(states_g[i.id])  # len(states[i.id]) + 1
            n_features_actor = len(states[i.id]) + 1
            actor = Actor(sess, n_features_actor, i.n_actions, i.id, -1, i.action_labels)
            critic = Critic(sess, n_features_critic, i.id, -1)
            i.set_rl_setting(actor, critic)
        sess.run(tf.global_variables_initializer())
        print("model created")
        """
        loop time as time epoch
        """
        # actions[flow_id][node_id] = index into that node's
        # action_labels. Initialise with a random valid next hop, or 0
        # when the node's filter for the destination contains -1.
        actions = collections.defaultdict(dict)
        for i in self.TF_time.keys():
            for j in self.TF_time[i].keys():
                f_num = self.TF_id.index([i, j])
                for k in range(self.N):
                    # TODO, init without path error
                    valida = self.ids[k].filter[j]
                    if -1 in valida:
                        actions[f_num][k] = 0
                    else:
                        actions[f_num][k] = self.ids[k].action_labels.index(
                            random.choice(valida))
        sums = []
        sums_rand = []
        # Active flow set: TF[src][dst] = age in epochs. The first 10
        # scheduled flows start active; flow ids 10-19 wait in `queue`.
        TF = collections.defaultdict(dict)
        TF[6][9] = 0
        TF[6][10] = 0
        TF[7][4] = 0
        TF[7][5] = 0
        TF[8][10] = 0
        TF[8][11] = 0
        TF[0][9] = 0
        TF[1][11] = 0
        TF[2][5] = 0
        TF[3][4] = 0
        queue = collections.deque([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
        for time in range(self.MAX):
            # Retire flows whose age exceeds their scheduled lifetime and
            # inject one queued flow for each retirement (FIFO rotation).
            expire = []
            for i in TF.keys():
                for j in TF[i].keys():
                    if TF[i][j] > self.TF_time[i][j]:
                        # inject new flow from queue
                        # store old flow to queue
                        expire.append([i, j])
            for f in expire:
                i = f[0]
                j = f[1]
                TF[i].pop(j)
                queue.append(self.TF_id.index([i, j]))
                # Reset the retired flow's recorded actions.
                for k in actions[self.TF_id.index([i, j])].keys():
                    actions[self.TF_id.index([i, j])][k] = 0
                new_flow = queue.popleft()
                TF[self.TF_id[new_flow][0]][self.TF_id[new_flow][1]] = 0
            for round in range(500):
                # rand_value = self.randgame.play_game(TF, self.tf_num)
                # NOTE(review): the inner loop runs 500 rounds but epochs
                # advance whole_time by only 10, so values overlap across
                # epochs — possibly meant `time * 500 + round`; confirm.
                whole_time = time * 10 + round
                print("time: " + str(whole_time))
                # Halve the exploration threshold every 3000 ticks.
                if whole_time % 3000 == 0 and whole_time != 0:
                    self.episode /= 2
                pro = random.random()
                train_state_pool = collections.defaultdict(dict)
                train_local_view = collections.defaultdict(dict)
                flow_actual_path = collections.defaultdict(list)
                sum_all = 0
                rewards = {}
                # Phase 1: every agent picks an action for every active
                # flow (exploit via actor when pro > episode, otherwise
                # explore randomly among valid next hops), then the
                # environment is re-evaluated per flow.
                for i in TF.keys():
                    for j in TF[i].keys():
                        for agent in self.Ns:
                            flow_num = self.TF_id.index([i, j])
                            # store state and state'
                            train_state_pool[flow_num][agent.id] = []
                            # specific one node to one flow
                            states_g[agent.id][0] = flow_num
                            ss = np.array(states_g[agent.id])
                            local_view = np.array([flow_num] + states[agent.id])
                            if pro > self.episode:
                                valida = agent.filter[j]
                                cnm = agent.actor.choose_action(local_view, valida,
                                                                method='prob')
                                actions[flow_num][agent.id] = cnm
                            else:
                                valida = agent.filter[j]
                                if -1 in valida:
                                    actions[flow_num][agent.id] = 0
                                else:
                                    actions[flow_num][
                                        agent.id] = agent.action_labels.index(
                                            random.choice(valida))
                            train_state_pool[flow_num][agent.id].append(ss)
                            train_local_view[flow_num][agent.id] = local_view
                        flow_num = self.TF_id.index([i, j])
                        # Re-evaluate the environment with the new joint
                        # action; states roll forward to the next step.
                        states_, rewards, sum_all, hh = self.update_state(
                            TF, flow_num, actions)
                        flow_actual_path[flow_num] = hh
                        states_g_ = collections.defaultdict(list)
                        for k in range(self.N):
                            states_g_[k] = [flow_num]
                            for q in range(self.N):
                                states_g_[k] += states_[q]
                            for z in actions.keys():
                                for x in actions[z].keys():
                                    states_g_[k].append(actions[z][x])
                        states_g = states_g_
                        states = states_
                # Phase 2: train critic (global state) and actor (local
                # view) for every agent that lies on a flow's realised
                # path, excluding the destination node.
                for i in TF.keys():
                    for j in TF[i].keys():
                        flow_num = self.TF_id.index([i, j])
                        for agent in self.Ns:
                            if agent not in flow_actual_path[flow_num][:len(
                                    flow_actual_path[flow_num]) - 1]:
                                continue
                            ss = train_state_pool[flow_num][agent.id][0]
                            ss_ = states_g[agent.id]  # states[agent.id]
                            # ss_[0] = flow_num
                            ss_ = np.array(ss_)
                            # ss_ = np.array([flow_num] + ss_)
                            view = train_local_view[flow_num][agent.id]
                            # [s, s', a, r, local view]
                            cur_exp = [
                                ss, ss_, actions[flow_num][agent.id],
                                rewards[agent.id], view
                            ]
                            td_error = agent.critic.learn(
                                cur_exp[0], cur_exp[3], cur_exp[1])
                            agent.actor.learn(cur_exp[4], cur_exp[2], td_error)
                sums.append(sum_all)
                # sums_rand.append(rand_value)
                print('game12-dtf-glb: ' + str(sum_all))
                # print('random: ' + str(rand_value))
                # Periodic checkpoint of the throughput traces.
                # NOTE(review): files are opened without `with`; on an
                # exception between open and close the handle leaks.
                if whole_time % 3000 == 0 and whole_time != 0:
                    str1 = 'game12-dtf-glb' + str(time) + '.txt'
                    file = open(str1, 'w')
                    file.write(str(sums))
                    file.close()
                    str2 = 'game12-dtf-rand' + str(time) + '.txt'
                    file = open(str2, 'w')
                    file.write(str(sums_rand))
                    file.close()
            # Age every active flow by one epoch.
            for i in TF.keys():
                for j in TF[i].keys():
                    TF[i][j] += 1

    def update_state(self, tf, cur_flow, actions):
        """Evaluate the network under the current joint actions.

        Walks each active flow hop-by-hop via the chosen actions,
        computes per-link load, a fair-share throughput per flow
        (capacity 100 per link) plus a residual-capacity top-up, and
        derives per-node next states and rewards.

        Args:
            tf: active flow dict TF[src][dst] -> age.
                NOTE(review): this parameter shadows the tensorflow
                module name inside this method; rename if TF ops are
                ever needed here.
            cur_flow: flow id whose realised path is returned.
            actions: actions[flow_id][node_id] -> action-label index.

        Returns:
            (states_, rewards, sum_all, path_return): next per-node
            states, per-agent average throughput rewards, total
            end-to-end throughput, and cur_flow's hop path.

        Exits the process (sys.exit) if a flow fails to reach its
        destination within 10 hops.
        """
        TF = tf
        actual_flow = collections.defaultdict(dict)
        for i in TF.keys():
            for j in TF[i].keys():
                flow_num = self.TF_id.index([i, j])
                hop_path = []
                cur = i
                hop_path.append(self.ids[cur])
                flag = -1
                count = 0
                # Follow each node's chosen next hop; abort after 10 hops
                # (loop / black-hole guard).
                while cur != j:
                    count += 1
                    if count > 10:
                        print("error in hop path")
                        flag = 1
                        break
                    flag = 0
                    action = self.ids[cur].action_labels[actions[flow_num]
                                                         [cur]]
                    cur = action
                    hop_path.append(self.ids[cur])
                if flag == 0:
                    actual_flow[i][j] = hop_path
                    if flow_num == cur_flow:
                        path_return = hop_path
                else:
                    # Unroutable flow: dump diagnostics and abort the run.
                    print("error in hop path")
                    print("==================from " + str(i) + ' to ' + str(j))
                    # NOTE(review): this loop rebinds `i` (the source
                    # id) to the last path node, so the prints below use
                    # that node's actor — confirm that is intended.
                    for i in hop_path:
                        print(i.id)
                    print(actions)
                    print(i.actor.oldp)
                    print(i.actor.newp)
                    sys.exit(0)
        # Undirected link load: number of flows crossing each link.
        link_load = np.zeros([self.N, self.N])
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                for k in range(len(path) - 1):
                    e1 = path[k]
                    e2 = path[k + 1]
                    link_load[e1.id][e2.id] += 1
                    link_load[e2.id][e1.id] += 1
        # store flow information on each link
        # node1:node2:[[src,dst],[src,dst]]
        # node2:node1:[[src,dst],...]
        link_flow_records = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    link_flow_records[i][j] = []
                    link_flow_records[j][i] = []
        # Base throughput per flow: the fair share (capacity 100 split
        # evenly) of its most loaded link.
        ee_throughput = np.zeros([self.N, self.N])
        for i in actual_flow.keys():  # input node i
            for j in actual_flow[i].keys():
                flow = [i, j]
                path = actual_flow[i][j]
                temp_min = 9999
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    # record flow 'i j' on link 'node1 node2'
                    link_flow_records[node1.id][node2.id].append(flow)
                    link_flow_records[node2.id][node1.id].append(flow)
                    ee = 100 / link_load[node1.id][node2.id]
                    if ee < temp_min:
                        temp_min = ee
                ee_throughput[i][j] = temp_min
        # ee is basic throughput for each flow, need to be increased
        # Residual capacity per link after the base allocation.
        link_residue = collections.defaultdict(dict)
        for i in range(self.N):
            for j in range(self.N):
                if self.generator.matrix[i][j] == 1:
                    # for link 'i j'
                    remain = 100
                    for flow in link_flow_records[i][j]:
                        remain -= ee_throughput[flow[0]][flow[1]]
                    link_residue[i][j] = remain
                    link_residue[j][i] = remain
        # Top up each flow by the smallest residue along its path, then
        # debit that amount from every link it crosses. (Greedy, in dict
        # iteration order — later flows see less residue.)
        for i in actual_flow.keys():
            for j in actual_flow[i].keys():
                path = actual_flow[i][j]
                temp_min = 9999
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    if link_residue[node1.id][node2.id] < temp_min:
                        temp_min = link_residue[node1.id][node2.id]
                # increase
                if temp_min == 0:
                    continue
                ee_throughput[i][j] += temp_min
                # update
                for k in range(len(path) - 1):
                    node1 = path[k]
                    node2 = path[k + 1]
                    link_residue[node1.id][node2.id] -= temp_min
                    link_residue[node2.id][node1.id] -= temp_min
        # Next local state per node: the load of each incident link.
        states_ = collections.defaultdict(list)
        for i in range(self.N):
            for j in range(len(self.generator.matrix[i])):
                if self.generator.matrix[i][j] == 1:
                    states_[i].append(link_load[i][j])
                    # if link_load[i][j] == 0:
                    #     states_[i].append(100)
                    # else:
                    #     states_[i].append(100 / link_load[i][j])
        # Reward per agent: mean throughput of flows it forwards (it is
        # on the path but is not the destination); 0 if it forwards none.
        # Also append per-destination average throughput to its state.
        rewards = {}
        for agent in self.RLs:
            temp_table = collections.defaultdict(list)
            for des in agent.table:
                temp_table[des].append(0)
            for des in agent.table_peer:
                temp_table[des].append(0)
            for des in agent.table_provider:
                temp_table[des].append(0)
            sum_flow = 0
            sum_ee = 0
            for i in actual_flow.keys():
                for j in actual_flow[i].keys():
                    path = actual_flow[i][j]
                    if agent in path and agent is not path[-1]:
                        sum_flow += 1
                        sum_ee += ee_throughput[i][j]
                        temp_table[j].append(ee_throughput[i][j])
            if sum_flow == 0:
                rewards[agent.id] = 0
            else:
                rewards[agent.id] = sum_ee / sum_flow
            for i in temp_table:
                avg = sum(temp_table[i]) / len(temp_table[i])
                states_[agent.id].append(avg)
        # Global objective: total end-to-end throughput over all flows.
        sum_all = 0
        for i in range(self.N):
            for j in range(self.N):
                sum_all += ee_throughput[i][j]
        return states_, rewards, sum_all, path_return