def node_mapping(self, vnr, algorithm, arg):
    """Solve the node mapping problem."""
    print("node mapping...")
    node_map = {}
    # If mapping is just starting, the chosen algorithm needs to be configured first
    if algorithm != 'RLNL':
        if self.agent is None:
            self.agent = configure(self, algorithm, arg)
        node_map = self.agent.run(self, vnr)
    else:
        nodeenv = NodeEnv(self.net)
        nodeenv.set_vnr(vnr)
        nodep = nodepolicy(nodeenv.action_space.n, nodeenv.observation_space.shape)
        nodeobservation = nodeenv.reset()
        for vn_id in range(vnr.number_of_nodes()):
            sn_id = nodep.choose_max_action(nodeobservation, nodeenv.sub,
                                            vnr.nodes[vn_id]['cpu'], vnr.number_of_nodes())
            if sn_id == -1:
                break
            else:
                # Take one action and unpack the four returned values
                nodeobservation, _, done, info = nodeenv.step(sn_id)
                node_map.update({vn_id: sn_id})
    # Node mapping has been performed with the specified algorithm;
    # return the resulting node mapping set
    return node_map
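# Hedged usage sketch (not part of the original source; the `substrate` handle is an
# assumed name): node_mapping returns a plain dict keyed by virtual node id, so callers
# can check completeness directly before moving on to link mapping.
#
#     node_map = substrate.node_mapping(vnr, 'RLN', arg)   # e.g. {0: 14, 1: 3, 2: 27}
#     if len(node_map) == vnr.number_of_nodes():
#         pass  # every virtual node found a substrate host; continue with link mapping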
def run(self, sub, req):
    """Use the trained policy network to directly derive the node mapping set for each virtual network request."""
    node_map = {}
    env = NodeEnv(sub.net)
    env.set_vnr(req)
    observation = env.reset()
    acts = []
    for vn_id in range(req.number_of_nodes()):
        sn_id = self.choose_max_action(observation, sub.net, req.nodes[vn_id]['cpu'], acts)
        if sn_id == -1:
            break
        else:
            acts.append(sn_id)
            # Take one action and unpack the four returned values
            observation, _, done, info = env.step(sn_id)
            node_map.update({vn_id: sn_id})
    return node_map
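# Hedged usage sketch (not part of the original source; `agent`, `sub`, and `req` are
# assumed names): a trained policy instance can be queried directly for an embedding.
# `sub` is expected to expose the substrate graph via `.net`, and `req` is a networkx
# graph whose nodes carry a 'cpu' attribute, matching the calls made inside run().
#
#     node_map = agent.run(sub, req)                      # {vn_id: sn_id, ...}
#     success = len(node_map) == req.number_of_nodes()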
def train(self, training_set):
    loss_average = []
    iteration = 0
    start = time.time()
    # Training starts
    while iteration < self.num_epoch:
        values = []
        print("Iteration %s" % iteration)
        # Before each training round, reset the substrate network and the related RL environments
        sub_copy = copy.deepcopy(self.sub)
        nodeenv = NodeEnv(self.sub)
        nodep = nodepolicy(nodeenv.action_space.n, nodeenv.observation_space.shape)
        linkenv = LinkEnv(self.sub)
        # Create a buffer for accumulating parameter gradients
        grad_buffer = self.sess.run(self.tvars)
        # Initialize it to zero
        for ix, grad in enumerate(grad_buffer):
            grad_buffer[ix] = grad * 0
        # Count the virtual network requests processed so far
        counter = 0
        for req in training_set:
            # ID of the virtual network request currently awaiting mapping
            req_id = req.graph['id']
            if req.graph['type'] == 0:
                print("\nTraining req%s..." % req_id)
                print('node mapping...')
                counter += 1
                sub_copy.total_arrived = counter
                # Pass the current virtual network request to the environment
                nodeenv.set_vnr(req)
                # Obtain the state of the substrate network
                nodeobservation = nodeenv.reset()
                node_map = {}
                for vn_id in range(req.number_of_nodes()):
                    sn_id = nodep.choose_max_action(nodeobservation, nodeenv.sub,
                                                    req.nodes[vn_id]['cpu'], req.number_of_nodes())
                    if sn_id == -1:
                        break
                    else:
                        # Take one action and unpack the four returned values
                        nodeobservation, _, done, info = nodeenv.step(sn_id)
                        node_map.update({vn_id: sn_id})
                # end for: all node mappings of this VNR have been attempted
                if len(node_map) == req.number_of_nodes():
                    print('link mapping...')
                    linkenv.set_vnr(req)
                    linkob = linkenv.reset()
                    link_map = {}
                    xs, acts = [], []
                    for link in req.edges:
                        linkenv.set_link(link)
                        vn_from = link[0]
                        vn_to = link[1]
                        sn_from = node_map[vn_from]
                        sn_to = node_map[vn_to]
                        bw = req[vn_from][vn_to]['bw']
                        if nx.has_path(linkenv.sub, sn_from, sn_to):
                            x = np.reshape(linkob, [1, linkob.shape[0], linkob.shape[1], 1])
                            linkaction = self.choose_action(linkob, linkenv.sub, bw,
                                                            self.linkpath, sn_from, sn_to)
                            if linkaction == -1:
                                break
                            else:
                                # Append the environment observation to the xs list
                                xs.append(x)
                                # Append the chosen action to the acts list
                                acts.append(linkaction)
                                # Take one action and unpack the four returned values
                                linkob, _, done, info = linkenv.step(linkaction)
                                path = list(self.linkpath[linkaction].values())[0]
                                link_map.update({link: path})
                    if len(link_map) == req.number_of_edges():
                        reward = Evaluation.revenue_to_cost_ratio(req, link_map)
                        epx = np.vstack(xs)
                        # One-hot encode the chosen actions
                        epy = np.eye(self.n_actions)[acts]
                        # Evaluate the loss
                        loss_value = self.sess.run(self.loss,
                                                   feed_dict={self.tf_obs: epx, self.input_y: epy})
                        print("Success! The loss value is: %s" % loss_value)
                        values.append(loss_value)
                        # Compute the gradients
                        tf_grad = self.sess.run(self.newGrads,
                                                feed_dict={self.tf_obs: epx, self.input_y: epy})
                        # Accumulate the gradients into grad_buffer
                        for ix, grad in enumerate(tf_grad):
                            grad_buffer[ix] += grad
                        grad_buffer[0] *= reward
                        grad_buffer[1] *= reward
                        # Allocate resources
                        Network.allocate(sub_copy, req, node_map, link_map)
                    else:
                        print("Failure!")
                else:
                    print("Failure!")
                # Once the number of handled requests is a multiple of the batch size,
                # apply the accumulated gradients to update the parameters
                if counter % self.batch_size == 0:
                    self.sess.run(self.update_grads,
                                  feed_dict={self.kernel_grad: grad_buffer[0],
                                             self.biases_grad: grad_buffer[1]})
                    # Clear grad_buffer
                    for ix, grad in enumerate(grad_buffer):
                        grad_buffer[ix] = grad * 0
            if req.graph['type'] == 1:
                # Reclaim the resources occupied by this request
                Network.recover(sub_copy, req)
        loss_average.append(np.mean(values))
        iteration = iteration + 1
    end = (time.time() - start) / 3600
    tool = Analysis('results_loss/')
    tool.save_loss(end, self.num_epoch, loss_average, "RLNL")
def train(self, training_set):
    loss_average = []
    iteration = 0
    start = time.time()
    # Training starts
    while iteration < self.num_epoch:
        values = []
        print("Iteration %s" % iteration)
        # Before each training round, reset the substrate network and the related RL environment
        sub_copy = copy.deepcopy(self.sub)
        env = NodeEnv(self.sub.net)
        # Create a buffer for accumulating parameter gradients
        grad_buffer = self.sess.run(self.tvars)
        # Initialize it to zero
        for ix, grad in enumerate(grad_buffer):
            grad_buffer[ix] = grad * 0
        # Count the virtual network requests processed so far
        counter = 0
        for req in training_set:
            # ID of the virtual network request currently awaiting mapping
            req_id = req.graph['id']
            print("\nHandling req%s..." % req_id)
            if req.graph['type'] == 0:
                print("\tIt's a newly arrived request, trying to map it...")
                counter += 1
                sub_copy.total_arrived = counter
                # Pass the current virtual network request to the environment
                env.set_vnr(req)
                # Obtain the state of the substrate network
                observation = env.reset()
                node_map = {}
                xs, acts = [], []
                for vn_id in range(req.number_of_nodes()):
                    x = np.reshape(observation, [1, observation.shape[0], observation.shape[1], 1])
                    sn_id = self.choose_action(observation, sub_copy.net,
                                               req.nodes[vn_id]['cpu'], acts)
                    if sn_id == -1:
                        break
                    else:
                        # Append the environment observation to the xs list
                        xs.append(x)
                        # Append the chosen action to the acts list
                        acts.append(sn_id)
                        # Take one action and unpack the four returned values
                        observation, _, done, info = env.step(sn_id)
                        node_map.update({vn_id: sn_id})
                # end for: all node mappings of this VNR have been attempted
                if len(node_map) == req.number_of_nodes():
                    reward, link_map = self.calculate_reward(sub_copy, req, node_map)
                    if reward != -1:
                        epx = np.vstack(xs)
                        # One-hot encode the chosen actions
                        epy = np.eye(self.n_actions)[acts]
                        # Evaluate the loss
                        loss_value = self.sess.run(self.loss,
                                                   feed_dict={self.tf_obs: epx, self.input_y: epy})
                        print("Success! The loss value is: %s" % loss_value)
                        values.append(loss_value)
                        # Compute the gradients
                        tf_grad = self.sess.run(self.newGrads,
                                                feed_dict={self.tf_obs: epx, self.input_y: epy})
                        # Accumulate the gradients into grad_buffer
                        for ix, grad in enumerate(tf_grad):
                            grad_buffer[ix] += grad
                        grad_buffer[0] *= reward
                        grad_buffer[1] *= reward
                        # Update the substrate network
                        sub_copy.mapped_info.update({req.graph['id']: (node_map, link_map)})
                        sub_copy.change_resource(req, 'allocate')
                    else:
                        print("Failure!")
                # Once the number of handled requests is a multiple of the batch size,
                # apply the accumulated gradients to update the parameters
                if counter % self.batch_size == 0:
                    self.sess.run(self.update_grads,
                                  feed_dict={self.kernel_grad: grad_buffer[0],
                                             self.biases_grad: grad_buffer[1]})
                    # Clear grad_buffer
                    for ix, grad in enumerate(grad_buffer):
                        grad_buffer[ix] = grad * 0
            if req.graph['type'] == 1:
                print("\tIts lifetime has expired, releasing the occupied resources")
                if req_id in sub_copy.mapped_info.keys():
                    sub_copy.change_resource(req, 'release')
                    env.set_sub(sub_copy.net)
        loss_average.append(np.mean(values))
        iteration = iteration + 1
    end = (time.time() - start) / 3600
    with open('results/nodeloss-%s.txt' % self.num_epoch, 'w') as f:
        f.write("Training time: %s hours\n" % end)
        for value in loss_average:
            f.write(str(value))
            f.write('\n')
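# Illustrative sketch (an assumption, not taken from the original source): how the
# one-hot labels `epy` fed to the loss above are built. With n_actions candidate
# substrate nodes and chosen actions acts, np.eye(n_actions)[acts] stacks one row
# per node-mapping decision.
#
#     import numpy as np
#     n_actions = 4
#     acts = [2, 0, 3]
#     epy = np.eye(n_actions)[acts]
#     # epy == [[0., 0., 1., 0.],
#     #         [1., 0., 0., 0.],
#     #         [0., 0., 0., 1.]]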
def train(self, training_set):
    loss_average = []
    iteration = 0
    start = time.time()
    # Training starts
    while iteration < self.num_epoch:
        values = []
        print("Iteration %s" % iteration)
        # Before each training round, reset the substrate network and the related RL environment
        sub_copy = copy.deepcopy(self.sub)
        env = NodeEnv(self.sub)
        # Create a buffer for accumulating parameter gradients
        grad_buffer = self.sess.run(self.tvars)
        # Initialize it to zero
        for ix, grad in enumerate(grad_buffer):
            grad_buffer[ix] = grad * 0
        # Count the virtual network requests processed so far
        counter = 0
        for req in training_set:
            # ID of the virtual network request currently awaiting mapping
            req_id = req.graph['id']
            if req.graph['type'] == 0:
                print("\nTraining req%s..." % req_id)
                print("node mapping...")
                counter += 1
                sub_copy.total_arrived = counter
                # Pass the current virtual network request to the environment
                env.set_vnr(req)
                # Obtain the state of the substrate network
                observation = env.reset()
                node_map = {}
                xs, acts = [], []
                for vn_id in range(req.number_of_nodes()):
                    x = np.reshape(observation, [1, observation.shape[0], observation.shape[1], 1])
                    sn_id = self.choose_action(observation, sub_copy,
                                               req.nodes[vn_id]['cpu'], acts)
                    if sn_id == -1:
                        break
                    else:
                        # Append the environment observation to the xs list
                        xs.append(x)
                        # Append the chosen action to the acts list
                        acts.append(sn_id)
                        # Take one action and unpack the four returned values
                        observation, _, done, info = env.step(sn_id)
                        node_map.update({vn_id: sn_id})
                # end for: all node mappings of this VNR have been attempted
                if len(node_map) == req.number_of_nodes():
                    link_map = Network.cut_then_find_path(sub_copy, req, node_map)
                    reward = Evaluation.revenue_to_cost_ratio(req, link_map)
                    if reward != -1:
                        epx = np.vstack(xs)
                        # One-hot encode the chosen actions
                        epy = np.eye(self.n_actions)[acts]
                        # Evaluate the loss
                        loss_value = self.sess.run(self.loss,
                                                   feed_dict={self.tf_obs: epx, self.input_y: epy})
                        print("Success! The loss value is: %s" % loss_value)
                        values.append(loss_value)
                        # Compute the gradients
                        tf_grad = self.sess.run(self.newGrads,
                                                feed_dict={self.tf_obs: epx, self.input_y: epy})
                        # Accumulate the gradients into grad_buffer
                        for ix, grad in enumerate(tf_grad):
                            grad_buffer[ix] += grad
                        grad_buffer[0] *= reward
                        grad_buffer[1] *= reward
                        # Allocate resources
                        Network.allocate(sub_copy, req, node_map, link_map)
                    else:
                        print("Failure!")
                # Once the number of handled requests is a multiple of the batch size,
                # apply the accumulated gradients to update the parameters
                if counter % self.batch_size == 0:
                    self.sess.run(self.update_grads,
                                  feed_dict={self.kernel_grad: grad_buffer[0],
                                             self.biases_grad: grad_buffer[1]})
                    # Clear grad_buffer
                    for ix, grad in enumerate(grad_buffer):
                        grad_buffer[ix] = grad * 0
            if req.graph['type'] == 1:
                # Reclaim the resources occupied by this request
                Network.recover(sub_copy, req)
                env.set_sub(sub_copy)
        loss_average.append(np.mean(values))
        iteration = iteration + 1
    end = (time.time() - start) / 3600
    tool = Analysis('results_loss/')
    tool.save_loss(end, self.num_epoch, loss_average, "RLN")
def train(self, training_set):
    loss_average = []
    iteration = 0
    start = time.time()
    # Training starts
    while iteration < self.num_epoch:
        values = []
        print("Iteration %s" % iteration)
        # Before each training round, reset the substrate network and the related RL environments
        sub_copy = copy.deepcopy(self.sub)
        nodeenv = NodeEnv(self.sub.net)
        nodep = nodepolicy(nodeenv.action_space.n, nodeenv.observation_space.shape)
        linkenv = LinkEnv(self.sub.net)
        # Create a buffer for accumulating parameter gradients
        grad_buffer = self.sess.run(self.tvars)
        # Initialize it to zero
        for ix, grad in enumerate(grad_buffer):
            grad_buffer[ix] = grad * 0
        # Count the virtual network requests processed so far
        counter = 0
        for req in training_set:
            # ID of the virtual network request currently awaiting mapping
            req_id = req.graph['id']
            print("\nHandling req%s..." % req_id)
            if req.graph['type'] == 0:
                print("\tIt's a newly arrived request, trying to map it...")
                counter += 1
                sub_copy.total_arrived = counter
                # Pass the current virtual network request to the environment
                nodeenv.set_vnr(req)
                # Obtain the state of the substrate network
                nodeobservation = nodeenv.reset()
                node_map = {}
                for vn_id in range(req.number_of_nodes()):
                    sn_id = nodep.choose_max_action(nodeobservation, nodeenv.sub,
                                                    req.nodes[vn_id]['cpu'], req.number_of_nodes())
                    if sn_id == -1:
                        break
                    else:
                        # Take one action and unpack the four returned values
                        nodeobservation, _, done, info = nodeenv.step(sn_id)
                        node_map.update({vn_id: sn_id})
                # end for: all node mappings of this VNR have been attempted
                if len(node_map) == req.number_of_nodes():
                    print('link mapping...')
                    linkenv.set_vnr(req)
                    linkob = linkenv.reset()
                    link_map = {}
                    xs, acts = [], []
                    for link in req.edges:
                        linkenv.set_link(link)
                        vn_from = link[0]
                        vn_to = link[1]
                        sn_from = node_map[vn_from]
                        sn_to = node_map[vn_to]
                        bw = req[vn_from][vn_to]['bw']
                        if nx.has_path(linkenv.sub, sn_from, sn_to):
                            x = np.reshape(linkob, [1, linkob.shape[0], linkob.shape[1], 1])
                            linkaction = self.choose_action(linkob, linkenv.sub, bw,
                                                            self.linkpath, sn_from, sn_to)
                            if linkaction == -1:
                                break
                            else:
                                # Append the environment observation to the xs list
                                xs.append(x)
                                # Append the chosen action to the acts list
                                acts.append(linkaction)
                                # Take one action and unpack the four returned values
                                linkob, _, done, info = linkenv.step(linkaction)
                                path = list(self.linkpath[linkaction].values())[0]
                                link_map.update({link: path})
                    if len(link_map) == req.number_of_edges():
                        reward = self.calculate_reward(req, node_map, link_map)
                        epx = np.vstack(xs)
                        # One-hot encode the chosen actions
                        epy = np.eye(self.n_actions)[acts]
                        # Evaluate the loss
                        loss_value = self.sess.run(self.loss,
                                                   feed_dict={self.tf_obs: epx, self.input_y: epy})
                        print("Success! The loss value is: %s" % loss_value)
                        values.append(loss_value)
                        # Compute the gradients
                        tf_grad = self.sess.run(self.newGrads,
                                                feed_dict={self.tf_obs: epx, self.input_y: epy})
                        # Accumulate the gradients into grad_buffer
                        for ix, grad in enumerate(tf_grad):
                            grad_buffer[ix] += grad
                        grad_buffer[0] *= reward
                        grad_buffer[1] *= reward
                        # Update the substrate network
                        sub_copy.mapped_info.update({req.graph['id']: (node_map, link_map)})
                        sub_copy.change_resource(req, 'allocate')
                    else:
                        print("Failure!")
                else:
                    print("Failure!")
                # Once the number of handled requests is a multiple of the batch size,
                # apply the accumulated gradients to update the parameters
                if counter % self.batch_size == 0:
                    self.sess.run(self.update_grads,
                                  feed_dict={self.kernel_grad: grad_buffer[0],
                                             self.biases_grad: grad_buffer[1]})
                    # Clear grad_buffer
                    for ix, grad in enumerate(grad_buffer):
                        grad_buffer[ix] = grad * 0
            if req.graph['type'] == 1:
                print("\tIts lifetime has expired, releasing the occupied resources")
                if req_id in sub_copy.mapped_info.keys():
                    sub_copy.change_resource(req, 'release')
                    nodeenv.set_sub(sub_copy.net)
                    linkenv.set_sub(sub_copy.net)
        loss_average.append(np.mean(values))
        iteration = iteration + 1
    end = (time.time() - start) / 3600
    with open('results/linklosslog-%s.txt' % self.num_epoch, 'w') as f:
        f.write("Training time: %s hours\n" % end)
        for value in loss_average:
            f.write(str(value))
            f.write('\n')
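# Illustrative sketch (assumed names `batches`, `per_request_grads`, `reward`,
# `batch_size`, `apply_update`; not taken from the original source) of the
# gradient-buffer cycle used by train(): per-request gradients are accumulated,
# scaled by the reward, and flushed into a parameter update once every batch_size
# successfully handled requests.
#
#     import numpy as np
#     grad_buffer = [np.zeros((3, 3)), np.zeros(3)]          # kernel grads, bias grads
#     for step, (per_request_grads, reward) in enumerate(batches, start=1):
#         for ix, grad in enumerate(per_request_grads):
#             grad_buffer[ix] += grad                        # accumulate
#         grad_buffer[0] *= reward                           # weight by achieved reward
#         grad_buffer[1] *= reward
#         if step % batch_size == 0:
#             apply_update(grad_buffer)                      # hypothetical optimizer step
#             grad_buffer = [g * 0 for g in grad_buffer]     # reset for the next batch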