Code example #1
    def train(self, training_set):

        loss_average = []
        iteration = 0
        start = time.time()
        # Start training
        while iteration < self.num_epoch:
            values = []
            print("Iteration %s" % iteration)
            # Before each training epoch, reset the substrate network and the associated RL environment
            sub_copy = copy.deepcopy(self.sub)
            env = NodeEnv(self.sub)
            # Create a buffer for accumulating parameter gradients
            grad_buffer = self.sess.run(self.tvars)
            # Initialize the buffer to zero
            for ix, grad in enumerate(grad_buffer):
                grad_buffer[ix] = grad * 0
            # Number of virtual network requests processed so far
            counter = 0
            for req in training_set:
                # ID of the virtual network request to be mapped
                req_id = req.graph['id']

                if req.graph['type'] == 0:

                    print("\nTraining req%s..." % req_id)
                    print("node mapping...")
                    counter += 1
                    sub_copy.total_arrived = counter
                    # Pass the current virtual network request to the environment
                    env.set_vnr(req)
                    # Obtain the state of the substrate network
                    observation = env.reset()

                    node_map = {}
                    xs, acts = [], []
                    for vn_id in range(req.number_of_nodes()):
                        x = np.reshape(
                            observation,
                            [1, observation.shape[0], observation.shape[1], 1])

                        sn_id = self.choose_action(observation, sub_copy,
                                                   req.nodes[vn_id]['cpu'],
                                                   acts)

                        if sn_id == -1:
                            break
                        else:
                            # Append the input observation to xs
                            xs.append(x)
                            # Append the chosen action to acts
                            acts.append(sn_id)
                            # Take one action and unpack the four returned values
                            observation, _, done, info = env.step(sn_id)
                            node_map.update({vn_id: sn_id})
                    # end for: all node mappings of this VNR have been attempted

                    if len(node_map) == req.number_of_nodes():

                        link_map = Network.cut_then_find_path(
                            sub_copy, req, node_map)
                        reward = Evaluation.revenue_to_cost_ratio(
                            req, link_map)

                        if reward != -1:
                            epx = np.vstack(xs)
                            epy = np.eye(self.n_actions)[acts]
                            # Compute the loss value
                            loss_value = self.sess.run(self.loss,
                                                       feed_dict={
                                                           self.tf_obs: epx,
                                                           self.input_y: epy
                                                       })

                            print("Success! The loss value is: %s" %
                                  loss_value)
                            values.append(loss_value)

                            # Compute the policy gradients
                            tf_grad = self.sess.run(self.newGrads,
                                                    feed_dict={
                                                        self.tf_obs: epx,
                                                        self.input_y: epy
                                                    })
                            # Accumulate the gradients into grad_buffer
                            for ix, grad in enumerate(tf_grad):
                                grad_buffer[ix] += grad
                            grad_buffer[0] *= reward
                            grad_buffer[1] *= reward

                            # Allocate resources
                            Network.allocate(sub_copy, req, node_map, link_map)
                        else:
                            print("Failure!")

                    # When the number of processed requests is a multiple of batch_size, apply the accumulated gradients once
                    if counter % self.batch_size == 0:
                        self.sess.run(self.update_grads,
                                      feed_dict={
                                          self.kernel_grad: grad_buffer[0],
                                          self.biases_grad: grad_buffer[1]
                                      })

                        # Clear grad_buffer
                        for ix, grad in enumerate(grad_buffer):
                            grad_buffer[ix] = grad * 0

                if req.graph['type'] == 1:
                    # Reclaim the resources occupied by this request
                    Network.recover(sub_copy, req)

                env.set_sub(sub_copy)

            loss_average.append(np.mean(values))
            iteration = iteration + 1

        end = (time.time() - start) / 3600
        tool = Analysis('results_loss/')
        tool.save_loss(end, self.num_epoch, loss_average, "RLN")
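
The reward in this example comes from Evaluation.revenue_to_cost_ratio(req, link_map), which is defined elsewhere in the project. A minimal sketch of such a revenue-to-cost reward, assuming the conventional VNE definition and the 'cpu'/'bw' attributes carried by the request graphs above (a hypothetical helper, not the project's exact implementation):

def revenue_to_cost_ratio(req, link_map):
    # Hypothetical sketch: return -1 when the link mapping failed, which is
    # the failure value train() checks for.
    if not link_map or len(link_map) != req.number_of_edges():
        return -1
    # Revenue: CPU and bandwidth requested by the virtual network request.
    revenue = sum(req.nodes[v]['cpu'] for v in req.nodes)
    revenue += sum(req[u][v]['bw'] for u, v in req.edges)
    # Cost: requested CPU plus bandwidth consumed on every substrate hop of each mapped path.
    cost = sum(req.nodes[v]['cpu'] for v in req.nodes)
    for (u, v), path in link_map.items():
        cost += req[u][v]['bw'] * (len(path) - 1)
    return revenue / cost
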
Code example #2
File: reinforce.py  Project: ShuaiYans/RL-VNE
    def train(self, training_set):

        loss_average = []
        iteration = 0
        start = time.time()
        # Start training
        while iteration < self.num_epoch:
            values = []
            print("Iteration %s" % iteration)
            # Before each training epoch, reset the substrate network and the associated RL environment
            sub_copy = copy.deepcopy(self.sub)
            env = NodeEnv(self.sub.net)
            # Create a buffer for accumulating parameter gradients
            grad_buffer = self.sess.run(self.tvars)
            # Initialize the buffer to zero
            for ix, grad in enumerate(grad_buffer):
                grad_buffer[ix] = grad * 0
            # Number of virtual network requests processed so far
            counter = 0
            for req in training_set:
                # ID of the virtual network request to be mapped
                req_id = req.graph['id']
                print("\nHandling req%s..." % req_id)

                if req.graph['type'] == 0:

                    print("\tIt's a newly arrived request, try to map it...")
                    counter += 1
                    sub_copy.total_arrived = counter
                    # Pass the current virtual network request to the environment
                    env.set_vnr(req)
                    # Obtain the state of the substrate network
                    observation = env.reset()

                    node_map = {}
                    xs, acts = [], []
                    for vn_id in range(req.number_of_nodes()):
                        x = np.reshape(
                            observation,
                            [1, observation.shape[0], observation.shape[1], 1])

                        sn_id = self.choose_action(observation, sub_copy.net,
                                                   req.nodes[vn_id]['cpu'],
                                                   acts)

                        if sn_id == -1:
                            break
                        else:
                            # Append the input observation to xs
                            xs.append(x)
                            # Append the chosen action to acts
                            acts.append(sn_id)
                            # Take one action and unpack the four returned values
                            observation, _, done, info = env.step(sn_id)
                            node_map.update({vn_id: sn_id})
                    # end for: all node mappings of this VNR have been attempted

                    if len(node_map) == req.number_of_nodes():

                        reward, link_map = self.calculate_reward(
                            sub_copy, req, node_map)

                        if reward != -1:
                            epx = np.vstack(xs)
                            epy = np.eye(self.n_actions)[acts]
                            # Compute the loss value
                            loss_value = self.sess.run(self.loss,
                                                       feed_dict={
                                                           self.tf_obs: epx,
                                                           self.input_y: epy
                                                       })

                            print("Success! The loss value is: %s" %
                                  loss_value)
                            values.append(loss_value)

                            # Compute the policy gradients
                            tf_grad = self.sess.run(self.newGrads,
                                                    feed_dict={
                                                        self.tf_obs: epx,
                                                        self.input_y: epy
                                                    })
                            # Accumulate the gradients into grad_buffer
                            for ix, grad in enumerate(tf_grad):
                                grad_buffer[ix] += grad
                            grad_buffer[0] *= reward
                            grad_buffer[1] *= reward

                            # Update the substrate network
                            sub_copy.mapped_info.update(
                                {req.graph['id']: (node_map, link_map)})
                            sub_copy.change_resource(req, 'allocate')
                        else:
                            print("Failure!")

                    # When the number of processed requests is a multiple of batch_size, apply the accumulated gradients once
                    if counter % self.batch_size == 0:
                        self.sess.run(self.update_grads,
                                      feed_dict={
                                          self.kernel_grad: grad_buffer[0],
                                          self.biases_grad: grad_buffer[1]
                                      })

                        # Clear grad_buffer
                        for ix, grad in enumerate(grad_buffer):
                            grad_buffer[ix] = grad * 0

                if req.graph['type'] == 1:
                    # Reclaim the resources occupied by this request

                    print("\tIt's time is out, release the occupied resources")
                    if req_id in sub_copy.mapped_info.keys():
                        sub_copy.change_resource(req, 'release')

                env.set_sub(sub_copy.net)

            loss_average.append(np.mean(values))
            iteration = iteration + 1

        end = (time.time() - start) / 3600
        with open('results/nodeloss-%s.txt' % self.num_epoch, 'w') as f:
            f.write("Training time: %s hours\n" % end)
            for value in loss_average:
                f.write(str(value))
                f.write('\n')
Code example #3
File: linkrf.py  Project: ShuaiYans/RL-VNE
    def train(self, training_set):

        loss_average = []
        iteration = 0
        start = time.time()
        # Start training
        while iteration < self.num_epoch:
            values = []
            print("Iteration %s" % iteration)
            # Before each training epoch, reset the substrate network and the associated RL environments
            sub_copy = copy.deepcopy(self.sub)
            nodeenv = NodeEnv(self.sub.net)
            nodep = nodepolicy(nodeenv.action_space.n, nodeenv.observation_space.shape)
            linkenv = LinkEnv(self.sub.net)
            # Create a buffer for accumulating parameter gradients
            grad_buffer = self.sess.run(self.tvars)
            # Initialize the buffer to zero
            for ix, grad in enumerate(grad_buffer):
                grad_buffer[ix] = grad * 0
            # Number of virtual network requests processed so far
            counter = 0
            for req in training_set:
                # ID of the virtual network request to be mapped
                req_id = req.graph['id']
                print("\nHandling req%s..." % req_id)

                if req.graph['type'] == 0:

                    print("\tIt's a newly arrived request, try to map it...")
                    counter += 1
                    sub_copy.total_arrived = counter
                    # Pass the current virtual network request to the node-mapping environment
                    nodeenv.set_vnr(req)
                    # Obtain the state of the substrate network
                    nodeobservation = nodeenv.reset()

                    node_map = {}
                    for vn_id in range(req.number_of_nodes()):

                        sn_id = nodep.choose_max_action(
                            nodeobservation, nodeenv.sub,
                            req.nodes[vn_id]['cpu'], req.number_of_nodes())
                        if sn_id == -1:
                            break
                        else:
                            # Take one action and unpack the four returned values
                            nodeobservation, _, done, info = nodeenv.step(sn_id)
                            node_map.update({vn_id: sn_id})
                    # end for: all node mappings of this VNR have been attempted

                    if len(node_map) == req.number_of_nodes():
                        print('link mapping...')
                        linkenv.set_vnr(req)
                        linkob = linkenv.reset()
                        link_map = {}
                        xs, acts = [], []
                        for link in req.edges:
                            linkenv.set_link(link)
                            vn_from = link[0]
                            vn_to = link[1]
                            sn_from = node_map[vn_from]
                            sn_to = node_map[vn_to]
                            bw = req[vn_from][vn_to]['bw']
                            if nx.has_path(linkenv.sub, sn_from, sn_to):

                                x = np.reshape(linkob, [1, linkob.shape[0], linkob.shape[1], 1])
                                linkaction = self.choose_action(linkob, linkenv.sub, bw, self.linkpath, sn_from, sn_to)
                                if linkaction == -1:
                                    break
                                else:
                                    # Append the input observation to xs
                                    xs.append(x)
                                    # Append the chosen action to acts
                                    acts.append(linkaction)
                                    # Take one action and unpack the four returned values
                                    linkob, _, done, info = linkenv.step(linkaction)
                                    path = list(self.linkpath[linkaction].values())[0]
                                    link_map.update({link: path})


                        if len(link_map) == req.number_of_edges():

                            reward = self.calculate_reward(req, node_map, link_map)

                            epx = np.vstack(xs)
                            # One-hot encode the chosen actions
                            epy = np.eye(self.n_actions)[acts]

                            # Compute the loss value
                            loss_value = self.sess.run(self.loss,
                                                       feed_dict={self.tf_obs: epx,
                                                                  self.input_y: epy})
                            print("Success! The loss value is: %s" % loss_value)
                            values.append(loss_value)

                            # Compute the policy gradients
                            tf_grad = self.sess.run(self.newGrads,
                                                    feed_dict={self.tf_obs: epx,
                                                               self.input_y: epy})
                            # Accumulate the gradients into grad_buffer
                            for ix, grad in enumerate(tf_grad):
                                grad_buffer[ix] += grad
                            grad_buffer[0] *= reward
                            grad_buffer[1] *= reward

                            # Update the substrate network
                            sub_copy.mapped_info.update({req.graph['id']: (node_map, link_map)})
                            sub_copy.change_resource(req, 'allocate')
                        else:
                            print("Failure!")
                    else:
                        print("Failure!")

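                    # When the number of processed requests is a multiple of batch_size, apply the accumulated gradients once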
                    if counter % self.batch_size == 0:
                        self.sess.run(self.update_grads,
                                      feed_dict={self.kernel_grad: grad_buffer[0],
                                                 self.biases_grad: grad_buffer[1]})

                        # Clear grad_buffer
                        for ix, grad in enumerate(grad_buffer):
                            grad_buffer[ix] = grad * 0

                if req.graph['type'] == 1:
                    # Reclaim the resources occupied by this request
                    print("\tIts lifetime has expired, release the occupied resources")
                    if req_id in sub_copy.mapped_info.keys():
                        sub_copy.change_resource(req, 'release')
                nodeenv.set_sub(sub_copy.net)
                linkenv.set_sub(sub_copy.net)

            loss_average.append(np.mean(values))
            iteration = iteration + 1

        end = (time.time() - start) / 3600
        with open('results/linklosslog-%s.txt' % self.num_epoch, 'w') as f:
            f.write("Training time: %s hours\n" % end)
            for value in loss_average:
                f.write(str(value))
                f.write('\n')
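
All three train() methods rely on graph operations built elsewhere in each class: self.tvars, self.newGrads, self.update_grads, and the self.kernel_grad / self.biases_grad placeholders. A rough sketch of how such a TensorFlow 1.x manual gradient-accumulation setup is commonly wired, assuming self.loss is already defined from self.tf_obs and self.input_y and that the network has exactly two trainable variables (kernel and biases); the learning rate here is an assumption, not taken from the project:

import tensorflow as tf

def _build_training_ops(self, learning_rate=0.005):
    # Trainable variables of the policy network (assumed: [kernel, biases]).
    self.tvars = tf.trainable_variables()
    # Per-request gradients of the loss w.r.t. the trainable variables.
    self.newGrads = tf.gradients(self.loss, self.tvars)
    # Placeholders through which the accumulated, reward-scaled gradients
    # (grad_buffer[0] and grad_buffer[1] in train()) are fed back in.
    self.kernel_grad = tf.placeholder(tf.float32, name="kernel_grad")
    self.biases_grad = tf.placeholder(tf.float32, name="biases_grad")
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    # Apply the externally accumulated gradients to the variables.
    self.update_grads = optimizer.apply_gradients(
        zip([self.kernel_grad, self.biases_grad], self.tvars))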