def main():
    """Evaluate a fixed per-slice resource split over ``MAX_EPISODES`` episodes.

    Each episode creates a fresh ``SimulationEnv`` with four base stations,
    assigns every slice the same constant share on all four base stations,
    runs ``PhysicalResource.allocate_isolation`` and stores the per-slice
    satisfaction ratio and average resource utilisation it returns.  From
    episode 11 onwards the user count of slice 2 is set to 420.  Both result
    tables are saved to timestamped ``.npy`` files in the working directory.
    """
    # Users per slice.  Other loads that were tried:
    #   [36, 8, 180, 88], [44, 9, 200, 100], [52, 10, 220, 112]
    ue_num = [60, 11, 240, 124]
    # Resource share per slice (an earlier setting was [0.23, 0.3, 0.26, 0.21]).
    # Other candidates that were tried:
    #   [0.2165, 0.3712, 0.2123, 0.2], [0.2204, 0.3632, 0.2163, 0.2],
    #   [0.2298, 0.3323, 0.2379, 0.2], [0.23, 0.3107, 0.2593, 0.2]
    r_s = [0.23, 0.255, 0.25, 0.137]

    # One row per episode, one column per slice.
    isolation_slice_sat = np.zeros([MAX_EPISODES, 4])
    isolation_slice_ru = np.zeros([MAX_EPISODES, 4])

    for episode in range(MAX_EPISODES):
        env = SimulationEnv()
        TPs = env.generate_bs(bs_num=4)
        if episode > 10:
            # Load step: slice 2 jumps to 420 users and stays there.
            ue_num[2] = 420
        ues = env.generate_ue(ue_num=ue_num)

        # Resource-ratio matrix: row si holds slice si's share, repeated
        # across the four columns.
        r_sk = np.array([[share] * 4 for share in r_s])

        # User association.
        association_ues, rb_size = env.ue_association(admission_ues=ues,
                                                      TPs=TPs)

        # Generate the subframe index of every user; only the second
        # returned value is needed below.
        _, max_index = env.generate_subframe_index(
            association_ues, lamda_k=env.lamda_k, data_num=20, mi=1)

        # Physical resource allocation.
        pr = PhysicalResource(TPs=TPs, user_qos=env.user_qos, env=env)
        outcome = pr.allocate_isolation(association_ues, r_sk, max_index,
                                        rb_size)
        slice_sat_ratio, slice_avg_RU, _, _, _ = outcome
        print(slice_sat_ratio)
        print(slice_avg_RU)

        isolation_slice_sat[episode, :] = slice_sat_ratio
        isolation_slice_ru[episode, :] = slice_avg_RU

    # Save the collected variables.
    time_str = utils.cur_time()
    print(
        '-----------------STATISTICAL VARIABLES %s HAVE SAVED-----------------'
        % time_str)
    for prefix, table in (('isolation_slice_sat_', isolation_slice_sat),
                          ('isolation_slice_ru_', isolation_slice_ru)):
        np.save(prefix + time_str + '.npy', table)
def main():
    """Collect the slice queue lengths produced by four allocation schemes.

    A single environment (four base stations, one user population) is shared
    by all schemes.  For ``mi`` in 0..3 the resource-ratio matrix ``r_sk`` is
    obtained as follows:

    * 0 -- constant shares ``r_s`` (labelled "Dueling DQN" in the original)
    * 1 -- constant shares ``r_s_dqn`` (labelled "DQN")
    * 2 -- ``netshare.slice_ra`` scaled by 0.8 (NetShare)
    * 3 -- ``nvs.slice_ra`` scaled by 0.8 (NVS)

    ``PhysicalResource.allocate`` is then run and the last value it returns
    (the global slice queue length) is stored under key ``mi``.  The resulting
    dict is pickled to ``./global_buffer_length_<time>.npy``; despite the
    ``.npy`` suffix the file is a pickle, not a NumPy array file.
    """
    ue_num = [60, 11, 240, 124]
    r_s = [0.23, 0.255, 0.25, 0.137]  # [0.188, 0.255, 0.216, 0.137]
    r_s_dqn = [0.169, 0.288, 0.25, 0.15]  # [0.169, 0.288, 0.217, 0.15]
    env = SimulationEnv()
    TPs = env.generate_bs(bs_num=4)
    ues = env.generate_ue(ue_num=ue_num)

    user_qos = np.array(
        [env.rate_demands, env.delay_demands, env.lamda_k, env.packet_size])

    # Schemes whose shares are fixed constants rather than computed.
    fixed_shares = {0: r_s, 1: r_s_dqn}
    global_buffer_length = {}

    for mi in range(4):
        # User association: the fixed-share schemes use the method's default
        # mode, NetShare and NVS pass mi=1.
        if mi in fixed_shares:
            association_ues, rb_size = env.ue_association(admission_ues=ues,
                                                          TPs=TPs)
        else:
            association_ues, rb_size = env.ue_association(admission_ues=ues,
                                                          TPs=TPs,
                                                          mi=1)

        # Generate the subframe index of every user.
        data_num = 100
        _, max_index = env.generate_subframe_index(
            association_ues, lamda_k=env.lamda_k, data_num=data_num, mi=2)

        # Slice-level resource ratios.
        if mi in fixed_shares:
            r_sk = np.ones([4, 4])
            for si in range(4):
                r_sk[si, :] = fixed_shares[mi][si]
        elif mi == 2:  # NetShare
            r_sk, _success = netshare.slice_ra(user_qos, association_ues,
                                               env.slice_num, env.bs_num,
                                               env.sub_channel_num,
                                               env.sub_frame_num)
            r_sk *= 0.8
        else:  # NVS
            r_sk = nvs.slice_ra(env.rate_demands, association_ues,
                                env.slice_num, env.bs_num)
            r_sk *= 0.8

        # Physical resource allocation; only the queue length is kept.
        pr = PhysicalResource(TPs=TPs, user_qos=env.user_qos, env=env)
        (_sat_ratio, _avg_RU, _bs_sat, _bs_RU, _spectral_efficiency,
         global_slice_queue_len) = pr.allocate(association_ues, r_sk,
                                               max_index, rb_size)
        global_buffer_length[mi] = global_slice_queue_len

    # Save the collected variables.
    time_str = utils.cur_time()
    print(
        '-----------------STATISTICAL VARIABLES %s HAVE SAVED-----------------'
        % time_str)
    # The context manager closes (and flushes) the file even if pickling
    # fails; the original left the handle open.
    with open('./global_buffer_length_' + time_str + '.npy', 'wb') as out_file:
        pickle.dump(global_buffer_length, out_file)
def train():
    """Run the training loop of the agent that is driven through ``get_state``.

    For each of ``MAX_EPISODES`` episodes a fresh ``SimulationEnv`` is built,
    an initial allocation is evaluated with ``PhysicalResource.allocate`` and
    then ``MAX_EP_STEPS`` steps are played.  In every step the user counts
    grow, users are re-associated, ``RL.choose_action`` picks an action from
    ``get_state(j, <satisfaction part of the observation>)``, ``pr.step``
    applies it, and ``RL.learn`` is called on the transition with
    ``min(reward[0:4])`` as the reward.

    Per-step details are written to the module-level ``global_statistics``
    array; per-episode means of satisfaction, utilisation and ``reward[4]``
    plus ``global_statistics`` are saved to timestamped ``.npy`` files.
    """
    sys_sat_list = []
    sys_RU_list = []
    sys_reward_list = []
    index = 0  # next row of global_statistics to fill
    # Per-slice maximum user count, used to normalise the slice load.
    # NOTE(review): slice 0 reaches 60 users at the 7th step (ud = 140), which
    # exceeds the 32 used here -- confirm this normaliser is intended.
    max_ue_num = [32, 11, 240, 124]
    # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
    max_ue = np.array(max_ue_num, dtype=float)

    for episode in range(MAX_EPISODES):
        print('-----------NEW EPISODE %d STARTING-------------' % episode)
        # Initialise the environment.
        ud = 0
        # Extra users of slice 1 at step j: 1, 2, ..., 7.
        # NOTE(review): only 7 entries, so MAX_EP_STEPS must not exceed 7.
        slice1_ud = np.arange(1, 8)
        ue_num = [4, 4, 100, 40]

        env = SimulationEnv()
        TPs = env.generate_bs(bs_num=4)
        ues = env.generate_ue(ue_num=ue_num)

        # Initial resource ratios: 0.1 everywhere, slice 1 -> 0.2,
        # slice 2 -> 0.13.
        r_sk = np.ones([4, 4]) * 0.1
        r_sk[1, :] = 0.2
        r_sk[2, :] = 0.13

        # User association.
        association_ues, rb_size = env.ue_association(admission_ues=ues,
                                                      TPs=TPs)

        # Generate the subframe index of every user.
        data_num = 20
        _, max_index = env.generate_subframe_index(
            association_ues, lamda_k=env.lamda_k, data_num=data_num)

        # Physical resource allocation for the initial state.
        pr = PhysicalResource(TPs=TPs, user_qos=env.user_qos, env=env)
        (slice_sat_ratio, _avg_RU, _bs_sat, _bs_RU, _spectral_efficiency,
         _queue_len) = pr.allocate(association_ues, r_sk, max_index, rb_size)

        slice_load = np.array(ue_num, dtype=float) / max_ue  # slice load
        # Initial state: [load (4 values), satisfaction (4 values)].
        observation = np.concatenate((slice_load, slice_sat_ratio))

        sys_sat, sys_RU, sys_reward = [], [], []
        for j in range(MAX_EP_STEPS):
            # Refresh the environment with the grown user population.
            ud += 20
            ue_num = [
                4 + int(np.ceil(ud * 2 / 5)), 4 + slice1_ud[j], 100 + ud,
                40 + int(np.ceil(ud * 3 / 5))
            ]
            ues = env.generate_ue(ue_num=ue_num)

            # User association.
            association_ues, rb_size = env.ue_association(admission_ues=ues,
                                                          TPs=TPs)

            # Generate the subframe index of every user.
            data_num = 20
            _, max_index = env.generate_subframe_index(
                association_ues, lamda_k=env.lamda_k, data_num=data_num)

            # The agent picks an action from the current observation.
            action, is_random = RL.choose_action(get_state(j, observation[4:]))

            # The environment answers with the next observation and reward.
            observation_, reward, r_sk_, slice_bs_sat, slice_bs_RU, real_action, \
                r_allocated, r_reserved, slice_spectral_efficiency = pr.step(action, association_ues,
                                                                             r_sk.copy(), max_index, rb_size)

            slice_load = np.array(ue_num, dtype=float) / max_ue  # slice load
            next_state = np.concatenate(
                (slice_load, observation_[4:8]))  # next state
            # Learn from this transition; the reward is the smallest of the
            # first four reward entries.
            RL.learn(get_state(j, observation[4:]), action,
                     np.min(reward[0:4]), get_state(j + 1, next_state[4:]))

            # Record statistics.
            sys_sat.append(np.mean(observation_[4:8]))
            sys_RU.append(np.mean(observation_[8:12]))
            sys_reward.append(reward[4])
            global_statistics[index, :] = np.concatenate(
                ([episode], ue_num, r_sk.flatten(), real_action, [is_random],
                 r_sk_.flatten(), observation_[4:8], observation_[8:12],
                 reward, slice_bs_sat.flatten(), slice_bs_RU.flatten(),
                 r_allocated, r_reserved, slice_spectral_efficiency))

            # The next state becomes the current state of the next step.
            observation = next_state
            r_sk = r_sk_

            index += 1

        sys_RU_list.append(np.mean(sys_RU))
        sys_sat_list.append(np.mean(sys_sat))
        sys_reward_list.append(np.mean(sys_reward))

    # Save the collected variables.
    time_str = utils.cur_time()
    print(
        '-----------------STATISTICAL VARIABLES %s HAVE SAVED-----------------'
        % time_str)
    np.save('sys_sat_list_' + time_str + '.npy', sys_sat_list)
    np.save('sys_RU_list_' + time_str + '.npy', sys_RU_list)
    np.save('sys_reward_list_' + time_str + '.npy', sys_reward_list)
    np.save('global_statistics_' + time_str + '.npy', global_statistics)
    # NOTE: the model itself is not saved in this variant (the RL.save call
    # was disabled in the original); the message below is kept unchanged.
    print('-----------------MODEL HAS SAVED-----------------')

    # End of training.
    print('-----------------TRAIN OVER--------------------')
# Example #4
# 0
def ue_num_change():
    """Compare NVS and NetShare slice allocation under seven growing loads.

    For load level ``j`` (0..6) the user counts are raised, a fresh
    ``SimulationEnv`` is created and three random placements of base stations
    and users are simulated.  Each placement is evaluated with both methods
    (``mi == 0``: ``nvs.slice_ra``, ``mi == 1``: ``netshare.slice_ra``), the
    resulting ratios being scaled by 0.75 before
    ``PhysicalResource.allocate`` is run.

    Four ``7 x 9`` tables are saved to timestamped ``.npy`` files: column 0 is
    the total number of users, columns 1-4 the NVS per-slice values and
    columns 5-8 the NetShare ones, each averaged over the three placements.
    """
    placements = 3
    slice1_ud = np.arange(1, 8)  # extra users of slice 1 per load level: 1..7

    global_sat_ratio = np.zeros([7, 9])
    global_slice_avg_RU = np.zeros([7, 9])
    global_res_allocated = np.zeros([7, 9])
    global_res_used = np.zeros([7, 9])

    for j, slice1_extra in enumerate(slice1_ud):
        ud = 20 * (j + 1)
        ue_num = [
            4 + int(np.ceil(ud * 2 / 5)),
            4 + slice1_extra,
            100 + ud,
            40 + int(np.ceil(ud * 3 / 5)),
        ]
        env = SimulationEnv()
        user_qos = np.array([env.rate_demands, env.delay_demands, env.lamda_k, env.packet_size])

        print('-----------NEW EPISODE %d STARTING-------------' % j)

        # Per-placement results; columns 0-3 belong to NVS, 4-7 to NetShare.
        temp_slice_sat_ratio = np.zeros([placements, 8])
        temp_slice_avg_RU = np.zeros([placements, 8])
        temp_res_allocated = np.zeros([placements, 8])
        temp_res_used = np.zeros([placements, 8])

        # Average over several placements to cancel the effect of position.
        for ci in range(placements):
            TPs = env.generate_bs(bs_num=4)
            ues = env.generate_ue(ue_num=ue_num)

            for mi in (0, 1):
                # User association.
                association_ues, rb_size = env.ue_association(admission_ues=ues, TPs=TPs, mi=mi)

                # Generate the subframe index of every user.
                _, max_index = env.generate_subframe_index(
                    association_ues, lamda_k=env.lamda_k, data_num=20)

                # Two methods of slice resource allocation.
                if mi == 0:
                    r_sk = nvs.slice_ra(env.rate_demands, association_ues, env.slice_num, env.bs_num)
                else:
                    r_sk, _ = netshare.slice_ra(user_qos, association_ues, env.slice_num, env.bs_num,
                                                env.sub_channel_num, env.sub_frame_num)

                r_sk *= 0.75  # scale down to the share that is actually allocated

                # Physical resource allocation; it receives a copy of r_sk.
                pr = PhysicalResource(TPs=TPs, user_qos=env.user_qos, env=env)
                allocation = pr.allocate(association_ues, r_sk.copy(), max_index, rb_size)
                slice_sat_ratio, slice_avg_RU, _, _, _, _ = allocation

                r_allocated = np.sum(r_sk, axis=1) / 4  # allocated resource
                r_used = r_allocated * slice_avg_RU  # used resource

                cols = slice(mi * 4, mi * 4 + 4)
                temp_slice_sat_ratio[ci, cols] = slice_sat_ratio
                temp_slice_avg_RU[ci, cols] = slice_avg_RU
                temp_res_allocated[ci, cols] = r_allocated
                temp_res_used[ci, cols] = r_used

        # One summary row per load level: [total users, mean over placements].
        total_users = [np.sum(ue_num)]
        summaries = (
            (global_sat_ratio, temp_slice_sat_ratio),
            (global_slice_avg_RU, temp_slice_avg_RU),
            (global_res_allocated, temp_res_allocated),
            (global_res_used, temp_res_used),
        )
        for table, samples in summaries:
            table[j] = np.concatenate((total_users, np.mean(samples, axis=0)))

    time_str = utils.cur_time()
    print('-----------------STATISTICAL VARIABLES %s HAVE SAVED-----------------' % time_str)
    outputs = (
        ('global_sat_ratio_', global_sat_ratio),
        ('global_slice_avg_RU_', global_slice_avg_RU),
        ('global_res_allocated_', global_res_allocated),
        ('global_res_used_', global_res_used),
    )
    for prefix, table in outputs:
        np.save(prefix + time_str + '.npy', table)
# Example #5
# 0
def train():
    """Run the training loop of the continuous-action (DDPG-style) agent.

    For each of ``MAX_EPISODES`` episodes a fresh ``SimulationEnv`` is built,
    an initial allocation is evaluated with ``PhysicalResource.allocate`` and
    then ``MAX_EP_STEPS`` steps are played.  In every step the user counts
    grow, users are re-associated, ``RL.choose_action`` picks an action (with
    Gaussian exploration noise during the first 400 steps), ``pr.step_ddpg``
    applies it, the transition is stored with ``reward[4]`` as the reward and,
    once more than 100 steps have been taken, ``RL.learn`` is called.

    Per-step details are written to the module-level ``global_statistics``
    array; per-episode means, ``RL.cost_his`` and ``global_statistics`` are
    saved to timestamped ``.npy`` files and the model is saved with
    ``RL.save``.
    """
    step = 0  # total number of steps; controls exploration and learning
    sys_sat_list = []
    sys_RU_list = []
    sys_reward_list = []
    index = 0  # next row of global_statistics to fill
    # Per-slice maximum user count, used to normalise the slice load
    # (an earlier setting was [180, 12, 220, 200]).
    max_ue_num = [60, 11, 240, 124]
    # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
    max_ue = np.array(max_ue_num, dtype=float)

    for episode in range(MAX_EPISODES):
        print('-----------NEW EPISODE %d STARTING-------------' % episode)
        # Initialise the environment.
        ud = 0
        # Extra users of slice 1 at step j: 1, 2, ..., 7.
        # NOTE(review): only 7 entries, so MAX_EP_STEPS must not exceed 7.
        slice1_ud = np.arange(1, 8)
        ue_num = [4, 4, 100, 40]
        env = SimulationEnv()
        TPs = env.generate_bs(bs_num=4)
        ues = env.generate_ue(ue_num=ue_num)

        # Initial resource ratios: 0.1 everywhere, slice 1 -> 0.2.
        r_sk = np.ones([4, 4]) * 0.1
        r_sk[1, :] = 0.2

        # User association.
        association_ues, rb_size = env.ue_association(admission_ues=ues,
                                                      TPs=TPs)

        # Generate the subframe index of every user.
        data_num = 20
        _, max_index = env.generate_subframe_index(
            association_ues, lamda_k=env.lamda_k, data_num=data_num)

        # Physical resource allocation for the initial state.
        pr = PhysicalResource(TPs=TPs, user_qos=env.user_qos, env=env)
        (slice_sat_ratio, _avg_RU, _bs_sat, _bs_RU, _spectral_efficiency,
         _queue_len) = pr.allocate(association_ues, r_sk, max_index, rb_size)

        slice_load = np.array(ue_num, dtype=float) / max_ue  # slice load
        # Initial state: [load (4 values), satisfaction (4 values)].
        observation = np.concatenate((slice_load, slice_sat_ratio))

        sys_sat, sys_RU, sys_reward = [], [], []
        for j in range(MAX_EP_STEPS):
            # Refresh the environment with the grown user population.
            ud += 20
            ue_num = [
                4 + int(np.ceil(ud * 2 / 5)), 4 + slice1_ud[j], 100 + ud,
                40 + int(np.ceil(ud * 3 / 5))
            ]
            ues = env.generate_ue(ue_num=ue_num)

            # User association.
            association_ues, rb_size = env.ue_association(admission_ues=ues,
                                                          TPs=TPs)

            # Generate the subframe index of every user.
            data_num = 20
            _, max_index = env.generate_subframe_index(
                association_ues, lamda_k=env.lamda_k, data_num=data_num)

            # The agent picks an action from the current observation.
            action = RL.choose_action(observation)
            print('the output value of action is : ', action)
            # Add randomness to the action for exploration: with probability
            # 0.2 during the first 400 steps, perturb it and clip the result.
            if np.random.uniform() < 0.2 and step < 400:
                action = np.clip(np.random.normal(action, 0.1), -0.2, 0.4)

            # The environment answers with the next observation and reward.
            observation_, reward, r_sk_, slice_bs_sat, slice_bs_RU, real_action, r_allocated, r_reserved, \
                slice_spectral_efficiency = pr.step_ddpg(action, association_ues, r_sk.copy(), max_index, rb_size)

            slice_load = np.array(ue_num, dtype=float) / max_ue  # slice load
            next_state = np.concatenate(
                (slice_load, observation_[4:8]))  # next state
            # Store the transition in the agent's memory.
            RL.store_transition(observation, action, reward[4], next_state)

            # Accumulate some memory first, then learn on every step.
            if step > 100:
                RL.learn()

            # Record statistics.
            sys_sat.append(np.mean(observation_[4:8]))
            sys_RU.append(np.mean(observation_[8:12]))
            sys_reward.append(reward[4])
            # This agent has no random-action flag; it is fixed to 0 only so
            # that the statistics row has the same layout as the DQN variant.
            is_random = 0
            global_statistics[index, :] = np.concatenate(
                ([episode], ue_num, r_sk.flatten(), real_action, [is_random],
                 r_sk_.flatten(), observation_[4:8], observation_[8:12],
                 reward, slice_bs_sat.flatten(), slice_bs_RU.flatten(),
                 r_allocated, r_reserved, slice_spectral_efficiency))

            # The next state becomes the current state of the next step.
            observation = next_state
            r_sk = r_sk_

            step += 1
            index += 1

        sys_RU_list.append(np.mean(sys_RU))
        sys_sat_list.append(np.mean(sys_sat))
        sys_reward_list.append(np.mean(sys_reward))

    # Save the collected variables.
    time_str = utils.cur_time()
    print(
        '-----------------STATISTICAL VARIABLES %s HAVE SAVED-----------------'
        % time_str)
    np.save('sys_sat_list_' + time_str + '.npy', sys_sat_list)
    np.save('sys_RU_list_' + time_str + '.npy', sys_RU_list)
    np.save('sys_reward_list_' + time_str + '.npy', sys_reward_list)
    np.save('cost_his_' + time_str + '.npy', RL.cost_his)
    np.save('global_statistics_' + time_str + '.npy', global_statistics)
    # Save the model.
    RL.save(time_str)
    print('-----------------MODEL HAS SAVED-----------------')

    # End of training.
    print('-----------------TRAIN OVER--------------------')
def backhaul_change():
    """Compare NVS and NetShare slice allocation while the backhaul varies.

    One environment with four base stations is reused.  For ``bh`` in 0..9
    the ``'backhaul'`` entry of ``TPs`` is set to ``bh * 1e6`` for every base
    station, and a light and a heavy user load are simulated.  Each
    (backhaul, load) pair is evaluated on two random user placements with both
    methods (``mi == 0``: ``nvs.slice_ra``, ``mi == 1``:
    ``netshare.slice_ra``).

    Two ``20 x 10`` tables are saved to timestamped ``.npy`` files.  Each row
    is ``[bh, total users, 4 NVS per-slice values, 4 NetShare per-slice
    values]``, the per-slice values being averaged over the placements.
    """
    env = SimulationEnv()
    TPs = env.generate_bs(bs_num=4)
    user_qos = np.array(
        [env.rate_demands, env.delay_demands, env.lamda_k, env.packet_size])
    # Statistic variables: 10 backhaul values x 2 loads = 20 rows.
    global_sat_ratio = np.zeros([20, 10])
    global_slice_avg_RU = np.zeros([20, 10])
    index = 0

    for bh in range(10):
        # Change the backhaul of every base station (bh = 0 gives zero).
        TPs['backhaul'] = np.ones(4) * bh * 1000 * 1000  # bps

        # Light load and heavy load.
        for ui in range(2):
            if ui == 0:
                ue_num = [20, 8, 140, 64]
            else:
                ue_num = [52, 12, 220, 112]

            # Columns 0-3: NVS, columns 4-7: NetShare.  The original sized
            # these [2, 12], which made the summary row 14 entries long and
            # impossible to store in the 10-column result tables.
            temp_slice_sat_ratio = np.zeros([2, 8])
            temp_slice_avg_RU = np.zeros([2, 8])

            # Average over two placements to cancel the effect of position.
            for ci in range(2):
                ues = env.generate_ue(ue_num=ue_num)

                for mi in range(2):
                    # User association.
                    association_ues, rb_size = env.ue_association(
                        admission_ues=ues, TPs=TPs)

                    # Generate the subframe index of every user.
                    data_num = 20
                    _, max_index = env.generate_subframe_index(
                        association_ues,
                        lamda_k=env.lamda_k,
                        data_num=data_num)
                    if mi == 0:
                        r_sk = nvs.slice_ra(env.rate_demands, association_ues,
                                            env.slice_num, env.bs_num)
                    else:
                        r_sk, _success = netshare.slice_ra(
                            user_qos, association_ues, env.slice_num,
                            env.bs_num, env.sub_channel_num, env.sub_frame_num)

                    # Physical resource allocation.
                    pr = PhysicalResource(TPs=TPs,
                                          user_qos=env.user_qos,
                                          env=env)
                    # Only the first two results are needed.  Indexing them
                    # (instead of unpacking exactly four names) also works
                    # when allocate() returns six values, as the other call
                    # sites in this file expect.
                    allocation = pr.allocate(association_ues, r_sk, max_index,
                                             rb_size)
                    slice_sat_ratio = allocation[0]
                    slice_avg_RU = allocation[1]

                    offset = mi * 4
                    temp_slice_sat_ratio[ci,
                                         offset:offset + 4] = slice_sat_ratio
                    temp_slice_avg_RU[ci, offset:offset + 4] = slice_avg_RU

            global_sat_ratio[index] = np.concatenate(
                ([bh, np.sum(ue_num)], np.mean(temp_slice_sat_ratio, axis=0)))
            global_slice_avg_RU[index] = np.concatenate(
                ([bh, np.sum(ue_num)], np.mean(temp_slice_avg_RU, axis=0)))
            index += 1

    time_str = utils.cur_time()
    print(
        '-----------------STATISTICAL VARIABLES %s HAVE SAVED-----------------'
        % time_str)
    np.save('global_sat_ratio_' + time_str + '.npy', global_sat_ratio)
    np.save('global_slice_avg_RU_' + time_str + '.npy', global_slice_avg_RU)
def train():
    """Run the training loop of the DQN agent with a replay memory.

    For each of ``MAX_EPISODES`` episodes a fresh ``SimulationEnv`` is built,
    an initial allocation is evaluated with ``PhysicalResource.allocate`` and
    then ``MAX_EP_STEPS`` steps are played.  In every step the user counts
    grow, users are re-associated, ``RL.choose_action`` picks an action,
    ``pr.step`` applies it, and the transition is stored with
    ``min(reward[0:4])`` as the reward.  Once more than 300 steps have been
    taken, ``RL.learn`` is called on every even step.

    Per-step details are written to the module-level ``global_statistics``
    array; per-episode means, ``RL.cost_his`` and ``global_statistics`` are
    saved to timestamped ``.npy`` files and the model is saved with
    ``RL.save``.
    """
    step = 0  # total number of steps; controls when learning happens
    sys_sat_list = []
    sys_RU_list = []
    sys_reward_list = []
    index = 0  # next row of global_statistics to fill
    # Per-slice maximum user count, used to normalise the slice load
    # (an earlier setting was [180, 12, 220, 200]).
    max_ue_num = [60, 11, 240, 124]
    # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
    max_ue = np.array(max_ue_num, dtype=float)

    for episode in range(MAX_EPISODES):
        print('-----------NEW EPISODE %d STARTING-------------' % episode)
        # Initialise the environment.
        ud = 0
        # Extra users of slice 1 at step j: 1, 2, ..., 7.
        # NOTE(review): only 7 entries, so MAX_EP_STEPS must not exceed 7.
        slice1_ud = np.arange(1, 8)
        ue_num = [4, 4, 100, 40]
        env = SimulationEnv()
        TPs = env.generate_bs(bs_num=4)
        ues = env.generate_ue(ue_num=ue_num)

        # Initial resource ratios: 0.1 everywhere, slice 1 -> 0.2,
        # slice 2 -> 0.13.
        r_sk = np.ones([4, 4]) * 0.1
        r_sk[1, :] = 0.2
        r_sk[2, :] = 0.13

        # User association.
        association_ues, rb_size = env.ue_association(admission_ues=ues,
                                                      TPs=TPs)

        # Generate the subframe index of every user.
        data_num = 20
        _, max_index = env.generate_subframe_index(
            association_ues, lamda_k=env.lamda_k, data_num=data_num)

        # Physical resource allocation for the initial state.
        pr = PhysicalResource(TPs=TPs, user_qos=env.user_qos, env=env)
        (slice_sat_ratio, _avg_RU, _bs_sat, _bs_RU, _spectral_efficiency,
         _queue_len) = pr.allocate(association_ues, r_sk, max_index, rb_size)

        slice_load = np.array(ue_num, dtype=float) / max_ue  # slice load
        # Initial state: [load (4 values), satisfaction (4 values)].
        observation = np.concatenate((slice_load, slice_sat_ratio))

        sys_sat, sys_RU, sys_reward = [], [], []
        for j in range(MAX_EP_STEPS):
            # Refresh the environment with the grown user population.
            ud += 20
            ue_num = [
                4 + int(np.ceil(ud * 2 / 5)), 4 + slice1_ud[j], 100 + ud,
                40 + int(np.ceil(ud * 3 / 5))
            ]
            ues = env.generate_ue(ue_num=ue_num)

            # User association.
            association_ues, rb_size = env.ue_association(admission_ues=ues,
                                                          TPs=TPs)

            # Generate the subframe index of every user.
            data_num = 20
            _, max_index = env.generate_subframe_index(
                association_ues, lamda_k=env.lamda_k, data_num=data_num)

            # The agent picks an action from the current observation.
            action, is_random = RL.choose_action(observation)

            # The environment answers with the next observation and reward.
            observation_, reward, r_sk_, slice_bs_sat, slice_bs_RU, real_action, \
                r_allocated, r_reserved, slice_spectral_efficiency = pr.step(action, association_ues, r_sk.copy(),
                                                                             max_index, rb_size)

            slice_load = np.array(ue_num, dtype=float) / max_ue  # slice load
            next_state = np.concatenate(
                (slice_load, observation_[4:8]))  # next state
            # Store the transition; the reward is the smallest of the first
            # four reward entries.
            min_reward = np.min(reward[0:4])
            RL.store_transition(observation, action, min_reward, next_state)

            # Accumulate some memory first, then learn on every second step.
            if (step > 300) and (step % 2 == 0):
                RL.learn()

            # Record statistics.
            sys_sat.append(np.mean(observation_[4:8]))
            sys_RU.append(np.mean(observation_[8:12]))
            sys_reward.append(reward[4])
            global_statistics[index, :] = np.concatenate(
                ([episode], ue_num, r_sk.flatten(), real_action, [is_random],
                 r_sk_.flatten(), observation_[4:8], observation_[8:12],
                 reward, slice_bs_sat.flatten(), slice_bs_RU.flatten(),
                 r_allocated, r_reserved, slice_spectral_efficiency))

            # The next state becomes the current state of the next step.
            observation = next_state
            r_sk = r_sk_

            step += 1
            index += 1

        sys_RU_list.append(np.mean(sys_RU))
        sys_sat_list.append(np.mean(sys_sat))
        sys_reward_list.append(np.mean(sys_reward))

    # Save the collected variables.
    time_str = utils.cur_time()
    print(
        '-----------------STATISTICAL VARIABLES %s HAVE SAVED-----------------'
        % time_str)
    np.save('sys_sat_list_' + time_str + '.npy', sys_sat_list)
    np.save('sys_RU_list_' + time_str + '.npy', sys_RU_list)
    np.save('sys_reward_list_' + time_str + '.npy', sys_reward_list)
    np.save('cost_his_' + time_str + '.npy', RL.cost_his)
    np.save('global_statistics_' + time_str + '.npy', global_statistics)
    # Save the model.
    RL.save(time_str)
    print('-----------------MODEL HAS SAVED-----------------')

    # End of training.
    print('-----------------TRAIN OVER--------------------')