Beispiel #1
0
def draw(i,
         path,
         energy,
         route,
         actions,
         ob_,
         sqrt_,
         r_,
         discon_,
         over_map,
         final_steps,
         Run=False):
    """Render six training curves on a 3x2 grid and save them as a PNG.

    The image is written to ``path + '/pic_' + str(i) + '.png'`` after
    ``mkdir(path)`` has been called.

    Args:
        i: Identifier embedded in the output file name.
        path: Output directory (a ``/`` is inserted before the file name).
        energy: Series drawn in the "Energy consumption" panel.
        route: Not used by this function.
        actions: Not used by this function.
        ob_: Series drawn in the "Average attained coverage" panel.
        sqrt_: Series drawn in the "Jain's fairness index" panel.
        r_: Series drawn in the "Accumulated reward" panel.
        discon_: Series drawn in the disconnection panel.
        over_map: Series drawn in the "fly outside the map" panel; this
            panel also carries the legend built from ``FLAGS``.
        final_steps: Number of x positions; every series is plotted against
            ``range(final_steps)``.
        Run: Not used by this function.
    """
    mkdir(path)

    # Legend text summarising the run configuration read from FLAGS.
    legend_text = ''.join([
        'epoch:', str(FLAGS.max_epoch),
        '\nUAV: ', str(FLAGS.num_uav),
        '\n map size: ', str(FLAGS.size_map),
        '\n sensing range:', str(FLAGS.radius),
        '\n constraint:', str(FLAGS.constrain),
    ])

    # (subplot position, y label, series, extra plot kwargs), listed in the
    # order the panels are created.
    legend_position = 326
    panels = (
        (321, 'Average attained coverage', ob_, {}),
        (322, 'Jain\'s fairness index', sqrt_, {}),
        (323, 'Accumulated reward', r_, {}),
        (324, 'Accumulated times \nof disconnection', discon_,
         {'color': 'blue'}),
        (legend_position, 'Accumulated times \nto fly outside the map',
         over_map, {'color': 'green'}),
        (325, 'Energy consumption', energy, {'color': 'green'}),
    )

    figure = plt.figure(figsize=(18, 10))
    for position, y_label, series, plot_kwargs in panels:
        axis = figure.add_subplot(position)
        axis.set_xlabel('No. of epochs')
        axis.set_ylabel(y_label)
        drawn = axis.plot(range(final_steps), series, **plot_kwargs)
        if position == legend_position:
            # Only this panel requires exactly one line, used as the
            # legend handle.
            handle, = drawn
            axis.legend([handle], [legend_text])

    figure.subplots_adjust(hspace=0.4)
    figure.savefig(path + '/pic_' + str(i) + '.png')
    plt.close()
Beispiel #2
0
def draw_episodes(i, path, coverage, j_index, energy, A_discon, A_over_map,
                  A_efficiency, A_reward, final_steps):
    """Plot seven per-episode statistics on a 4x2 grid and save the figure.

    The image is written to ``path + "episodes_" + str(i) + '.png'``; no
    separator is inserted, so ``path`` is used exactly as given.

    Args:
        i: Identifier embedded in the output file name.
        path: Output location prefix; ``mkdir(path)`` is called first.
        coverage: Series for the "Coverage" panel.
        j_index: Series for the "Fairness" panel.
        energy: Series for the "Energy" panel.
        A_discon: Series for the "Disconnect" panel.
        A_over_map: Series for the "Over map counter" panel.
        A_efficiency: Series for the "Energy efficiency" panel.
        A_reward: Series for the "Reward" panel.
        final_steps: Number of x positions; every series is plotted against
            ``range(final_steps)``.
    """
    mkdir(path)

    # (y-axis caption, series, line colour) for grid slots 1..7.
    curves = [
        ("Energy efficiency", A_efficiency, 'b'),
        ("Fairness", j_index, 'r'),
        ("Coverage", coverage, 'g'),
        ("Energy", energy, 'c'),
        ("Disconnect", A_discon, 'm'),
        ("Over map counter", A_over_map, 'y'),
        ("Reward", A_reward, 'k'),
    ]

    plt.figure(figsize=(18, 10))
    for slot, (caption, values, colour) in enumerate(curves, start=1):
        plt.subplot(4, 2, slot)
        plt.xlabel("No. of step")
        plt.ylabel(caption)
        plt.plot(range(final_steps), values, color=colour)

    output_file = path + "episodes_" + str(i) + '.png'
    plt.savefig(output_file)
    plt.close()
Beispiel #3
0
def draw_single_episode(path, episode_number, efficiency, coverage, fairness,
                        energy, disconnect, over_map, reward):
    """Plot the per-step statistics of one episode on a 4x2 grid and save it.

    The image is written to
    ``path + "episode_" + str(episode_number) + '_info.png'``; no separator
    is inserted, so ``path`` is used exactly as given.

    Args:
        path: Output location prefix; ``mkdir(path)`` is called first.
        episode_number: Identifier embedded in the output file name.
        efficiency: Series for the "Energy efficiency" panel; its length
            also fixes the x range used for every panel.
        coverage: Series for the "Coverage" panel.
        fairness: Series for the "Fairness" panel.
        energy: Series for the "Energy" panel.
        disconnect: Series for the "Disconnect" panel.
        over_map: Series for the "Over map counter" panel.
        reward: Series for the "Reward" panel.
    """
    mkdir(path)
    steps = len(efficiency)

    # (y-axis caption, series, line colour) for grid slots 1..7.
    # The "Reward" slot previously re-plotted `over_map`, so the `reward`
    # argument was never drawn; it now plots `reward`.
    panels = [
        ("Energy efficiency", efficiency, 'b'),
        ("Coverage", coverage, 'g'),
        ("Fairness", fairness, 'r'),
        ("Energy", energy, 'c'),
        ("Disconnect", disconnect, 'm'),
        ("Over map counter", over_map, 'y'),
        ("Reward", reward, 'k'),
    ]

    plt.figure(figsize=(40, 20))
    for slot, (caption, series, colour) in enumerate(panels, start=1):
        plt.subplot(4, 2, slot)
        plt.xlabel("No. of step")
        plt.ylabel(caption)
        plt.plot(range(steps), series, color=colour)

    plt.savefig(path + "episode_" + str(episode_number) + '_info.png')
    plt.close()
Beispiel #4
0
def test(arglist):
    """Evaluate saved checkpoints of the per-task trainers one after another.

    Builds one environment and one set of trainers per task, then loops over
    checkpoint indices 1..``arglist.train_num_episodes / arglist.save_rate``.
    For each index it loads a state, resets the task environments, steps them
    while collecting statistics, and (when ``arglist.draw_picture_test`` is
    set) appends a summary to ``test_report.txt`` and calls
    ``draw_util.drawTest``. When all indices are processed, the index with the
    best mean energy efficiency is appended to the same report.

    NOTE(review): several names read below (`global_steps`, `train_step`,
    `episode_rewards`) are never assigned inside this function; unless they
    exist at module level the corresponding lines raise NameError. See the
    inline notes.

    Args:
        arglist: Parsed run configuration; the attributes read here include
            num_task, load_dir, save_dir, scenario, benchmark,
            num_adversaries, num_episodes, train_num_episodes, save_rate,
            max_episode_len, pictures_dir_test, draw_picture_test, plots_dir
            and exp_name.
    """
    debug = False
    num_tasks = arglist.num_task  # total number of tasks
    list_of_taskenv = []  # env list
    load_path = arglist.load_dir
    with U.single_threaded_session():
        if debug:
            begin = time_begin()
        # 1.1 Create the actor trainer and critic trainer of every task
        trainers_list = []
        env = make_env(arglist.scenario, arglist.benchmark)
        # Create agent trainers
        obs_shape_n = [env.observation_space[i].shape for i in range(env.n)]
        num_adversaries = min(env.n, arglist.num_adversaries)
        for i in range(num_tasks):
            list_of_taskenv.append(make_env(arglist.scenario))
            trainers = get_trainers(list_of_taskenv[i],
                                    "task_" + str(i + 1) + "_",
                                    num_adversaries,
                                    obs_shape_n,
                                    arglist)
            trainers_list.append(trainers)

        print('Using good policy {} and adv policy {}'.format(arglist.good_policy, arglist.adv_policy))

        global_steps_tensor = tf.Variable(tf.zeros(num_tasks), trainable=False)  # global timesteps for each env
        global_steps_ph = tf.placeholder(tf.float32, [num_tasks])
        global_steps_assign_op = tf.assign(global_steps_tensor, global_steps_ph)
        model_number = int(arglist.num_episodes / arglist.save_rate)
        saver = tf.train.Saver(max_to_keep=model_number)

        # One scalar summary per task, fed through a placeholder.
        efficiency_list = []
        for i in range(num_tasks):
            efficiency_list.append(tf.placeholder(tf.float32, shape=None, name="efficiency_placeholder" + str(i)))
        efficiency_summary_list = []
        for i in range(num_tasks):
            efficiency_summary_list.append(tf.summary.scalar("efficiency_%s" % i, efficiency_list[i]))
        writer = tf.summary.FileWriter("../summary/efficiency")

        # Initialize
        U.initialize()
        for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
            print(var)

        if debug:
            print(time_end(begin, "initialize"))
            begin = time_begin()

        # NOTE(review): load_dir is split here, before the empty-string
        # fallback further down; an empty load_dir gives a one-element list,
        # so [-2] would raise IndexError.
        model_name = arglist.load_dir.split('/')[-2] + '/'
        mkdir(arglist.pictures_dir_test + model_name)
        model_index_step = 0
        # NOTE(review): uses train_num_episodes, while model_number above
        # uses num_episodes.
        model_number_total = arglist.train_num_episodes / arglist.save_rate
        max_model_index = 0
        max_average_energy_efficiency = 0

        # Outer loop: one iteration per checkpoint index.
        while True:
            if model_index_step >= model_number_total:
                # All checkpoints processed: record the best one and stop.
                with open(arglist.pictures_dir_test + model_name + 'test_report' + '.txt', 'a+') as file:
                    report = '\nModel ' + str(max_model_index) + ' attained max average energy efficiency' + \
                             '\nMax average energy efficiency:' + str(max_average_energy_efficiency)
                    file.write(report)
                break
            else:
                model_index_step += 1

            # 1.4 Load checkpoints
            if arglist.load_dir == "":
                arglist.load_dir = arglist.save_dir
            if arglist.display or arglist.restore or arglist.benchmark:
                print('Loading previous state...')
                # NOTE(review): model_load_dir is computed but load_state is
                # given arglist.load_dir, so the per-index path is unused.
                model_load_dir = arglist.load_dir + str(model_index_step * arglist.save_rate - 1) + '/'
                U.load_state(arglist.load_dir)
            # global_steps = tf.get_default_session().run(global_steps_tensor)

            # 1.5 Initialize the environments
            obs_n_list = []
            for i in range(num_tasks):
                obs_n = list_of_taskenv[i].reset()
                obs_n_list.append(obs_n)

            # 1.2 Initialize global variables
            episodes_rewards = [[0.0] for _ in range(num_tasks)]  # each element is the sum of all agents' rewards in one episode
            # each element of agent_rewards[i] records the sum of a single agent's rewards in one episode
            agent_rewards = [[[0.0] for _ in range(env.n)] for _ in range(num_tasks)]
            final_ep_rewards = [[] for _ in range(num_tasks)]  # sum of rewards for training curve
            final_ep_ag_rewards = [[] for _ in range(num_tasks)]  # agent rewards for training curve

            energy_consumptions_for_test = [[] for _ in range(num_tasks)]
            j_index = [[] for _ in range(num_tasks)]
            aver_cover = [[] for _ in range(num_tasks)]
            instantaneous_dis = [[] for _ in range(num_tasks)]
            instantaneous_out_the_map = [[] for _ in range(num_tasks)]
            energy_efficiency = [[] for _ in range(num_tasks)]
            instantaneous_accmulated_reward = [[] for _ in range(num_tasks)]

            # 1.3 Initialize local variables
            local_steps = np.zeros(num_tasks)  # local timesteps for each env
            energy_one_episode = [[] for _ in range(num_tasks)]
            j_index_one_episode = [[] for _ in range(num_tasks)]
            aver_cover_one_episode = [[] for _ in range(num_tasks)]
            over_map_counter = np.zeros(num_tasks)
            over_map_one_episode = [[] for _ in range(num_tasks)]
            disconnected_number_counter = np.zeros(num_tasks)
            disconnected_number_one_episode = [[] for _ in range(num_tasks)]
            episode_reward_step = np.zeros(num_tasks)  # running sum, over the steps of an episode, of the mean reward of all agents
            accmulated_reward_one_episode = [[] for _ in range(num_tasks)]
            route_one_episode = [[] for _ in range(num_tasks)]


            bl_coverage = 0.8
            bl_jainindex = 0.8
            bl_loss = 100
            # NOTE(review): this rebinds energy_efficiency to a flat list,
            # discarding the per-task list of lists created above.
            energy_efficiency = []

            print('Starting iterations...')
            # NOTE(review): the `break` statements below only leave the inner
            # `for` loop; no visible statement exits this `while True`.
            while True:
                for task_index in range(num_tasks):
                    # 2.1 Step the environment and collect samples
                    current_env = list_of_taskenv[task_index]
                    current_trainers = trainers_list[task_index]
                    # get action
                    # NOTE(review): this reads `trainers` and `obs_n`, which
                    # still hold the values left by the setup loops (last
                    # task), not current_trainers / obs_n_list[task_index].
                    action_n = [agent.action(obs) for agent, obs in zip(trainers, obs_n)]
                    # environment step
                    new_obs_n, rew_n, done_n, info_n = current_env.step(action_n)
                    if debug:
                        print(time_end(begin, "env.step"))
                        begin = time_begin()
                    local_steps[task_index] += 1  # update the local counter
                    # NOTE(review): global_steps is not assigned in this
                    # function (the line that would read it is commented out
                    # above).
                    global_steps[task_index] += 1  # update the global counter
                    done = all(done_n)
                    terminal = (local_steps[task_index] >= arglist.max_episode_len)
                    # collect experience
                    for i in range(env.n):
                        current_trainers[i].experience(obs_n_list[task_index][i], action_n[i], rew_n[i], new_obs_n[i],
                                                       done_n[i], terminal)

                    # update obs
                    obs_n_list[task_index] = new_obs_n
                    # update reward
                    for i, rew in enumerate(rew_n):
                        episodes_rewards[task_index][-1] += rew
                        agent_rewards[task_index][i][-1] += rew
                    # energy
                    energy_one_episode[task_index].append(current_env.get_energy())
                    # fair index
                    j_index_one_episode[task_index].append(current_env.get_jain_index())
                    # coverage
                    aver_cover_one_episode[task_index].append(current_env.get_aver_cover())
                    # over map counter
                    over_map_counter[task_index] += current_env.get_over_map()
                    over_map_one_episode[task_index].append(over_map_counter[task_index])
                    # disconnected counter
                    disconnected_number_counter[task_index] += current_env.get_dis()
                    disconnected_number_one_episode[task_index].append(disconnected_number_counter[task_index])
                    # reward
                    episode_reward_step[task_index] += np.mean(rew_n)
                    accmulated_reward_one_episode[task_index].append(episode_reward_step[task_index])
                    route = current_env.get_agent_pos()
                    route_one_episode[task_index].append(route)

                    if done or terminal:
                        # reset custom statistics variables between episode and epoch---------------------------------------------
                        # NOTE(review): the `[-1]` below selects the last
                        # task's whole per-step list (the *_one_episode
                        # containers are lists of per-task lists), not the
                        # last step of task_index. The energy_efficiency line
                        # therefore multiplies/divides Python lists, which
                        # raises TypeError.
                        instantaneous_accmulated_reward.append(accmulated_reward_one_episode[-1])
                        j_index.append(j_index_one_episode[-1])
                        instantaneous_dis.append(disconnected_number_one_episode[-1])
                        instantaneous_out_the_map.append(over_map_one_episode[-1])
                        aver_cover.append(aver_cover_one_episode[-1])
                        energy_consumptions_for_test.append(energy_one_episode[-1])
                        energy_efficiency.append(aver_cover_one_episode[-1] * j_index_one_episode[-1] / energy_one_episode[-1])
                        # NOTE(review): train_step is not assigned in this
                        # function, and the energy is read from `env`, not
                        # from current_env.
                        print('Episode: %d - energy_consumptions: %s ' % (train_step / arglist.max_episode_len,
                                                                        str(env._get_energy_origin())))

                        # Per-episode buffers are cleared only once the last
                        # task of the round has finished its episode.
                        if task_index == num_tasks - 1:
                            energy_one_episode = [[] for _ in range(num_tasks)]
                            j_index_one_episode = [[] for _ in range(num_tasks)]
                            aver_cover_one_episode = [[] for _ in range(num_tasks)]
                            over_map_counter = np.zeros(num_tasks)
                            over_map_one_episode = [[] for _ in range(num_tasks)]
                            disconnected_number_counter = np.zeros(num_tasks)
                            disconnected_number_one_episode = [[] for _ in range(num_tasks)]
                            episode_reward_step = np.zeros(num_tasks)
                            accmulated_reward_one_episode = [[] for _ in range(num_tasks)]
                            route_one_episode = [[] for _ in range(num_tasks)]

                        # NOTE(review): `episode_rewards` (here and below) is
                        # not assigned in this function; the local defined
                        # above is `episodes_rewards`.
                        if arglist.draw_picture_test:
                            if len(episode_rewards) % arglist.save_rate == 0:
                                # Track the checkpoint with the best mean efficiency so far.
                                if np.mean(energy_efficiency) > max_average_energy_efficiency:
                                    max_model_index = model_index_step * arglist.save_rate - 1
                                    max_average_energy_efficiency = np.mean(energy_efficiency)
                                with open(arglist.pictures_dir_test + model_name + 'test_report' + '.txt', 'a+') as file:
                                    report = '\nModel-' + str(model_index_step * arglist.save_rate - 1) + \
                                             '-testing ' + str(arglist.num_episodes) + ' episodes\'s result:' + \
                                             '\nAverage average attained coverage: ' + str(np.mean(aver_cover)) + \
                                             '\nAverage Jaint\'s fairness index: ' + str(np.mean(j_index)) + \
                                             '\nAverage normalized average energy consumptions:' + str(np.mean(energy_consumptions_for_test)) + \
                                             '\nAverage energy efficiency:' + str(np.mean(energy_efficiency)) + '\n'
                                    file.write(report)
                                # NOTE(review): arguments are passed
                                # positionally; verify their order against the
                                # drawTest signature in draw_util.
                                draw_util.drawTest(model_index_step * arglist.save_rate - 1, arglist.pictures_dir_test + model_name,
                                                   energy_consumptions_for_test, aver_cover, j_index,
                                                   instantaneous_accmulated_reward, instantaneous_dis, instantaneous_out_the_map
                                                   , len(aver_cover), bl_coverage, bl_jainindex, bl_loss, energy_efficiency, False)
                        # reset custom statistics variables between episode and epoch----------------------------------------
                        # NOTE(review): no env.reset() or local_steps reset is
                        # visible after an episode ends.

                    # for displaying learned policies
                    if arglist.draw_picture_test:
                        if len(episode_rewards) > arglist.num_episodes:
                            break
                        continue

                    # saves final episode reward for plotting training curve later
                    if len(episode_rewards) > arglist.num_episodes:
                        rew_file_name = arglist.plots_dir + arglist.exp_name + '_rewards.pkl'
                        with open(rew_file_name, 'wb') as fp:
                            pickle.dump(final_ep_rewards, fp)
                        agrew_file_name = arglist.plots_dir + arglist.exp_name + '_agrewards.pkl'
                        with open(agrew_file_name, 'wb') as fp:
                            pickle.dump(final_ep_ag_rewards, fp)
                        print('...Finished total of {} episodes.'.format(len(episode_rewards)))
                        break
Beispiel #5
0
def train(arglist, restore_model_number):
    """Train one shared actor against per-task critics over several task envs.

    Creates a shared actor (``actor_0``) on a reference environment, then one
    environment plus critic/actor trainer sets per task. In the main loop each
    task environment is stepped in turn with actions from ``actor_0``, the
    transition is stored in that task's critics, and the task's critics and
    actors are updated. Per-step statistics are accumulated; at the end of an
    episode they are plotted with ``draw_util.draw_single_episode``, logged to
    a per-task text file, and the environment is reset. Every
    ``arglist.save_rate`` episodes a summary is printed and, optionally,
    ``draw_util.draw_episodes`` is called. Once the episode count exceeds
    ``arglist.num_train_episodes`` the reward curves are pickled and the loop
    ends.

    Args:
        arglist: Parsed run configuration (scenario, directories, episode
            limits, history length, ...).
        restore_model_number: Checkpoint sub-directory name used to build the
            default ``arglist.load_dir`` when none is given.
    """
    debug = False
    multi_process = arglist.mp  # not read again in this function
    num_tasks = arglist.num_task_transfer  # total number of tasks
    list_of_taskenv = []  # env list
    save_path = arglist.save_dir
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    with U.single_threaded_session():
        sess = tf.get_default_session()
        if debug:
            begin = time_begin()
        # 1.1 Create the actor trainer and critic trainer of every task
        env = make_env(arglist.scenario, reward_type=arglist.reward_type)
        env.set_map(
            sample_map(arglist.test_data_dir + arglist.test_data_name +
                       "_1.h5"))

        # Create agent trainers
        obs_shape_n = [env.observation_space[i].shape for i in range(env.n)]
        num_adversaries = min(env.n, arglist.num_adversaries)
        actor_0 = get_trainers(env,
                               "actor_",
                               num_adversaries,
                               obs_shape_n,
                               arglist,
                               type=0,
                               session=sess)

        # 1.2 Create the actor trainer and critic trainer of every task
        critic_list = []  # list of the critics of all tasks
        actor_list = []

        print('Using good policy {} and adv policy {}'.format(
            arglist.good_policy, arglist.adv_policy))

        # 1.2 Initialize global variables
        episodes_rewards = [[0.0] for _ in range(num_tasks)
                            ]  # each element is the sum of all agents' rewards in one episode
        # each element of agent_rewards[i] records the sum of a single agent's rewards in one episode
        agent_rewards = [[[0.0] for _ in range(env.n)]
                         for _ in range(num_tasks)]
        final_ep_rewards = [[] for _ in range(num_tasks)
                            ]  # sum of rewards for training curve
        final_ep_ag_rewards = [[] for _ in range(num_tasks)
                               ]  # agent rewards for training curve

        # Per-task histories holding one value per finished episode.
        energy_consumptions_for_test = [[] for _ in range(num_tasks)]
        j_index = [[] for _ in range(num_tasks)]
        aver_cover = [[] for _ in range(num_tasks)]
        instantaneous_dis = [[] for _ in range(num_tasks)]
        instantaneous_out_the_map = [[] for _ in range(num_tasks)]
        energy_efficiency = [[] for _ in range(num_tasks)]
        instantaneous_accmulated_reward = [[] for _ in range(num_tasks)]

        model_number = int(arglist.num_train_episodes / arglist.save_rate)
        saver = tf.train.Saver(max_to_keep=model_number)

        # 1.3 Initialize local variables
        global_steps = np.zeros(num_tasks)
        local_steps = np.zeros(num_tasks)  # local timesteps for each env
        # Per-task buffers holding one value per step of the current episode.
        energy_one_episode = [[] for _ in range(num_tasks)]
        j_index_one_episode = [[] for _ in range(num_tasks)]
        aver_cover_one_episode = [[] for _ in range(num_tasks)]
        over_map_counter = np.zeros(num_tasks)
        over_map_one_episode = [[] for _ in range(num_tasks)]
        disconnected_number_counter = np.zeros(num_tasks)
        disconnected_number_one_episode = [[] for _ in range(num_tasks)]
        episode_reward_step = np.zeros(
            num_tasks)  # running sum, over the steps of an episode, of the mean reward of all agents
        accmulated_reward_one_episode = [[] for _ in range(num_tasks)]
        route_one_episode = [[] for _ in range(num_tasks)]

        if debug:
            print(time_end(begin, "step3"))
            begin = time_begin()

        # 1.4 Load checkpoints
        # NOTE(review): the state is loaded before the per-task trainers are
        # created and before U.initialize() below; whether initialize()
        # preserves the restored values depends on U - confirm.
        if arglist.load_dir == "":
            arglist.load_dir = os.path.join(save_path,
                                            str(restore_model_number),
                                            "model.ckpt")
        if arglist.transfer_restore:
            print('Loading previous state...')
            U.load_state(arglist.load_dir)

        # One environment plus critic/actor trainer sets per task.
        for i in range(num_tasks):
            list_of_taskenv.append(
                make_env(arglist.scenario, reward_type=arglist.reward_type))
            critic_trainers = get_trainers(list_of_taskenv[i],
                                           "task_" + str(i + 1) + "_",
                                           num_adversaries,
                                           obs_shape_n,
                                           arglist,
                                           actors=actor_0,
                                           type=1,
                                           session=sess)
            actor_trainers = get_trainers(list_of_taskenv[i],
                                          "task_" + str(i + 1) + "_",
                                          num_adversaries,
                                          obs_shape_n,
                                          arglist,
                                          actor_env_name="actor_",
                                          type=2,
                                          session=sess)
            actor_list.append(actor_trainers)
            critic_list.append(critic_trainers)

        # Initialize
        U.initialize()
        for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
            print(var)

        # 1.5 Initialize the environments
        obs_n_list = []
        for i in range(num_tasks):
            # The map is set after reset(), so obs_n comes from the reset
            # that preceded set_map.
            obs_n = list_of_taskenv[i].reset()
            list_of_taskenv[i].set_map(
                sample_map(arglist.test_data_dir + arglist.test_data_name +
                           "_" + str(i + 1) + ".h5"))
            obs_n_list.append(obs_n)

        if debug:
            print(time_end(begin, "initialize"))
            begin = time_begin()
        # 2. Training
        t_start = time.time()
        print('Starting iterations...')
        episode_start_time = time.time()
        state_dim = obs_shape_n[0][0]

        # One bounded FIFO of recent observations per (task, agent), pre-filled
        # with history_length copies of the initial observation.
        history_n = [[
            queue.Queue(arglist.history_length) for _ in range(env.n)
        ] for _ in range(num_tasks)]
        for i in range(num_tasks):
            for j in range(env.n):
                for _ in range(arglist.history_length):
                    history_n[i][j].put(obs_n_list[i][j])

        while True:
            for task_index in range(num_tasks):
                # 2.1 Step the environment and collect samples
                current_env = list_of_taskenv[task_index]
                # get action
                # action_n = [agent.action(obs) for agent, obs in zip(actor_0, obs_n_list[task_index])]
                # NOTE(review): each agent receives its queue.Queue object as
                # `obs`; presumably agent.action unpacks it - confirm.
                action_n = [
                    agent.action(obs)
                    for agent, obs in zip(actor_0, history_n[task_index])
                ]
                # environment step
                new_obs_n, rew_n, done_n, info_n = current_env.step(action_n)
                current_critics = critic_list[task_index]
                current_actors = actor_list[task_index]
                if debug:
                    print(time_end(begin, "env.step"))
                    begin = time_begin()
                local_steps[task_index] += 1  # update the local counter
                global_steps[task_index] += 1  # update the global counter
                done = all(done_n)
                terminal = (local_steps[task_index] >= arglist.max_episode_len)
                # collect experience
                for i in range(env.n):
                    current_critics[i].experience(obs_n_list[task_index][i],
                                                  action_n[i], rew_n[i],
                                                  new_obs_n[i], done_n[i],
                                                  terminal)

                # update obs
                obs_n_list[task_index] = new_obs_n
                # Slide each agent's history window: drop the oldest
                # observation and push the newest.
                for i in range(env.n):
                    history_n[task_index][i].get()
                    history_n[task_index][i].put(new_obs_n[i])
                # update reward
                for i, rew in enumerate(rew_n):
                    episodes_rewards[task_index][-1] += rew
                    agent_rewards[task_index][i][-1] += rew

                # 2.2 Optimize the critic and actor of each task
                for critic in current_critics:
                    critic.preupdate()

                for critic in current_critics:
                    critic.update(current_critics, global_steps[task_index])

                for index, actor in enumerate(current_actors):
                    actor.update(current_actors, current_critics,
                                 global_steps[task_index], index)

                if debug:
                    print(time_end(begin, "update actor"))
                    begin = time_begin()

                # 2.4 Record and update the training information
                # energy
                energy_one_episode[task_index].append(current_env.get_energy())
                # fair index
                j_index_one_episode[task_index].append(
                    current_env.get_jain_index())
                # coverage
                aver_cover_one_episode[task_index].append(
                    current_env.get_aver_cover())
                # over map counter
                over_map_counter[task_index] += current_env.get_over_map()
                over_map_one_episode[task_index].append(
                    over_map_counter[task_index])
                # disconnected counter
                disconnected_number_counter[task_index] += current_env.get_dis(
                )
                disconnected_number_one_episode[task_index].append(
                    disconnected_number_counter[task_index])
                # reward
                episode_reward_step[task_index] += np.mean(rew_n)
                accmulated_reward_one_episode[task_index].append(
                    episode_reward_step[task_index])
                route = current_env.get_agent_pos()
                route_one_episode[task_index].append(route)
                if debug:
                    print(time_end(begin, "others"))
                    begin = time_begin()

                # 1-based index of the episode this step belongs to.
                episode_number = math.ceil(global_steps[task_index] /
                                           arglist.max_episode_len)
                if done or terminal:
                    # model_name is bound only in this branch; the save/plot
                    # branch below reads it and requires `terminal`, which
                    # implies this branch ran in the same iteration.
                    model_name = save_path.split('/')[-2] + '/'
                    # Element-wise per-step efficiency: coverage * fairness / energy.
                    temp_efficiency = np.array(
                        aver_cover_one_episode[task_index]) * np.array(
                            j_index_one_episode[task_index]) / np.array(
                                energy_one_episode[task_index])
                    draw_util.draw_single_episode(
                        arglist.pictures_dir_transfer_train + model_name +
                        "single_episode_task_" + str(task_index) + "/",
                        episode_number, temp_efficiency,
                        aver_cover_one_episode[task_index],
                        j_index_one_episode[task_index],
                        energy_one_episode[task_index],
                        disconnected_number_one_episode[task_index],
                        over_map_one_episode[task_index],
                        accmulated_reward_one_episode[task_index])
                    # record the per-episode variables (last step of the episode)
                    energy_consumptions_for_test[task_index].append(
                        energy_one_episode[task_index][-1])  # energy
                    j_index[task_index].append(
                        j_index_one_episode[task_index][-1])  # fairness index
                    aver_cover[task_index].append(
                        aver_cover_one_episode[task_index][-1])  # coverage
                    instantaneous_dis[task_index].append(
                        disconnected_number_one_episode[task_index]
                        [-1])  # disconnected
                    instantaneous_out_the_map[task_index].append(
                        over_map_one_episode[task_index][-1])  # out of the map
                    instantaneous_accmulated_reward[task_index].append(
                        accmulated_reward_one_episode[task_index]
                        [-1])  # reward
                    energy_efficiency[task_index].append(
                        aver_cover_one_episode[task_index][-1] *
                        j_index_one_episode[task_index][-1] /
                        energy_one_episode[task_index][-1])  # efficiency

                    # Wall-clock time since the previous episode end (of any task).
                    episode_end_time = time.time()
                    episode_time = episode_end_time - episode_start_time
                    episode_start_time = episode_end_time
                    with open(
                            arglist.pictures_dir_transfer_train + model_name +
                            "task_" + str(task_index) + '_train_info' + '.txt',
                            'a+') as f:
                        info = "Task index: %d, Episode number %d, energy consumption: %s, efficiency: %s, time: %s" % (
                            task_index, episode_number,
                            str(current_env.get_energy_origin()),
                            str(energy_efficiency[task_index][-1]),
                            str(round(episode_time, 3)))
                        f.write(info + "\n")
                    print(info)

                    # the per-episode local variables should be reset here--------------------------------------------
                    # NOTE(review): buffers of all tasks are cleared only when
                    # the last task ends its episode; if tasks end on
                    # different steps (early `done`), stale steps would carry
                    # over - confirm this cannot happen.
                    if task_index == num_tasks - 1:
                        energy_one_episode = [[] for _ in range(num_tasks)]
                        j_index_one_episode = [[] for _ in range(num_tasks)]
                        aver_cover_one_episode = [[] for _ in range(num_tasks)]
                        over_map_counter = np.zeros(num_tasks)
                        over_map_one_episode = [[] for _ in range(num_tasks)]
                        disconnected_number_counter = np.zeros(num_tasks)
                        disconnected_number_one_episode = [
                            [] for _ in range(num_tasks)
                        ]
                        episode_reward_step = np.zeros(num_tasks)
                        accmulated_reward_one_episode = [
                            [] for _ in range(num_tasks)
                        ]
                        route_one_episode = [[] for _ in range(num_tasks)]

                    # reset local variables
                    # NOTE(review): history_n is not refilled from the reset
                    # observation here, so it still holds observations of the
                    # finished episode.
                    obs_n_list[task_index] = current_env.reset()  # reset the env
                    current_env.set_map(
                        sample_map(arglist.test_data_dir +
                                   arglist.test_data_name + "_" +
                                   str(task_index + 1) + ".h5"))
                    local_steps[task_index] = 0  # reset the local counter

                    # update global variables
                    episodes_rewards[task_index].append(0)  # append a new element
                    for reward in agent_rewards[task_index]:
                        reward.append(0)

                # save model, display training output
                if terminal and (episode_number % arglist.save_rate == 0):
                    # tf.get_default_session().run(global_steps_assign_op, feed_dict={global_steps_ph: global_steps})
                    # save_dir_custom = os.path.join(save_path, str(episode_number), 'model.ckpt')
                    # U.save_state(save_dir_custom, saver=saver)
                    # print statement depends on whether or not there are adversaries
                    # mean reward over the latest save_rate episodes
                    # (the slice includes the fresh 0 appended above for the
                    # next episode)
                    save_rate_mean_reward = np.mean(
                        episodes_rewards[task_index][-arglist.save_rate:])
                    if num_adversaries == 0:
                        print(
                            "steps: {}, episodes: {}, mean episode reward: {}, time: {}"
                            .format(global_steps[task_index], episode_number,
                                    save_rate_mean_reward,
                                    round(time.time() - t_start, 3)))
                    else:
                        print(
                            "steps: {}, episodes: {}, mean episode reward: {}, agent episode reward: {}, time: {}"
                            .format(global_steps[task_index], episode_number,
                                    save_rate_mean_reward, [
                                        np.mean(rew[-arglist.save_rate:])
                                        for rew in agent_rewards[task_index]
                                    ], round(time.time() - t_start, 3)))

                    t_start = time.time()

                    final_ep_rewards[task_index].append(save_rate_mean_reward)
                    for rew in agent_rewards[task_index]:
                        final_ep_ag_rewards[task_index].append(
                            np.mean(rew[-arglist.save_rate:]))

                    # save the training curves
                    if arglist.draw_picture_train:
                        # model_name = save_path.split('/')[-2] + '/'
                        draw_util.draw_episodes(
                            episode_number,
                            arglist.pictures_dir_transfer_train + model_name +
                            "all_episodes_task_" + str(task_index) + "/",
                            aver_cover[task_index], j_index[task_index],
                            energy_consumptions_for_test[task_index],
                            instantaneous_dis[task_index],
                            instantaneous_out_the_map[task_index],
                            energy_efficiency[task_index],
                            instantaneous_accmulated_reward[task_index],
                            len(aver_cover[task_index]))
                # saves final episode reward for plotting training curve later
                # (each task writes its own file name, but the dumped objects
                # contain the curves of all tasks)
                if episode_number > arglist.num_train_episodes:
                    mkdir(arglist.plots_dir)
                    rew_file_name = arglist.plots_dir + arglist.exp_name + str(
                        task_index) + '_rewards.pkl'
                    with open(rew_file_name, 'wb') as fp:
                        pickle.dump(final_ep_rewards, fp)
                    agrew_file_name = arglist.plots_dir + arglist.exp_name + str(
                        task_index) + '_agrewards.pkl'
                    with open(agrew_file_name, 'wb') as fp:
                        pickle.dump(final_ep_ag_rewards, fp)
                        print('...Finished total of {} episodes.'.format(
                            episode_number))
            # Stop once the last task of the round has passed the episode budget.
            if episode_number > arglist.num_train_episodes:
                break
Beispiel #6
0
def drawTest(i,
             path,
             energy_efficiency,
             energy,
             coverage,
             jainindex,
             r_,
             discon_,
             over_map,
             final_steps,
             BL_coverage,
             BL_jain,
             BL_loss,
             Run=False):
    """Plot the per-episode test metrics on a 4x2 grid and save it as a PNG.

    The figure is written to ``<path>/pic_<i>.png`` after ``path`` has been
    created through ``mkdir``.

    Args:
        i: Identifier embedded in the output file name.
        path: Output directory for the picture.
        energy_efficiency: Series drawn in the 'Energy efficiency' panel; its
            mean and max are also printed in the legend text.
        energy: Series drawn in the 'Average energy consumption' panel.
        coverage: Series drawn in the 'Average attained coverage' panel.
        jainindex: Series drawn in the "Jain's fairness index" panel.
        r_: Series drawn in the 'Instantaneous reward' panel.
        discon_: Series drawn in the disconnection panel.
        over_map: Series drawn in the 'fly outside the map' panel.
        final_steps: Number of points on the x axis; every series is plotted
            against ``range(final_steps)``.
        BL_coverage: Constant drawn as a horizontal line in the coverage panel.
        BL_jain: Constant drawn as a horizontal line in the fairness panel.
        BL_loss: Constant drawn as a horizontal line in the disconnection panel.
        Run: Not used by this function.
    """
    mkdir(path)

    # Legend text: each fragment is a literal prefix followed by str(value).
    legend_fields = (
        ('epoch:', FLAGS.max_epoch),
        ('\nUAV: ', FLAGS.num_uav),
        ('\n map size: ', FLAGS.size_map),
        ('\n sensing range:', FLAGS.radius),
        ('\n constraint:', FLAGS.constrain),
        ('\n average energy efficiency:', np.mean(energy_efficiency)),
        ('\n max energy efficiency:', np.max(energy_efficiency)),
    )
    legend_text = ''.join(prefix + str(value) for prefix, value in legend_fields)

    canvas = plt.figure(figsize=(18, 10))
    episodes = range(final_steps)

    def add_panel(position, y_caption):
        # Create one subplot and give it the shared x caption and its own
        # y caption.
        axes = canvas.add_subplot(position)
        axes.set_xlabel('No. of episodes')
        axes.set_ylabel(y_caption)
        return axes

    # Coverage, with its baseline drawn as a flat line.
    coverage_axes = add_panel(421, 'Average attained coverage')
    coverage_axes.plot(episodes, coverage)
    coverage_axes.plot([BL_coverage] * final_steps)

    # Fairness index, with its baseline.
    fairness_axes = add_panel(422, 'Jain\'s fairness index')
    fairness_axes.plot(episodes, jainindex)
    fairness_axes.plot([BL_jain] * final_steps)

    # Reward.
    reward_axes = add_panel(423, 'Instantaneous reward')
    reward_axes.plot(episodes, r_)

    # Disconnections, with the baseline.
    disconnect_axes = add_panel(424, 'Instantaneous times \nof disconnection')
    disconnect_axes.plot(episodes, discon_, color='blue')
    disconnect_axes.plot([BL_loss] * final_steps)

    # Out-of-map counter; this panel also carries the legend box.
    outside_axes = add_panel(426, 'Accumulated times \nto fly outside the map')
    outside_line, = outside_axes.plot(episodes, over_map, color='green')
    outside_axes.legend([outside_line], [legend_text])

    # Energy consumption.
    energy_axes = add_panel(425, 'Average energy consumption')
    energy_axes.plot(episodes, energy, color='green')

    # Energy efficiency.
    efficiency_axes = add_panel(427, 'Energy efficiency')
    efficiency_axes.plot(episodes, energy_efficiency, color='magenta')

    canvas.subplots_adjust(hspace=0.4)
    canvas.savefig('{}/pic_{}.png'.format(path, i))
    plt.close()
Beispiel #7
0
def random_maddpg_test(arglist):
    """Run the multi-task test loop with actors that have no checkpoint loaded.

    Builds a TensorFlow graph/session, creates one shared set of actors and one
    environment per task, then steps every task environment in turn. Per-step
    metrics (energy, Jain index, coverage, out-of-map and disconnection
    counters, reward) are accumulated; when an episode ends the last values are
    appended to the per-task histories, a summary line is printed and, if
    ``arglist.draw_picture_test`` is set, a report is appended to
    ``random_model_test.log`` in the task directory. When the episode number
    equals ``arglist.num_test_episodes`` the histories are also plotted through
    ``draw_util.drawTest``. The loop exits once the episode number exceeds
    ``arglist.num_test_episodes``.

    Args:
        arglist: Configuration object. Attributes read here: num_task_transfer,
            scenario, reward_type, test_data_dir, test_data_name,
            num_adversaries, good_policy, adv_policy, load_dir,
            pictures_dir_transfer_test, history_length, max_episode_len,
            draw_picture_test, num_test_episodes.

    Returns:
        None. Results are printed and written to disk.
    """
    debug = False
    num_tasks = arglist.num_task_transfer  # total number of tasks
    list_of_taskenv = []  # env list
    graph = tf.Graph()
    with graph.as_default():
        with U.single_threaded_session():
            if debug:
                begin = time_begin()
            # 1.1 create the common actor
            env = make_env(arglist.scenario, reward_type=arglist.reward_type)
            env.set_map(sample_map(arglist.test_data_dir + arglist.test_data_name + "_1.h5"))
            # Create agent trainers
            obs_shape_n = [env.observation_space[i].shape for i in range(env.n)]
            num_adversaries = min(env.n, arglist.num_adversaries)
            actors = get_trainers(env, "actor_", num_adversaries, obs_shape_n, arglist, type=0)
            for i in range(num_tasks):
                list_of_taskenv.append(make_env(arglist.scenario, reward_type=arglist.reward_type))
            print('Using good policy {} and adv policy {}'.format(arglist.good_policy, arglist.adv_policy))

            # 1.2 Initialize
            U.initialize()

            # The second-to-last '/'-separated component of load_dir is used as
            # the model name; this assumes load_dir ends with '/' -- TODO confirm.
            model_name = arglist.load_dir.split('/')[-2] + '/'
            path = arglist.pictures_dir_transfer_test + model_name
            mkdir(path)
            for i in range(num_tasks):
                mkdir(os.path.join(path, "task_" + str(i)))
            # 2.1 load checkpoints
            # (disabled: no saved state is restored in this function)
            # model_load_dir = os.path.join(arglist.load_dir, str(model_number * arglist.save_rate), 'model.ckpt')
            # print('From ', model_load_dir, ' Loading previous state...')
            # U.load_state(model_load_dir)

            # 3.1 initialize global (whole-run) variables
            global_steps = np.zeros(num_tasks)  # global timesteps for each env
            episodes_rewards = [[0.0] for _ in range(num_tasks)]  # each element is the sum of all agents' rewards in one episode
            # each element of agent_rewards[i] records the sum of a single agent's rewards in one episode
            agent_rewards = [[[0.0] for _ in range(env.n)] for _ in range(num_tasks)]

            # Per-task histories; one value is appended per finished episode.
            energy_consumptions_for_test = [[] for _ in range(num_tasks)]
            j_index = [[] for _ in range(num_tasks)]
            aver_cover = [[] for _ in range(num_tasks)]
            instantaneous_dis = [[] for _ in range(num_tasks)]
            instantaneous_out_the_map = [[] for _ in range(num_tasks)]
            energy_efficiency = [[] for _ in range(num_tasks)]
            instantaneous_accmulated_reward = [[] for _ in range(num_tasks)]

            # 3.2 initialize local (per-episode) variables
            local_steps = np.zeros(num_tasks)  # local timesteps for each env
            energy_one_episode = [[] for _ in range(num_tasks)]
            j_index_one_episode = [[] for _ in range(num_tasks)]
            aver_cover_one_episode = [[] for _ in range(num_tasks)]
            over_map_counter = np.zeros(num_tasks)
            over_map_one_episode = [[] for _ in range(num_tasks)]
            disconnected_number_counter = np.zeros(num_tasks)
            disconnected_number_one_episode = [[] for _ in range(num_tasks)]
            episode_reward_step = np.zeros(num_tasks)  # running sum, over the steps of one episode, of the mean reward across agents
            accmulated_reward_one_episode = [[] for _ in range(num_tasks)]
            route_one_episode = [[] for _ in range(num_tasks)]

            # Constants passed to drawTest as BL_coverage / BL_jain / BL_loss.
            bl_coverage = 0.8
            bl_jainindex = 0.8
            bl_loss = 100

            # 3.3 initialize the environments
            # Note the order: reset() is called first, then the task map is set.
            obs_n_list = []
            for i in range(num_tasks):
                obs_n = list_of_taskenv[i].reset()
                list_of_taskenv[i].set_map(
                    sample_map(arglist.test_data_dir + arglist.test_data_name + "_" + str(i + 1) + ".h5", random=False))
                obs_n_list.append(obs_n)

            # 3.4 fill every agent's history queue with copies of its initial observation
            # NOTE(review): history_n is not written to anywhere in the loop
            # below, so the actors keep receiving these initial queues on every
            # step -- confirm whether that is intended for this "random" test.
            history_n = [[queue.Queue(arglist.history_length) for _ in range(env.n)] for _ in range(num_tasks)]
            for i in range(num_tasks):
                for j in range(env.n):
                    for _ in range(arglist.history_length):
                        history_n[i][j].put(obs_n_list[i][j])
            # 4 test
            episode_start_time = time.time()
            print('Starting iterations...')
            episode_number = 0
            while True:
                for task_index in range(num_tasks):
                    # 3.1 select the environment of the current task
                    current_env = list_of_taskenv[task_index]
                    # get action
                    action_n = [agent.action(obs) for agent, obs in zip(actors, history_n[task_index])]
                    # environment step
                    new_obs_n, rew_n, done_n, info_n = current_env.step(action_n)
                    local_steps[task_index] += 1  # update the local (per-episode) counter
                    global_steps[task_index] += 1  # update the global counter
                    done = all(done_n)
                    terminal = (local_steps[task_index] >= arglist.max_episode_len)

                    # update obs
                    obs_n_list[task_index] = new_obs_n
                    # update reward
                    for i, rew in enumerate(rew_n):
                        episodes_rewards[task_index][-1] += rew
                        agent_rewards[task_index][i][-1] += rew
                    # energy
                    energy_one_episode[task_index].append(current_env.get_energy())
                    # fair index
                    j_index_one_episode[task_index].append(current_env.get_jain_index())
                    # coverage
                    aver_cover_one_episode[task_index].append(current_env.get_aver_cover())
                    # over map counter
                    over_map_counter[task_index] += current_env.get_over_map()
                    over_map_one_episode[task_index].append(over_map_counter[task_index])
                    # disconnected counter
                    disconnected_number_counter[task_index] += current_env.get_dis()
                    disconnected_number_one_episode[task_index].append(disconnected_number_counter[task_index])
                    # reward
                    episode_reward_step[task_index] += np.mean(rew_n)
                    accmulated_reward_one_episode[task_index].append(episode_reward_step[task_index])
                    route = current_env.get_agent_pos()
                    route_one_episode[task_index].append(route)

                    # Episode index derived from this task's global step count.
                    episode_number = math.ceil(global_steps[task_index] / arglist.max_episode_len)
                    if done or terminal:
                        # record the per-episode values (last step of the episode)
                        energy_consumptions_for_test[task_index].append(energy_one_episode[task_index][-1])  # energy
                        j_index[task_index].append(j_index_one_episode[task_index][-1])  # fairness index
                        aver_cover[task_index].append(aver_cover_one_episode[task_index][-1])  # coverage
                        instantaneous_dis[task_index].append(
                            disconnected_number_one_episode[task_index][-1])  # disconnected
                        instantaneous_out_the_map[task_index].append(
                            over_map_one_episode[task_index][-1])  # out of the map
                        instantaneous_accmulated_reward[task_index].append(
                            accmulated_reward_one_episode[task_index][-1])  # reward
                        # efficiency = coverage * fairness / energy, all taken at the last step
                        energy_efficiency[task_index].append(aver_cover_one_episode[task_index][-1]
                                                             * j_index_one_episode[task_index][-1] /
                                                             energy_one_episode[task_index][-1])  # efficiency

                        episode_end_time = time.time()
                        episode_time = episode_end_time - episode_start_time
                        episode_start_time = episode_end_time
                        print('Task %d, Episode: %d - energy_consumptions: %s, efficiency: %s, time %s' % (
                            task_index,
                            episode_number,
                            str(current_env.get_energy_origin()),
                            str(energy_efficiency[task_index][-1]),
                            str(round(episode_time, 3))))
                        current_path = os.path.join(path, "task_" + str(task_index))
                        if arglist.draw_picture_test:
                            file_path = os.path.join(current_path,
                                                     "random_model_test.log")
                            if episode_number == arglist.num_test_episodes:
                                # Final episode: summary report over all recorded episodes, plus the plot.
                                report = '\nOK===============report=====================\nRadom maddpg Model-testing ' \
                                         + str(arglist.num_test_episodes) + ' episodes\'s result:' \
                                         + '\n!!!Max energy efficiency: ' \
                                         + str(np.max(energy_efficiency[task_index])) \
                                         + '\n!!!Average energy efficiency:' \
                                         + str(np.mean(energy_efficiency[task_index])) \
                                         + '\nAverage average attained coverage: ' \
                                         + str(np.mean(aver_cover[task_index])) + \
                                         '\nAverage Jaint\'s fairness index: ' \
                                         + str(np.mean(j_index[task_index])) + \
                                         '\nAverage normalized average energy consumptions:' \
                                         + str(np.mean(energy_consumptions_for_test[task_index])) \
                                         + "\n" + "==========================end=============================\n"

                                # NOTE(review): current_path+"random_maddpg" is plain string
                                # concatenation with no separator, so the output directory is
                                # ".../task_<k>random_maddpg" rather than a sub-directory of
                                # the task folder -- confirm this is intended.
                                draw_util.drawTest("random",
                                                   current_path+"random_maddpg",
                                                   energy_efficiency[task_index],
                                                   energy_consumptions_for_test[task_index],
                                                   aver_cover[task_index],
                                                   j_index[task_index],
                                                   instantaneous_accmulated_reward[task_index],
                                                   instantaneous_dis[task_index],
                                                   instantaneous_out_the_map[task_index],
                                                   len(aver_cover[task_index]),
                                                   bl_coverage,
                                                   bl_jainindex,
                                                   bl_loss,
                                                   False)
                            else:
                                # Any other episode: single-episode report.
                                report = '\nRandom maddpg Model-' \
                                         + '-episode ' + str(episode_number) + ' result:' \
                                         + '\n!!!Energy efficiency: ' \
                                         + str(energy_efficiency[task_index][-1]) \
                                         + '\nAverage attained coverage: ' \
                                         + str(aver_cover[task_index][-1]) + \
                                         '\nJaint\'s fairness index: ' \
                                         + str(j_index[task_index][-1]) + \
                                         '\nnormalized average energy consumptions: ' \
                                         + str(energy_consumptions_for_test[task_index][-1]) \
                                         + "\n"

                            with open(file_path, 'a+') as file:
                                file.write(report)

                        # reset custom statistics variables between episode and epoch------------------------------------

                        # The per-episode buffers of ALL tasks are re-created, but only when
                        # the last task finishes an episode.
                        # NOTE(review): this appears to rely on all tasks ending their
                        # episodes in the same pass of the for-loop -- confirm for the
                        # case where `done` fires for only some tasks.
                        if task_index == num_tasks - 1:
                            energy_one_episode = [[] for _ in range(num_tasks)]
                            j_index_one_episode = [[] for _ in range(num_tasks)]
                            aver_cover_one_episode = [[] for _ in range(num_tasks)]
                            over_map_counter = np.zeros(num_tasks)
                            over_map_one_episode = [[] for _ in range(num_tasks)]
                            disconnected_number_counter = np.zeros(num_tasks)
                            disconnected_number_one_episode = [[] for _ in range(num_tasks)]
                            episode_reward_step = np.zeros(num_tasks)
                            accmulated_reward_one_episode = [[] for _ in range(num_tasks)]
                            route_one_episode = [[] for _ in range(num_tasks)]

                        # reset local variables
                        obs_n_list[task_index] = current_env.reset()  # reset the env
                        current_env.set_map(
                            sample_map(
                                arglist.test_data_dir + arglist.test_data_name + "_" + str(task_index + 1) + ".h5",
                                random=False))
                        local_steps[task_index] = 0  # reset the local counter

                        # update global variables
                        episodes_rewards[task_index].append(0)  # start a new accumulator element
                        for reward in agent_rewards[task_index]:
                            reward.append(0)

                # Stop once the episode index has moved past the requested number of test episodes.
                if episode_number > arglist.num_test_episodes:
                    break