Example #1
import json
import os
import sys
import time
from os import getpid

import cv2
import numpy as np
import psutil

# Repo-internal helpers (initial_positions, start_environment, connect_drone,
# read_cfg, pygame_connect, PedraAgent, Memory, policy, ...) are assumed to be
# imported elsewhere in this module.


def generate_json(cfg):
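    """Write an AirSim settings.json into the user's Documents/Airsim folder.

    In 'move_around' mode only SimMode=ComputerVision is written; otherwise the
    file also contains the host IP, clock speed, drone pawn path, per-agent
    vehicle spawn poses, and camera capture settings. Returns False when there
    are fewer initial positions than requested agents, True otherwise.
    """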
    flag = True
    path = os.path.expanduser(os.path.join('~', 'Documents', 'Airsim'))
    if not os.path.exists(path):
        os.makedirs(path)

    filename = os.path.join(path, 'settings.json')

    data = {}

    if cfg.mode == 'move_around':
        data['SimMode'] = 'ComputerVision'
    else:
        data['SettingsVersion'] = 1.2
        data['LocalHostIp'] = cfg.ip_address
        data['SimMode'] = cfg.SimMode
        data['ClockSpeed'] = cfg.ClockSpeed
        data["ViewMode"] = "NoDisplay"
        PawnPaths = {}
        PawnPaths["DefaultQuadrotor"] = {}
        PawnPaths["DefaultQuadrotor"][
            'PawnBP'] = ''' Class'/AirSim/Blueprints/BP_''' + cfg.drone + '''.BP_''' + cfg.drone + '''_C' '''
        data['PawnPaths'] = PawnPaths

        # Define agents:
        _, reset_array_raw, _, _ = initial_positions(cfg.env_name,
                                                     num_agents=cfg.num_agents)
        Vehicles = {}
        if len(reset_array_raw) < cfg.num_agents:
            print(
                "Error: Either reduce the number of agents or add more initial positions"
            )
            flag = False
        else:
            for agents in range(cfg.num_agents):
                name_agent = "drone" + str(agents)
                agent_position = reset_array_raw[name_agent].pop(0)
                Vehicles[name_agent] = {}
                Vehicles[name_agent]["VehicleType"] = "SimpleFlight"
                Vehicles[name_agent]["X"] = agent_position[0]
                Vehicles[name_agent]["Y"] = agent_position[1]
                # Vehicles[name_agent]["Z"] = agent_position[2]
                Vehicles[name_agent]["Z"] = 0
                Vehicles[name_agent]["Yaw"] = agent_position[3]
            data["Vehicles"] = Vehicles

        CameraDefaults = {}
        CameraDefaults['CaptureSettings'] = []
        # CaptureSettings=[]

        camera = {}
        camera['ImageType'] = 0
        camera['Width'] = cfg.width
        camera['Height'] = cfg.height
        camera['FOV_Degrees'] = cfg.fov_degrees

        CameraDefaults['CaptureSettings'].append(camera)

        camera = {}
        camera['ImageType'] = 3
        camera['Width'] = cfg.width
        camera['Height'] = cfg.height
        camera['FOV_Degrees'] = cfg.fov_degrees

        CameraDefaults['CaptureSettings'].append(camera)

        data['CameraDefaults'] = CameraDefaults
    with open(filename, 'w') as outfile:
        json.dump(data, outfile, indent=4)

    return flag
def DeepQLearning(cfg):
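    """Deep Q-learning entry point for AirSim-based drone agents.

    In 'move_around' mode only the environment is launched for manual flight.
    In 'train' mode each agent keeps a replay memory and a target network,
    trains a DQN on depth-image based rewards, and periodically averages
    weights across agents. In 'infer' mode a single agent flies using the
    trained policy while its altitude and navigation traces are plotted and
    saved.
    """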

    # Hard-coded goal (x, y) position used as the arrival target in reward_gen
    f_x_val = 5.442914962768555
    f_y_val = 1.8743505477905273

    if cfg.mode == 'move_around':
        env_process, env_folder = start_environment(env_name=cfg.env_name)
    else:
        algorithm_cfg = read_cfg(config_filename='configs/DeepQLearning.cfg',
                                 verbose=True)

        if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate' or cfg.mode == 'infer':
            cfg.num_agents = 1

        # Start the environment
        env_process, env_folder = start_environment(env_name=cfg.env_name)
        # Connect to Unreal Engine and get the drone handle: client
        client, old_posit, initZ = connect_drone(ip_address=cfg.ip_address,
                                                 phase=cfg.mode,
                                                 num_agents=cfg.num_agents)
        initial_pos = old_posit.copy()
        # Load the initial positions for the environment
        reset_array, reset_array_raw, level_name, crash_threshold = initial_positions(
            cfg.env_name, initZ, cfg.num_agents)
        # Get memory handler
        process = psutil.Process(getpid())
        # Load PyGame Screen
        screen = pygame_connect(phase=cfg.mode)

        fig_z = []
        fig_nav = []
        debug = False

        # Generate path where the weights will be saved
        cfg, algorithm_cfg = save_network_path(cfg=cfg,
                                               algorithm_cfg=algorithm_cfg)
        current_state = {}
        new_state = {}
        posit = {}
        name_agent_list = []
        agent = {}
        # Replay Memory for RL
        if cfg.mode == 'train':
            ReplayMemory = {}
            target_agent = {}
            for drone in range(cfg.num_agents):
                name_agent = "drone" + str(drone)
                name_agent_list.append(name_agent)
                print_orderly(name_agent, 40)
                ReplayMemory[name_agent] = Memory(algorithm_cfg.buffer_len)
                agent[name_agent] = PedraAgent(algorithm_cfg,
                                               client,
                                               name='DQN',
                                               vehicle_name=name_agent)
                target_agent[name_agent] = PedraAgent(algorithm_cfg,
                                                      client,
                                                      name='Target',
                                                      vehicle_name=name_agent)
                current_state[name_agent] = agent[name_agent].get_state()

        elif cfg.mode == 'infer':
            name_agent = 'drone0'
            name_agent_list.append(name_agent)
            agent[name_agent] = PedraAgent(algorithm_cfg,
                                           client,
                                           name=name_agent + 'DQN',
                                           vehicle_name=name_agent)

            env_cfg = read_cfg(config_filename=env_folder + 'config.cfg')
            nav_x = []
            nav_y = []
            altitude = {}
            altitude[name_agent] = []
            p_z, f_z, fig_z, ax_z, line_z, fig_nav, ax_nav, nav = initialize_infer(
                env_cfg=env_cfg, client=client, env_folder=env_folder)
            nav_text = ax_nav.text(0, 0, '')

        # Initialize variables
        iter = 1
        # num_collisions = 0
        episode = {}
        active = True

        print_interval = 1
        automate = True
        choose = False
        print_qval = False
        last_crash = {}
        ret = {}
        num_collisions = {}
        num_arrive = {}
        level = {}
        level_state = {}
        level_posit = {}
        times_switch = {}
        last_crash_array = {}
        ret_array = {}
        distance_array = {}
        epi_env_array = {}
        log_files = {}

        # If the phase is inference force the num_agents to 1
        hyphens = '-' * int((80 - len('Log files')) / 2)
        print(hyphens + ' ' + 'Log files' + ' ' + hyphens)
        for name_agent in name_agent_list:
            ret[name_agent] = 0
            num_collisions[name_agent] = 0
            last_crash[name_agent] = 0
            level[name_agent] = 0
            episode[name_agent] = 0
            level_state[name_agent] = [None] * len(reset_array[name_agent])
            level_posit[name_agent] = [None] * len(reset_array[name_agent])
            times_switch[name_agent] = 0
            last_crash_array[name_agent] = np.zeros(shape=len(
                reset_array[name_agent]),
                                                    dtype=np.int32)
            ret_array[name_agent] = np.zeros(
                shape=len(reset_array[name_agent]))
            distance_array[name_agent] = np.zeros(
                shape=len(reset_array[name_agent]))
            epi_env_array[name_agent] = np.zeros(shape=len(
                reset_array[name_agent]),
                                                 dtype=np.int32)

            # Log file
            log_path = algorithm_cfg.network_path + '/' + name_agent + '/' + cfg.mode + 'log.txt'
            print("Log path: ", log_path)
            log_files[name_agent] = open(log_path, 'w')

        distance = 0
        switch_env = False

        print_orderly('Simulation begins', 80)

        # save_posit = old_posit

        # last_crash_array = np.zeros(shape=len(level_name), dtype=np.int32)
        # ret_array = np.zeros(shape=len(level_name))
        # distance_array = np.zeros(shape=len(level_name))
        # epi_env_array = np.zeros(shape=len(level_name), dtype=np.int32)

        while active:
            try:
                active, automate, algorithm_cfg, client = check_user_input(
                    active, automate, agent[name_agent], client,
                    old_posit[name_agent], initZ, fig_z, fig_nav, env_folder,
                    cfg, algorithm_cfg)

                if automate:

                    if cfg.mode == 'train':

                        if iter % algorithm_cfg.switch_env_steps == 0:
                            switch_env = True
                        else:
                            switch_env = False

                        for name_agent in name_agent_list:

                            start_time = time.time()
                            if switch_env:
                                posit1_old = client.simGetVehiclePose(
                                    vehicle_name=name_agent)
                                times_switch[
                                    name_agent] = times_switch[name_agent] + 1
                                level_state[name_agent][level[
                                    name_agent]] = current_state[name_agent]
                                level_posit[name_agent][
                                    level[name_agent]] = posit1_old
                                last_crash_array[name_agent][
                                    level[name_agent]] = last_crash[name_agent]
                                ret_array[name_agent][
                                    level[name_agent]] = ret[name_agent]
                                distance_array[name_agent][
                                    level[name_agent]] = distance
                                epi_env_array[name_agent][
                                    level[name_agent]] = episode[name_agent]

                                level[name_agent] = (
                                    level[name_agent] + 1) % len(
                                        reset_array[name_agent])

                                print(
                                    name_agent + ' :Transferring to level: ',
                                    level[name_agent], ' - ',
                                    level_name[name_agent][level[name_agent]])

                                if times_switch[name_agent] < len(
                                        reset_array[name_agent]):
                                    reset_to_initial(level[name_agent],
                                                     reset_array,
                                                     client,
                                                     vehicle_name=name_agent)
                                else:
                                    current_state[name_agent] = level_state[
                                        name_agent][level[name_agent]]
                                    posit1_old = level_posit[name_agent][
                                        level[name_agent]]
                                    reset_to_initial(level[name_agent],
                                                     reset_array,
                                                     client,
                                                     vehicle_name=name_agent)
                                    client.simSetVehiclePose(
                                        posit1_old,
                                        ignore_collison=True,
                                        vehicle_name=name_agent)
                                    time.sleep(0.1)

                                last_crash[name_agent] = last_crash_array[
                                    name_agent][level[name_agent]]
                                ret[name_agent] = ret_array[name_agent][
                                    level[name_agent]]
                                distance = distance_array[name_agent][
                                    level[name_agent]]
                                episode[name_agent] = epi_env_array[
                                    name_agent][int(level[name_agent] / 3)]
                                # environ = environ^True
                            else:

                                action, action_type, algorithm_cfg.epsilon, qvals = policy(
                                    algorithm_cfg.epsilon,
                                    current_state[name_agent], iter,
                                    algorithm_cfg.epsilon_saturation,
                                    algorithm_cfg.epsilon_model,
                                    algorithm_cfg.wait_before_train,
                                    algorithm_cfg.num_actions,
                                    agent[name_agent])

                                action_word = translate_action(
                                    action, algorithm_cfg.num_actions)
                                # Take the action
                                agent[name_agent].take_action(
                                    action,
                                    algorithm_cfg.num_actions,
                                    SimMode=cfg.SimMode)
                                # time.sleep(0.05)
                                new_state[name_agent] = agent[
                                    name_agent].get_state()
                                new_depth1, thresh = agent[
                                    name_agent].get_depth(cfg)

                                # Get GPS information
                                posit[name_agent] = client.simGetVehiclePose(
                                    vehicle_name=name_agent)
                                position = posit[name_agent].position
                                old_p = np.array([
                                    old_posit[name_agent].position.x_val,
                                    old_posit[name_agent].position.y_val
                                ])
                                new_p = np.array(
                                    [position.x_val, position.y_val])

                                # calculate distance
                                distance = distance + np.linalg.norm(new_p -
                                                                     old_p)
                                old_posit[name_agent] = posit[name_agent]

                                #reward, crash = agent[name_agent].reward_gen(new_depth1, action, crash_threshold, thresh, debug, cfg)
                                #reward2, arrive = agent[name_agent].reward_gen2(start_time)
                                reward, crash, arrive = agent[
                                    name_agent].reward_gen(
                                        new_depth1, action, crash_threshold,
                                        thresh, debug,
                                        posit[name_agent].position.x_val,
                                        posit[name_agent].position.y_val,
                                        f_x_val, f_y_val, start_time, cfg)
                                ret[name_agent] = ret[name_agent] + reward
                                #ret[name_agent] = ret[name_agent] + reward2
                                agent_state = agent[name_agent].GetAgentState()
                                if agent_state.has_collided:
                                    num_collisions[
                                        name_agent] = num_collisions[
                                            name_agent] + 1
                                    print('crash')
                                    crash = True
                                    reward = -1000

                                #if (posit[name_agent].position.x_val == f_x_val) and (posit[name_agent].position.y_val == f_y_val):
                                #num_arrive[name_agent] = num_arrive[name_agent] + 1
                                #print("Arrived")
                                #arrive = True
                                #reward = 1000000

                                data_tuple = []
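                                # Store the (state, action, next_state, reward,
                                # crash) transition in replay memory,
                                # prioritized by its TD error.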
                                data_tuple.append([
                                    current_state[name_agent], action,
                                    new_state[name_agent], reward, crash
                                ])
                                err = get_errors(data_tuple, choose,
                                                 ReplayMemory[name_agent],
                                                 algorithm_cfg.input_size,
                                                 agent[name_agent],
                                                 target_agent[name_agent],
                                                 algorithm_cfg.gamma,
                                                 algorithm_cfg.Q_clip)
                                ReplayMemory[name_agent].add(err, data_tuple)
                                # Train if sufficient frames have been stored
                                if iter > algorithm_cfg.wait_before_train:
                                    if iter % algorithm_cfg.train_interval == 0:
                                        # Train the RL network
                                        old_states, Qvals, actions, err, idx = minibatch_double(
                                            data_tuple,
                                            algorithm_cfg.batch_size, choose,
                                            ReplayMemory[name_agent],
                                            algorithm_cfg.input_size,
                                            agent[name_agent],
                                            target_agent[name_agent],
                                            algorithm_cfg.gamma,
                                            algorithm_cfg.Q_clip)

                                        for i in range(
                                                algorithm_cfg.batch_size):
                                            ReplayMemory[name_agent].update(
                                                idx[i], err[i])

                                        if print_qval:
                                            print(Qvals)

                                        if choose:
                                            # Double-DQN
                                            target_agent[name_agent].train_n(
                                                old_states, Qvals, actions,
                                                algorithm_cfg.batch_size,
                                                algorithm_cfg.dropout_rate,
                                                algorithm_cfg.learning_rate,
                                                algorithm_cfg.epsilon, iter)
                                        else:
                                            agent[name_agent].train_n(
                                                old_states, Qvals, actions,
                                                algorithm_cfg.batch_size,
                                                algorithm_cfg.dropout_rate,
                                                algorithm_cfg.learning_rate,
                                                algorithm_cfg.epsilon, iter)

                                # iter += 1

                                time_exec = time.time() - start_time

                                mem_percent = process.memory_info()[0] / 2.**30  # resident set size in GB

                                s_log = '{:<6s} - Level {:>2d} - Iter: {:>6d}/{:<5d} {:<8s}-{:>5s} Eps: {:<1.4f} lr: {:>1.8f} Ret = {:<+6.4f} Last Crash = {:<5d} t={:<1.3f} Mem = {:<5.4f}  Reward: {:<+1.4f}  '.format(
                                    name_agent, int(level[name_agent]), iter,
                                    episode[name_agent], action_word,
                                    action_type, algorithm_cfg.epsilon,
                                    algorithm_cfg.learning_rate,
                                    ret[name_agent], last_crash[name_agent],
                                    time_exec, mem_percent, reward)

                                if iter % print_interval == 0:
                                    print(s_log)
                                log_files[name_agent].write(s_log + '\n')

                                last_crash[
                                    name_agent] = last_crash[name_agent] + 1
                                if debug:
                                    cv2.imshow(
                                        name_agent,
                                        np.hstack(
                                            (np.squeeze(
                                                current_state[name_agent],
                                                axis=0),
                                             np.squeeze(new_state[name_agent],
                                                        axis=0))))
                                    cv2.waitKey(1)

                                if crash:
                                    agent[name_agent].return_plot(
                                        ret[name_agent], episode[name_agent],
                                        int(level[name_agent] / 3),
                                        mem_percent, iter, distance)
                                    ret[name_agent] = 0
                                    distance = 0
                                    episode[
                                        name_agent] = episode[name_agent] + 1
                                    last_crash[name_agent] = 0

                                    reset_to_initial(level[name_agent],
                                                     reset_array,
                                                     client,
                                                     vehicle_name=name_agent)
                                    # time.sleep(0.2)
                                    current_state[name_agent] = agent[
                                        name_agent].get_state()
                                else:
                                    #current_state[name_agent] = new_state[name_agent]
                                    if arrive:
                                        episode[name_agent] = episode[
                                            name_agent] + 1
                                        ret[name_agent] = 0
                                        distance = 0
                                        last_crash[name_agent] = 0
                                        reset_to_initial(
                                            level[name_agent],
                                            reset_array,
                                            client,
                                            vehicle_name=name_agent)
                                        # time.sleep(0.2)
                                        current_state[name_agent] = agent[
                                            name_agent].get_state()
                                    else:
                                        current_state[name_agent] = new_state[
                                            name_agent]

                                if iter % algorithm_cfg.max_iters == 0:
                                    automate = False

                                # if iter >140:
                                #     active=False
                        if iter % algorithm_cfg.communication_interval == 0 and iter > algorithm_cfg.wait_before_train:
                            print(
                                'Communicating the weights and averaging them')
                            communicate_across_agents(agent, name_agent_list,
                                                      algorithm_cfg)
                            communicate_across_agents(target_agent,
                                                      name_agent_list,
                                                      algorithm_cfg)

                        if iter % algorithm_cfg.update_target_interval == 0 and iter > algorithm_cfg.wait_before_train:
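                            # Save each agent's network and flip 'choose',
                            # which alternates which of the two networks
                            # (agent vs. target_agent) is trained on
                            # subsequent minibatches.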
                            for name_agent in name_agent_list:
                                agent[name_agent].take_action(
                                    [-1],
                                    algorithm_cfg.num_actions,
                                    SimMode=cfg.SimMode)
                                print(name_agent +
                                      ' - Switching Target Network')
                                agent[name_agent].save_network(
                                    algorithm_cfg.network_path)

                            choose = not choose

                        iter += 1

                    elif cfg.mode == 'infer':
                        # Inference phase
                        agent_state = agent[name_agent].GetAgentState()
                        if agent_state.has_collided:
                            print('Drone collided')
                            print("Total distance traveled: ",
                                  np.round(distance, 2))
                            active = False
                            client.moveByVelocityAsync(
                                vx=0,
                                vy=0,
                                vz=0,
                                duration=1,
                                vehicle_name=name_agent).join()

                            if nav_x:  # Nav_x is empty if the drone collides in first iteration
                                ax_nav.plot(nav_x.pop(),
                                            nav_y.pop(),
                                            'r*',
                                            linewidth=20)
                            file_path = env_folder + 'results/'
                            fig_z.savefig(file_path + 'altitude_variation.png',
                                          dpi=500)
                            fig_nav.savefig(file_path + 'navigation.png',
                                            dpi=500)
                            close_env(env_process)
                            print('Figures saved')
                        else:
                            posit[name_agent] = client.simGetVehiclePose(
                                vehicle_name=name_agent)
                            distance = distance + np.linalg.norm(
                                np.array([
                                    old_posit[name_agent].position.x_val -
                                    posit[name_agent].position.x_val,
                                    old_posit[name_agent].position.y_val -
                                    posit[name_agent].position.y_val
                                ]))
                            # altitude[name_agent].append(-posit[name_agent].position.z_val+p_z)
                            altitude[name_agent].append(
                                -posit[name_agent].position.z_val - f_z)

                            quat = (posit[name_agent].orientation.w_val,
                                    posit[name_agent].orientation.x_val,
                                    posit[name_agent].orientation.y_val,
                                    posit[name_agent].orientation.z_val)
                            yaw = euler_from_quaternion(quat)[2]

                            x_val = posit[name_agent].position.x_val
                            y_val = posit[name_agent].position.y_val
                            z_val = posit[name_agent].position.z_val

                            nav_x.append(env_cfg.alpha * x_val + env_cfg.o_x)
                            nav_y.append(env_cfg.alpha * y_val + env_cfg.o_y)
                            nav.set_data(nav_x, nav_y)
                            nav_text.remove()
                            nav_text = ax_nav.text(25,
                                                   55,
                                                   'Distance: ' +
                                                   str(np.round(distance, 2)),
                                                   style='italic',
                                                   bbox={
                                                       'facecolor': 'white',
                                                       'alpha': 0.5
                                                   })

                            line_z.set_data(
                                np.arange(len(altitude[name_agent])),
                                altitude[name_agent])
                            ax_z.set_xlim(0, len(altitude[name_agent]))
                            fig_z.canvas.draw()
                            fig_z.canvas.flush_events()

                            current_state[name_agent] = agent[
                                name_agent].get_state()
                            action, action_type, algorithm_cfg.epsilon, qvals = policy(
                                1, current_state[name_agent], iter,
                                algorithm_cfg.epsilon_saturation, 'inference',
                                algorithm_cfg.wait_before_train,
                                algorithm_cfg.num_actions, agent[name_agent])
                            action_word = translate_action(
                                action, algorithm_cfg.num_actions)
                            # Take continuous action
                            agent[name_agent].take_action(
                                action,
                                algorithm_cfg.num_actions,
                                SimMode=cfg.SimMode)
                            old_posit[name_agent] = posit[name_agent]

                            # Verbose and log making
                            s_log = 'Position = ({:<3.2f},{:<3.2f}, {:<3.2f}) Orientation={:<1.3f} Predicted Action: {:<8s}  '.format(
                                x_val, y_val, z_val, yaw, action_word)

                            print(s_log)
                            log_files[name_agent].write(s_log + '\n')

            except Exception as e:
                if str(e) == 'cannot reshape array of size 1 into shape (0,0,3)':
                    print('Recovering from AirSim error')
                    client, old_posit, initZ = connect_drone(
                        ip_address=cfg.ip_address,
                        phase=cfg.mode,
                        num_agents=cfg.num_agents)

                    agent[name_agent].client = client
                else:
                    print('------------- Error -------------')
                    exc_type, exc_obj, exc_tb = sys.exc_info()
                    fname = os.path.split(
                        exc_tb.tb_frame.f_code.co_filename)[1]
                    print(exc_type, fname, exc_tb.tb_lineno)
                    print(exc_obj)
                    automate = False
                    print('Hit r and then backspace to start from this point')
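
A minimal usage sketch for the example above (an assumption, not part of the original code): in 'move_around' mode generate_json only needs cfg.mode, writes a ComputerVision settings.json under the user's Documents/Airsim folder, and returns True. The SimpleNamespace below stands in for the repo's config object.

from types import SimpleNamespace

cfg = SimpleNamespace(mode='move_around')  # hypothetical minimal config
if generate_json(cfg):
    print('settings.json written under Documents/Airsim')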
Example #3
import time
from os import getpid

import cv2
import numpy as np
import psutil

# Repo-internal helpers (read_cfg, connect_drone, initial_positions,
# initial_positions2, PedraAgent, Memory, policy, ...) are assumed to be
# imported elsewhere in this module.


def DeepQLearning(cfg, env_process, env_folder):
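    """Variant of DeepQLearning that reuses an already-started environment.

    Compared to the version above it adds an optional shared global learner
    ('GlobalLearningGlobalUpdate-MA'), Target/Source initial-position sets
    selected via cfg.Target, and TensorBoard logging of returns, safe-flight
    distance, and GPU/system statistics.
    """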
    algorithm_cfg = read_cfg(config_filename='configs/DeepQLearning.cfg',
                             verbose=True)
    algorithm_cfg.algorithm = cfg.algorithm

    if 'GlobalLearningGlobalUpdate-SA' in algorithm_cfg.distributed_algo:
        # algorithm_cfg = update_algorithm_cfg(algorithm_cfg, cfg)
        cfg.num_agents = 1

    # # Start the environment
    # env_process, env_folder = start_environment(env_name=cfg.env_name)
    # Connect to Unreal Engine and get the drone handle: client
    client, old_posit, initZ = connect_drone(ip_address=cfg.ip_address,
                                             phase=cfg.mode,
                                             num_agents=cfg.num_agents)
    print(old_posit)
    initial_pos = old_posit.copy()
    # Load the initial positions for the environment
    # if Target:
    if cfg.Target:
        print("Traget Iniitial")
        reset_array, reset_array_raw, level_name, crash_threshold = initial_positions(
            cfg.env_name, initZ, cfg.num_agents)
    else:
        print("Source Iniitial")
        reset_array, reset_array_raw, level_name, crash_threshold = initial_positions2(
            cfg.env_name + "2", initZ, cfg.num_agents)
        # ""

    print("reset_array", reset_array)
    print("reset_array_raw", reset_array_raw)
    print("level_name", level_name)
    print("crash_threshold", crash_threshold)

    # Initialize System Handlers
    process = psutil.Process(getpid())
    # nvidia_smi.nvmlInit()

    # Load PyGame Screen
    screen = pygame_connect(phase=cfg.mode)

    fig_z = []
    fig_nav = []
    debug = False
    # Generate path where the weights will be saved
    cfg, algorithm_cfg = save_network_path(cfg=cfg,
                                           algorithm_cfg=algorithm_cfg)
    current_state = {}
    new_state = {}
    posit = {}
    name_agent_list = []
    agent = {}
    # Replay Memory for RL
    if cfg.mode == 'train':
        ReplayMemory = {}
        target_agent = {}

        if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate-MA':
            print_orderly('global', 40)
            # Multiple agent acts as data collecter and one global learner
            global_agent = PedraAgent(algorithm_cfg,
                                      client,
                                      name='DQN',
                                      vehicle_name='global',
                                      Target=cfg.Target,
                                      data_collect=cfg.data_collect)
            ReplayMemory = Memory(algorithm_cfg.buffer_len)
            target_agent = PedraAgent(algorithm_cfg,
                                      client,
                                      name='Target',
                                      vehicle_name='global',
                                      Target=cfg.Target)

        for drone in range(cfg.num_agents):
            name_agent = "drone" + str(drone)
            name_agent_list.append(name_agent)
            print_orderly(name_agent, 40)
            # TODO: turn the neural network off if global agent is present
            agent[name_agent] = PedraAgent(algorithm_cfg,
                                           client,
                                           name='DQN',
                                           vehicle_name=name_agent,
                                           Target=cfg.Target,
                                           data_collect=cfg.data_collect)

            if algorithm_cfg.distributed_algo != 'GlobalLearningGlobalUpdate-MA':
                ReplayMemory[name_agent] = Memory(algorithm_cfg.buffer_len)
                target_agent[name_agent] = PedraAgent(algorithm_cfg,
                                                      client,
                                                      name='Target',
                                                      vehicle_name=name_agent,
                                                      Target=cfg.Target)
            current_state[name_agent] = agent[name_agent].get_state()

    elif cfg.mode == 'infer':
        name_agent = 'drone0'
        name_agent_list.append(name_agent)
        agent[name_agent] = PedraAgent(algorithm_cfg,
                                       client,
                                       name=name_agent + 'DQN',
                                       vehicle_name=name_agent,
                                       Target=cfg.Target)

        env_cfg = read_cfg(config_filename=env_folder + 'config_' +
                           ("Target" if cfg.Target else "Source") + '.cfg')
        nav_x = []
        nav_y = []
        altitude = {}
        altitude[name_agent] = []
        p_z, f_z, fig_z, ax_z, line_z, fig_nav, ax_nav, nav = initialize_infer(
            env_cfg=env_cfg, client=client, env_folder=env_folder)
        nav_text = ax_nav.text(0, 0, '')

        # Select initial position
        reset_to_initial(0, reset_array, client, vehicle_name=name_agent)
        old_posit[name_agent] = client.simGetVehiclePose(
            vehicle_name=name_agent)

    # Initialize variables
    iter = 1
    # num_collisions = 0
    episode = {}
    active = True

    print_interval = 1
    automate = True
    choose = False
    print_qval = False
    last_crash = {}
    ret = {}
    distance = {}
    num_collisions = {}
    level = {}
    level_state = {}
    level_posit = {}
    times_switch = {}
    last_crash_array = {}
    ret_array = {}
    distance_array = {}
    epi_env_array = {}
    log_files = {}

    # Per-agent bookkeeping and log files
    hyphens = '-' * int((80 - len('Log files')) / 2)
    print(hyphens + ' ' + 'Log files' + ' ' + hyphens)
    for name_agent in name_agent_list:
        ret[name_agent] = 0
        num_collisions[name_agent] = 0
        last_crash[name_agent] = 0
        level[name_agent] = 0
        episode[name_agent] = 0
        level_state[name_agent] = [None] * len(reset_array[name_agent])
        level_posit[name_agent] = [None] * len(reset_array[name_agent])
        times_switch[name_agent] = 0
        last_crash_array[name_agent] = np.zeros(shape=len(
            reset_array[name_agent]),
                                                dtype=np.int32)
        ret_array[name_agent] = np.zeros(shape=len(reset_array[name_agent]))
        distance_array[name_agent] = np.zeros(
            shape=len(reset_array[name_agent]))
        epi_env_array[name_agent] = np.zeros(shape=len(
            reset_array[name_agent]),
                                             dtype=np.int32)
        distance[name_agent] = 0
        # Log file
        log_path = algorithm_cfg.network_path + '/' + name_agent + '/' + cfg.mode + 'log.txt'
        print("Log path: ", log_path)
        log_files[name_agent] = open(log_path, 'w')

    print_orderly('Simulation begins', 80)

    while active:
        try:
            active, automate, algorithm_cfg, client = check_user_input(
                active, automate, agent[name_agent], client,
                old_posit[name_agent], initZ, fig_z, fig_nav, env_folder, cfg,
                algorithm_cfg)

            if automate:

                if cfg.mode == 'train':

                    if iter % algorithm_cfg.switch_env_steps == 0:
                        switch_env = True
                    else:
                        switch_env = False

                    for name_agent in name_agent_list:

                        start_time = time.time()
                        if switch_env:
                            posit1_old = client.simGetVehiclePose(
                                vehicle_name=name_agent)
                            times_switch[
                                name_agent] = times_switch[name_agent] + 1
                            level_state[name_agent][
                                level[name_agent]] = current_state[name_agent]
                            level_posit[name_agent][
                                level[name_agent]] = posit1_old
                            last_crash_array[name_agent][
                                level[name_agent]] = last_crash[name_agent]
                            ret_array[name_agent][
                                level[name_agent]] = ret[name_agent]
                            distance_array[name_agent][
                                level[name_agent]] = distance[name_agent]
                            epi_env_array[name_agent][
                                level[name_agent]] = episode[name_agent]

                            level[name_agent] = (level[name_agent] + 1) % len(
                                reset_array[name_agent])

                            print(name_agent + ' :Transferring to level: ',
                                  level[name_agent], ' - ',
                                  level_name[name_agent][level[name_agent]])

                            if times_switch[name_agent] < len(
                                    reset_array[name_agent]):
                                reset_to_initial(level[name_agent],
                                                 reset_array,
                                                 client,
                                                 vehicle_name=name_agent)
                            else:
                                current_state[name_agent] = level_state[
                                    name_agent][level[name_agent]]
                                posit1_old = level_posit[name_agent][
                                    level[name_agent]]
                                reset_to_initial(level[name_agent],
                                                 reset_array,
                                                 client,
                                                 vehicle_name=name_agent)
                                client.simSetVehiclePose(
                                    posit1_old,
                                    ignore_collison=True,
                                    vehicle_name=name_agent)
                                time.sleep(0.1)

                            last_crash[name_agent] = last_crash_array[
                                name_agent][level[name_agent]]
                            ret[name_agent] = ret_array[name_agent][
                                level[name_agent]]
                            distance[name_agent] = distance_array[name_agent][
                                level[name_agent]]
                            episode[name_agent] = epi_env_array[name_agent][
                                int(level[name_agent] / 3)]
                            # environ = environ^True
                        else:
                            # TODO: policy from one global agent: DONE
                            if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate-MA':
                                agent_this_drone = global_agent
                                ReplayMemory_this_drone = ReplayMemory
                                target_agent_this_drone = target_agent
                            else:
                                agent_this_drone = agent[name_agent]
                                ReplayMemory_this_drone = ReplayMemory[
                                    name_agent]
                                target_agent_this_drone = target_agent[
                                    name_agent]

                            action, action_type, algorithm_cfg.epsilon, qvals = policy(
                                algorithm_cfg.epsilon,
                                current_state[name_agent], iter,
                                algorithm_cfg.epsilon_saturation,
                                algorithm_cfg.epsilon_model,
                                algorithm_cfg.wait_before_train,
                                algorithm_cfg.num_actions, agent_this_drone)

                            action_word = translate_action(
                                action, algorithm_cfg.num_actions)
                            # Take the action
                            agent[name_agent].take_action(
                                action,
                                algorithm_cfg.num_actions,
                                Mode='static')
                            # time.sleep(0.05)
                            new_state[name_agent] = agent[
                                name_agent].get_state()
                            new_depth1, thresh = agent[
                                name_agent].get_CustomDepth(cfg)

                            # Get GPS information
                            posit[name_agent] = client.simGetVehiclePose(
                                vehicle_name=name_agent)
                            position = posit[name_agent].position
                            old_p = np.array([
                                old_posit[name_agent].position.x_val,
                                old_posit[name_agent].position.y_val
                            ])
                            new_p = np.array([position.x_val, position.y_val])

                            # calculate distance
                            distance[name_agent] = distance[
                                name_agent] + np.linalg.norm(new_p - old_p)
                            old_posit[name_agent] = posit[name_agent]

                            reward, crash = agent[name_agent].reward_gen(
                                new_depth1, action, crash_threshold, thresh,
                                debug, cfg)

                            ret[name_agent] = ret[name_agent] + reward
                            agent_state = agent[name_agent].GetAgentState()

                            if agent_state.has_collided or distance[
                                    name_agent] < 0.1:
                                num_collisions[name_agent] = num_collisions[
                                    name_agent] + 1
                                print('crash')
                                crash = True
                                reward = -1
                            data_tuple = []
                            data_tuple.append([
                                current_state[name_agent], action,
                                new_state[name_agent], reward, crash
                            ])
                            # TODO: one replay memory global, target_agent, agent: DONE
                            err = get_errors(
                                data_tuple, choose, ReplayMemory_this_drone,
                                algorithm_cfg.input_size, agent_this_drone,
                                target_agent_this_drone, algorithm_cfg.gamma,
                                algorithm_cfg.Q_clip)
                            ReplayMemory_this_drone.add(err, data_tuple)

                            # Train if sufficient frames have been stored
                            if iter > algorithm_cfg.wait_before_train:
                                if iter % algorithm_cfg.train_interval == 0:
                                    # Train the RL network
                                    old_states, Qvals, actions, err, idx = minibatch_double(
                                        data_tuple, algorithm_cfg.batch_size,
                                        choose, ReplayMemory_this_drone,
                                        algorithm_cfg.input_size,
                                        agent_this_drone,
                                        target_agent_this_drone,
                                        algorithm_cfg.gamma,
                                        algorithm_cfg.Q_clip)
                                    # TODO global replay memory: DONE
                                    for i in range(algorithm_cfg.batch_size):
                                        ReplayMemory_this_drone.update(
                                            idx[i], err[i])

                                    if print_qval:
                                        print(Qvals)
                                    # TODO global agent, target_agent: DONE
                                    if choose:
                                        # Double-DQN
                                        target_agent_this_drone.network_model.train_n(
                                            old_states, Qvals, actions,
                                            algorithm_cfg.batch_size,
                                            algorithm_cfg.dropout_rate,
                                            algorithm_cfg.learning_rate,
                                            algorithm_cfg.epsilon, iter)
                                    else:
                                        agent_this_drone.network_model.train_n(
                                            old_states, Qvals, actions,
                                            algorithm_cfg.batch_size,
                                            algorithm_cfg.dropout_rate,
                                            algorithm_cfg.learning_rate,
                                            algorithm_cfg.epsilon, iter)

                            time_exec = time.time() - start_time
                            gpu_memory, gpu_utilization, sys_memory = get_SystemStats(
                                process, cfg.NVIDIA_GPU)

                            for i in range(0, len(gpu_memory)):
                                tag_mem = 'GPU' + str(i) + '-Memory-GB'
                                tag_util = 'GPU' + str(i) + 'Utilization-%'
                                agent[
                                    name_agent].network_model.log_to_tensorboard(
                                        tag=tag_mem,
                                        group='SystemStats',
                                        value=gpu_memory[i],
                                        index=iter)
                                agent[
                                    name_agent].network_model.log_to_tensorboard(
                                        tag=tag_util,
                                        group='SystemStats',
                                        value=gpu_utilization[i],
                                        index=iter)
                            agent[name_agent].network_model.log_to_tensorboard(
                                tag='Memory-GB',
                                group='SystemStats',
                                value=sys_memory,
                                index=iter)

                            s_log = '{:<6s} - Level {:>2d} - Iter: {:>6d}/{:<5d} {:<8s}-{:>5s} Eps: {:<1.4f} lr: {:>1.8f} Ret = {:<+6.4f} Last Crash = {:<5d} t={:<1.3f} SF = {:<5.4f}  Reward: {:<+1.4f}  '.format(
                                name_agent, int(level[name_agent]), iter,
                                episode[name_agent], action_word, action_type,
                                algorithm_cfg.epsilon,
                                algorithm_cfg.learning_rate, ret[name_agent],
                                last_crash[name_agent], time_exec,
                                distance[name_agent], reward)

                            if iter % print_interval == 0:
                                print(s_log)
                            log_files[name_agent].write(s_log + '\n')

                            last_crash[name_agent] = last_crash[name_agent] + 1
                            if debug:
                                cv2.imshow(
                                    name_agent,
                                    np.hstack(
                                        (np.squeeze(current_state[name_agent],
                                                    axis=0),
                                         np.squeeze(new_state[name_agent],
                                                    axis=0))))
                                cv2.waitKey(1)

                            if crash:
                                if distance[name_agent] < 0.01:
                                    # Drone won't move, reconnect
                                    print(
                                        'Recovering from drone mobility issue')

                                    agent[
                                        name_agent].client, old_posit, initZ = connect_drone(
                                            ip_address=cfg.ip_address,
                                            phase=cfg.mode,
                                            num_agents=cfg.num_agents,
                                            client=client)
                                else:

                                    agent[
                                        name_agent].network_model.log_to_tensorboard(
                                            tag='Return',
                                            group=name_agent,
                                            value=ret[name_agent],
                                            index=episode[name_agent])
                                    agent[
                                        name_agent].network_model.log_to_tensorboard(
                                            tag='Safe Flight',
                                            group=name_agent,
                                            value=distance[name_agent],
                                            index=episode[name_agent])

                                    ret[name_agent] = 0
                                    distance[name_agent] = 0
                                    episode[
                                        name_agent] = episode[name_agent] + 1
                                    last_crash[name_agent] = 0

                                    reset_to_initial(level[name_agent],
                                                     reset_array,
                                                     client,
                                                     vehicle_name=name_agent)
                                    # time.sleep(0.2)
                                    current_state[name_agent] = agent[
                                        name_agent].get_state()
                                    old_posit[
                                        name_agent] = client.simGetVehiclePose(
                                            vehicle_name=name_agent)
                            else:
                                current_state[name_agent] = new_state[
                                    name_agent]

                            if iter % algorithm_cfg.max_iters == 0:
                                automate = False

                    # TODO define and state agents
                    if iter % algorithm_cfg.update_target_interval == 0 and iter > algorithm_cfg.wait_before_train:

                        if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate-MA':
                            print('global' + ' - Switching Target Network')
                            global_agent.network_model.save_network(
                                algorithm_cfg.network_path,
                                episode[name_agent])
                        else:
                            for name_agent in name_agent_list:
                                agent[name_agent].take_action(
                                    [-1],
                                    algorithm_cfg.num_actions,
                                    Mode='static')
                                print(name_agent +
                                      ' - Switching Target Network')
                                agent[name_agent].network_model.save_network(
                                    algorithm_cfg.network_path,
                                    episode[name_agent])

                        choose = not choose

                    # if iter % algorithm_cfg.communication_interval == 0 and iter > algorithm_cfg.wait_before_train:
                    #     print('Communicating the weights and averaging them')
                    #     communicate_across_agents(agent, name_agent_list, algorithm_cfg)
                    #     communicate_across_agents(target_agent, name_agent_list, algorithm_cfg)

                    iter += 1

                elif cfg.mode == 'infer':
                    # Inference phase
                    agent_state = agent[name_agent].GetAgentState()
                    if agent_state.has_collided:
                        print('Drone collided')
                        print("Total distance traveled: ",
                              np.round(distance[name_agent], 2))
                        active = False
                        client.moveByVelocityAsync(
                            vx=0,
                            vy=0,
                            vz=0,
                            duration=1,
                            vehicle_name=name_agent).join()

                        if nav_x:  # Nav_x is empty if the drone collides in first iteration
                            ax_nav.plot(nav_x.pop(),
                                        nav_y.pop(),
                                        'r*',
                                        linewidth=20)
                        file_path = env_folder + 'results/'
                        fig_z.savefig(file_path + 'altitude_variation.png',
                                      dpi=500)
                        fig_nav.savefig(file_path + 'navigation.png', dpi=500)
                        close_env(env_process)
                        print('Figures saved')
                    else:
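                        # No collision: refresh the pose, accumulate the planar distance
                        # travelled, update the altitude and navigation plots, then query
                        # the Q-network for the next action via policy() in inference mode.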
                        posit[name_agent] = client.simGetVehiclePose(
                            vehicle_name=name_agent)
                        distance[name_agent] = distance[
                            name_agent] + np.linalg.norm(
                                np.array([
                                    old_posit[name_agent].position.x_val -
                                    posit[name_agent].position.x_val,
                                    old_posit[name_agent].position.y_val -
                                    posit[name_agent].position.y_val
                                ]))
                        # altitude[name_agent].append(-posit[name_agent].position.z_val+p_z)
                        altitude[name_agent].append(
                            -posit[name_agent].position.z_val - f_z)

                        quat = (posit[name_agent].orientation.w_val,
                                posit[name_agent].orientation.x_val,
                                posit[name_agent].orientation.y_val,
                                posit[name_agent].orientation.z_val)
                        yaw = euler_from_quaternion(quat)[2]

                        x_val = posit[name_agent].position.x_val
                        y_val = posit[name_agent].position.y_val
                        z_val = posit[name_agent].position.z_val

                        nav_x.append(env_cfg.alpha * x_val + env_cfg.o_x)
                        nav_y.append(env_cfg.alpha * y_val + env_cfg.o_y)
                        nav.set_data(nav_x, nav_y)
                        nav_text.remove()
                        nav_text = ax_nav.text(
                            25,
                            55,
                            'Distance: ' +
                            str(np.round(distance[name_agent], 2)),
                            style='italic',
                            bbox={
                                'facecolor': 'white',
                                'alpha': 0.5
                            })

                        line_z.set_data(np.arange(len(altitude[name_agent])),
                                        altitude[name_agent])
                        ax_z.set_xlim(0, len(altitude[name_agent]))
                        fig_z.canvas.draw()
                        fig_z.canvas.flush_events()

                        current_state[name_agent] = agent[
                            name_agent].get_state()
                        action, action_type, algorithm_cfg.epsilon, qvals = policy(
                            1, current_state[name_agent], iter,
                            algorithm_cfg.epsilon_saturation, 'inference',
                            algorithm_cfg.wait_before_train,
                            algorithm_cfg.num_actions, agent[name_agent])
                        action_word = translate_action(
                            action, algorithm_cfg.num_actions)
                        # Take continuous action
                        agent[name_agent].take_action(
                            action, algorithm_cfg.num_actions, Mode='static')
                        old_posit[name_agent] = posit[name_agent]

                        # Verbose and log making
                        s_log = 'Position = ({:<3.2f},{:<3.2f}, {:<3.2f}) Orientation={:<1.3f} Predicted Action: {:<8s}  '.format(
                            x_val, y_val, z_val, yaw, action_word)

                        print(s_log)
                        log_files[name_agent].write(s_log + '\n')

        except Exception as e:
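            # AirSim occasionally returns an empty image buffer, which surfaces as this
            # specific reshape error; recover by reconnecting the client instead of aborting.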
            if str(e) == 'cannot reshape array of size 1 into shape (0,0,3)':
                print('Recovering from AirSim error')
                client, old_posit, initZ = connect_drone(
                    ip_address=cfg.ip_address,
                    phase=cfg.mode,
                    num_agents=cfg.num_agents)

                agent[name_agent].client = client
            else:
                print('------------- Error -------------')
                exc_type, exc_obj, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                print(exc_type, fname, exc_tb.tb_lineno)
                print(exc_obj)
                automate = False
                print('Hit r and then backspace to start from this point')
Example #4
0
def DeepREINFORCE(cfg, env_process, env_folder):
    algorithm_cfg = read_cfg(config_filename='configs/DeepREINFORCE.cfg',
                             verbose=True)
    algorithm_cfg.algorithm = cfg.algorithm

    if 'GlobalLearningGlobalUpdate-SA' in algorithm_cfg.distributed_algo:
        # algorithm_cfg = update_algorithm_cfg(algorithm_cfg, cfg)
        cfg.num_agents = 1
    client = []
    # Connect to Unreal Engine and get the drone handle: client
    client, old_posit, initZ = connect_drone(ip_address=cfg.ip_address,
                                             phase=cfg.mode,
                                             num_agents=cfg.num_agents,
                                             client=client)
    initial_pos = old_posit.copy()
    # Load the initial positions for the environment
    reset_array, reset_array_raw, level_name, crash_threshold = initial_positions(
        cfg.env_name, initZ, cfg.num_agents)

    # Initialize System Handlers
    process = psutil.Process(getpid())

    # Load PyGame Screen
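    # The PyGame window captures the operator's key presses (consumed by check_user_input
    # in the main loop below).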
    screen = pygame_connect(phase=cfg.mode)

    fig_z = []
    fig_nav = []
    debug = False
    # Generate path where the weights will be saved
    cfg, algorithm_cfg = save_network_path(cfg=cfg,
                                           algorithm_cfg=algorithm_cfg)
    current_state = {}
    new_state = {}
    posit = {}
    name_agent_list = []
    data_tuple = {}
    agent = {}
    epi_num = {}
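    # Per-mode setup: in train mode each drone gets its own PedraAgent (plus an optional
    # global learner for GlobalLearningGlobalUpdate-MA); in infer mode a single drone is
    # created together with the plotting handles used below.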
    if cfg.mode == 'train':
        iter = {}
        wait_for_others = {}
        if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate-MA':
            print_orderly('global', 40)
            # Multiple agents act as data collectors for one global learner
            global_agent = PedraAgent(algorithm_cfg,
                                      client,
                                      name='DQN',
                                      vehicle_name='global')

        for drone in range(cfg.num_agents):
            name_agent = "drone" + str(drone)
            wait_for_others[name_agent] = False
            iter[name_agent] = 1
            epi_num[name_agent] = 1
            data_tuple[name_agent] = []
            name_agent_list.append(name_agent)
            print_orderly(name_agent, 40)
            # TODO: turn the neural network off if global agent is present
            agent[name_agent] = PedraAgent(algorithm_cfg,
                                           client,
                                           name='DQN',
                                           vehicle_name=name_agent)

            current_state[name_agent] = agent[name_agent].get_state()

    elif cfg.mode == 'infer':
        iter = 1
        name_agent = 'drone0'
        name_agent_list.append(name_agent)
        agent[name_agent] = PedraAgent(algorithm_cfg,
                                       client,
                                       name=name_agent + 'DQN',
                                       vehicle_name=name_agent)

        env_cfg = read_cfg(config_filename=env_folder + 'config.cfg')
        nav_x = []
        nav_y = []
        altitude = {}
        altitude[name_agent] = []
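        # initialize_infer returns the reference heights (p_z, f_z), the altitude figure
        # (fig_z/ax_z/line_z) and the top-down navigation figure (fig_nav/ax_nav/nav)
        # that are updated on every inference step.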
        p_z, f_z, fig_z, ax_z, line_z, fig_nav, ax_nav, nav = initialize_infer(
            env_cfg=env_cfg, client=client, env_folder=env_folder)
        nav_text = ax_nav.text(0, 0, '')

        # Select initial position
        reset_to_initial(0, reset_array, client, vehicle_name=name_agent)
        old_posit[name_agent] = client.simGetVehiclePose(
            vehicle_name=name_agent)

    # Initialize variables

    # iter = 1
    # num_collisions = 0
    episode = {}
    active = True

    print_interval = 1
    automate = True
    choose = False
    print_qval = False
    last_crash = {}
    ret = {}
    distance = {}
    num_collisions = {}
    level = {}
    level_state = {}
    level_posit = {}
    times_switch = {}
    last_crash_array = {}
    ret_array = {}
    distance_array = {}
    epi_env_array = {}
    log_files = {}

    # Set up per-agent bookkeeping and open one log file per agent
    hyphens = '-' * int((80 - len('Log files')) / 2)
    print(hyphens + ' ' + 'Log files' + ' ' + hyphens)
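    # level_state/level_posit and the *_array dicts hold per-level snapshots so an agent
    # can resume a level where it left off after an environment switch.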
    for name_agent in name_agent_list:
        ret[name_agent] = 0
        num_collisions[name_agent] = 0
        last_crash[name_agent] = 0
        level[name_agent] = 0
        episode[name_agent] = 0
        level_state[name_agent] = [None] * len(reset_array[name_agent])
        level_posit[name_agent] = [None] * len(reset_array[name_agent])
        times_switch[name_agent] = 0
        last_crash_array[name_agent] = np.zeros(shape=len(
            reset_array[name_agent]),
                                                dtype=np.int32)
        ret_array[name_agent] = np.zeros(shape=len(reset_array[name_agent]))
        distance_array[name_agent] = np.zeros(
            shape=len(reset_array[name_agent]))
        epi_env_array[name_agent] = np.zeros(shape=len(
            reset_array[name_agent]),
                                             dtype=np.int32)
        distance[name_agent] = 0
        # Log file
        log_path = algorithm_cfg.network_path + '/' + name_agent + '/' + cfg.mode + 'log.txt'
        print("Log path: ", log_path)
        log_files[name_agent] = open(log_path, 'w')

    # switch_env = False

    print_orderly('Simulation begins', 80)

    # save_posit = old_posit

    # last_crash_array = np.zeros(shape=len(level_name), dtype=np.int32)
    # ret_array = np.zeros(shape=len(level_name))
    # distance_array = np.zeros(shape=len(level_name))
    # epi_env_array = np.zeros(shape=len(level_name), dtype=np.int32)

    while active:
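        # Main loop: check_user_input polls the PyGame window for operator commands and
        # may update automate, algorithm_cfg, or the client.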
        try:
            active, automate, algorithm_cfg, client = check_user_input(
                active, automate, agent[name_agent], client,
                old_posit[name_agent], initZ, fig_z, fig_nav, env_folder, cfg,
                algorithm_cfg)

            if automate:

                if cfg.mode == 'train':

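                    # Move to the next starting level every switch_env_steps iterations.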
                    if iter[name_agent] % algorithm_cfg.switch_env_steps == 0:
                        switch_env = True
                    else:
                        switch_env = False

                    for name_agent in name_agent_list:
                        while not wait_for_others[name_agent]:
                            start_time = time.time()
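                            # Environment switch: save this agent's state, pose, return,
                            # distance, and episode count for the current level, advance
                            # to the next level, and restore any snapshot saved for it.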
                            if switch_env:
                                posit1_old = client.simGetVehiclePose(
                                    vehicle_name=name_agent)
                                times_switch[
                                    name_agent] = times_switch[name_agent] + 1
                                level_state[name_agent][level[
                                    name_agent]] = current_state[name_agent]
                                level_posit[name_agent][
                                    level[name_agent]] = posit1_old
                                last_crash_array[name_agent][
                                    level[name_agent]] = last_crash[name_agent]
                                ret_array[name_agent][
                                    level[name_agent]] = ret[name_agent]
                                distance_array[name_agent][
                                    level[name_agent]] = distance[name_agent]
                                epi_env_array[name_agent][
                                    level[name_agent]] = episode[name_agent]

                                level[name_agent] = (
                                    level[name_agent] + 1) % len(
                                        reset_array[name_agent])

                                print(
                                    name_agent + ': Transferring to level: ',
                                    level[name_agent], ' - ',
                                    level_name[name_agent][level[name_agent]])

                                if times_switch[name_agent] < len(
                                        reset_array[name_agent]):
                                    reset_to_initial(level[name_agent],
                                                     reset_array,
                                                     client,
                                                     vehicle_name=name_agent)
                                else:
                                    current_state[name_agent] = level_state[
                                        name_agent][level[name_agent]]
                                    posit1_old = level_posit[name_agent][
                                        level[name_agent]]
                                    reset_to_initial(level[name_agent],
                                                     reset_array,
                                                     client,
                                                     vehicle_name=name_agent)
                                    client.simSetVehiclePose(
                                        posit1_old,
                                        ignore_collison=True,
                                        vehicle_name=name_agent)
                                    time.sleep(0.1)

                                last_crash[name_agent] = last_crash_array[
                                    name_agent][level[name_agent]]
                                ret[name_agent] = ret_array[name_agent][
                                    level[name_agent]]
                                distance[name_agent] = distance_array[
                                    name_agent][level[name_agent]]
                                episode[name_agent] = epi_env_array[
                                    name_agent][int(level[name_agent] / 3)]
                                # environ = environ^True
                            else:
                                # TODO: policy from one global agent: DONE
                                if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate-MA':
                                    agent_this_drone = global_agent
                                else:
                                    agent_this_drone = agent[name_agent]

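                                # Query the policy network (the global learner's when
                                # running GlobalLearningGlobalUpdate-MA) for an action
                                # and execute it on this drone.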
                                action, action_type = policy_REINFORCE(
                                    current_state[name_agent],
                                    agent_this_drone)
                                # print('Evaluated Policy')
                                action_word = translate_action(
                                    action, algorithm_cfg.num_actions)
                                # Take the action
                                # agent[name_agent].take_action(action, algorithm_cfg.num_actions, SimMode=cfg.SimMode)
                                agent[name_agent].take_action(
                                    action,
                                    algorithm_cfg.num_actions,
                                    Mode='static')
                                # print('Took Action')
                                new_state[name_agent] = agent[
                                    name_agent].get_state()
                                # print('got state')
                                new_depth1, thresh = agent[
                                    name_agent].get_CustomDepth(cfg)
                                # print('Got state')

                                # Get GPS information
                                posit[name_agent] = client.simGetVehiclePose(
                                    vehicle_name=name_agent)
                                position = posit[name_agent].position
                                old_p = np.array([
                                    old_posit[name_agent].position.x_val,
                                    old_posit[name_agent].position.y_val
                                ])
                                new_p = np.array(
                                    [position.x_val, position.y_val])

                                # calculate distance
                                distance[name_agent] = distance[
                                    name_agent] + np.linalg.norm(new_p - old_p)
                                old_posit[name_agent] = posit[name_agent]
                                # print('calculated distance')
                                reward, crash = agent[name_agent].reward_gen(
                                    new_depth1, action, crash_threshold,
                                    thresh, debug, cfg)

                                ret[name_agent] = ret[name_agent] + reward
                                agent_state = agent[name_agent].GetAgentState()
                                # print('generated reward')

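                                # A collision reported by the simulator, or a drone that
                                # has barely moved, ends the episode as a crash with
                                # reward -1.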
                                if agent_state.has_collided or distance[
                                        name_agent] < 0.01:
                                    num_collisions[
                                        name_agent] = num_collisions[
                                            name_agent] + 1
                                    if agent_state.has_collided:
                                        print(
                                            'Crash: Collision detected from environment'
                                        )
                                    else:
                                        print(
                                            'Crash: Collision detected from distance'
                                        )
                                    crash = True
                                    reward = -1

                                data_tuple[name_agent].append([
                                    current_state[name_agent], action, reward,
                                    crash
                                ])

                                # Train if episode ends
                                if crash:
                                    # Don't start another episode unless all agents have ended their episodes
                                    wait_for_others[name_agent] = True
                                    if distance[name_agent] < 0.01:
                                        # Drone won't move, reconnect
                                        print(
                                            'Recovering from drone mobility issue'
                                        )

                                        agent[
                                            name_agent].client, old_posit, initZ = connect_drone(
                                                ip_address=cfg.ip_address,
                                                phase=cfg.mode,
                                                num_agents=cfg.num_agents,
                                                client=client)

                                        time.sleep(2)
                                        wait_for_others[name_agent] = False

                                    else:
                                        agent[
                                            name_agent].network_model.log_to_tensorboard(
                                                tag='Return',
                                                group=name_agent,
                                                value=ret[name_agent],
                                                index=epi_num[name_agent])

                                        agent[
                                            name_agent].network_model.log_to_tensorboard(
                                                tag='Safe Flight',
                                                group=name_agent,
                                                value=distance[name_agent],
                                                index=epi_num[name_agent])

                                        agent[
                                            name_agent].network_model.log_to_tensorboard(
                                                tag='Episode Length',
                                                group=name_agent,
                                                value=len(
                                                    data_tuple[name_agent]),
                                                index=epi_num[name_agent])

                                        # Train episode
                                        train_REINFORCE(
                                            data_tuple[name_agent],
                                            algorithm_cfg.batch_size,
                                            agent_this_drone,
                                            algorithm_cfg.learning_rate,
                                            algorithm_cfg.input_size,
                                            algorithm_cfg.gamma,
                                            epi_num[name_agent])
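                                        # Log one network weight to TensorBoard as a rough
                                        # sanity check that training is updating the parameters.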
                                        c = agent_this_drone.network_model.get_vars(
                                        )[15]
                                        agent_this_drone.network_model.log_to_tensorboard(
                                            tag='weight',
                                            group=name_agent,
                                            value=c[0],
                                            index=epi_num[name_agent])

                                        data_tuple[name_agent] = []
                                        epi_num[name_agent] += 1
                                        ret[name_agent] = 0
                                        distance[name_agent] = 0
                                        last_crash[name_agent] = 0

                                        # print('train done, now reset')
                                        reset_to_initial(
                                            level[name_agent],
                                            reset_array,
                                            client,
                                            vehicle_name=name_agent)
                                        # time.sleep(0.2)
                                        current_state[name_agent] = agent[
                                            name_agent].get_state()
                                        old_posit[
                                            name_agent] = client.simGetVehiclePose(
                                                vehicle_name=name_agent)

                                        if epi_num[name_agent] % 100 == 0:
                                            agent_this_drone.network_model.save_network(
                                                algorithm_cfg.network_path,
                                                epi_num[name_agent])

                                        # if all are waiting for others, then we can reset.
                                        if all(wait_for_others.values()):
                                            # Now we can communicate weights
                                            print(
                                                'Communicating the weights and averaging them'
                                            )
                                            communicate_across_agents(
                                                agent, name_agent_list,
                                                algorithm_cfg)
                                            for n in name_agent_list:
                                                wait_for_others[n] = False

                                else:
                                    current_state[name_agent] = new_state[
                                        name_agent]

                                time_exec = time.time() - start_time
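                                # Per-iteration telemetry: log GPU memory/utilization and
                                # host memory usage to TensorBoard.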
                                gpu_memory, gpu_utilization, sys_memory = get_SystemStats(
                                    process, cfg.NVIDIA_GPU)

                                for i in range(0, len(gpu_memory)):
                                    tag_mem = 'GPU' + str(i) + '-Memory-GB'
                                    tag_util = 'GPU' + str(i) + '-Utilization-%'
                                    agent_this_drone.network_model.log_to_tensorboard(
                                        tag=tag_mem,
                                        group='SystemStats',
                                        value=gpu_memory[i],
                                        index=iter[name_agent])
                                    agent_this_drone.network_model.log_to_tensorboard(
                                        tag=tag_util,
                                        group='SystemStats',
                                        value=gpu_utilization[i],
                                        index=iter[name_agent])
                                agent_this_drone.network_model.log_to_tensorboard(
                                    tag='Memory-GB',
                                    group='SystemStats',
                                    value=sys_memory,
                                    index=iter[name_agent])

                                s_log = '{:<6s} - Level {:>2d} - Iter: {:>6d}/{:<5d} {:<8s}-{:>5s} lr: {:>1.8f} Ret = {:>+6.4f} Last Crash = {:<5d} t={:<1.3f} SF = {:<5.4f}  Reward: {:<+1.4f}  '.format(
                                    name_agent, int(level[name_agent]),
                                    iter[name_agent], epi_num[name_agent],
                                    action_word, action_type,
                                    algorithm_cfg.learning_rate,
                                    ret[name_agent], last_crash[name_agent],
                                    time_exec, distance[name_agent], reward)

                                if iter[name_agent] % print_interval == 0:
                                    print(s_log)
                                log_files[name_agent].write(s_log + '\n')

                                last_crash[
                                    name_agent] = last_crash[name_agent] + 1
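                                # In debug mode, display the current and new state images
                                # side by side.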
                                if debug:
                                    cv2.imshow(
                                        name_agent,
                                        np.hstack(
                                            (np.squeeze(
                                                current_state[name_agent],
                                                axis=0),
                                             np.squeeze(new_state[name_agent],
                                                        axis=0))))
                                    cv2.waitKey(1)

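                                # Stop the automated training loop once total_episodes
                                # episodes have been completed for this agent.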
                                if epi_num[
                                        name_agent] % algorithm_cfg.total_episodes == 0:
                                    print(automate)
                                    automate = False

                                iter[name_agent] += 1
                    # if iter % algorithm_cfg.communication_interval == 0:
                    #     print('Communicating the weights and averaging them')
                    #     communicate_across_agents(agent, name_agent_list, algorithm_cfg)

                    # iter += 1

                elif cfg.mode == 'infer':
                    # Inference phase
                    agent_state = agent[name_agent].GetAgentState()
                    if agent_state.has_collided:
                        print('Drone collided')
                        print("Total distance traveled: ",
                              np.round(distance[name_agent], 2))
                        active = False
                        client.moveByVelocityAsync(
                            vx=0,
                            vy=0,
                            vz=0,
                            duration=1,
                            vehicle_name=name_agent).join()

                        if nav_x:  # nav_x is empty if the drone collides on the first iteration
                            ax_nav.plot(nav_x.pop(),
                                        nav_y.pop(),
                                        'r*',
                                        linewidth=20)
                        file_path = env_folder + 'results/'
                        fig_z.savefig(file_path + 'altitude_variation.png',
                                      dpi=500)
                        fig_nav.savefig(file_path + 'navigation.png', dpi=500)
                        close_env(env_process)
                        print('Figures saved')
                    else:
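                        # No collision: same pose, distance, and plotting updates as in
                        # the DQN inference loop, but the next action comes from
                        # policy_REINFORCE.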
                        posit[name_agent] = client.simGetVehiclePose(
                            vehicle_name=name_agent)
                        distance[name_agent] = distance[
                            name_agent] + np.linalg.norm(
                                np.array([
                                    old_posit[name_agent].position.x_val -
                                    posit[name_agent].position.x_val,
                                    old_posit[name_agent].position.y_val -
                                    posit[name_agent].position.y_val
                                ]))
                        # altitude[name_agent].append(-posit[name_agent].position.z_val+p_z)
                        altitude[name_agent].append(
                            -posit[name_agent].position.z_val - f_z)

                        quat = (posit[name_agent].orientation.w_val,
                                posit[name_agent].orientation.x_val,
                                posit[name_agent].orientation.y_val,
                                posit[name_agent].orientation.z_val)
                        yaw = euler_from_quaternion(quat)[2]

                        x_val = posit[name_agent].position.x_val
                        y_val = posit[name_agent].position.y_val
                        z_val = posit[name_agent].position.z_val

                        nav_x.append(env_cfg.alpha * x_val + env_cfg.o_x)
                        nav_y.append(env_cfg.alpha * y_val + env_cfg.o_y)
                        nav.set_data(nav_x, nav_y)
                        nav_text.remove()
                        nav_text = ax_nav.text(
                            25,
                            55,
                            'Distance: ' +
                            str(np.round(distance[name_agent], 2)),
                            style='italic',
                            bbox={
                                'facecolor': 'white',
                                'alpha': 0.5
                            })

                        line_z.set_data(np.arange(len(altitude[name_agent])),
                                        altitude[name_agent])
                        ax_z.set_xlim(0, len(altitude[name_agent]))
                        fig_z.canvas.draw()
                        fig_z.canvas.flush_events()

                        current_state[name_agent] = agent[
                            name_agent].get_state()
                        action, action_type = policy_REINFORCE(
                            current_state[name_agent], agent[name_agent])
                        action_word = translate_action(
                            action, algorithm_cfg.num_actions)
                        # Take continuous action
                        # agent[name_agent].take_action(action, algorithm_cfg.num_actions, Mode='static')

                        agent[name_agent].take_action(
                            action, algorithm_cfg.num_actions, Mode='static')
                        old_posit[name_agent] = posit[name_agent]

                        # Verbose and log making
                        s_log = 'Position = ({:<3.2f},{:<3.2f}, {:<3.2f}) Orientation={:<1.3f} Predicted Action: {:<8s}  '.format(
                            x_val, y_val, z_val, yaw, action_word)

                        print(s_log)
                        log_files[name_agent].write(s_log + '\n')

        except Exception as e:
            if str(e) == 'cannot reshape array of size 1 into shape (0,0,3)':
                print('Recovering from AirSim error')

                client, old_posit, initZ = connect_drone(
                    ip_address=cfg.ip_address,
                    phase=cfg.mode,
                    num_agents=cfg.num_agents,
                    client=client)
                time.sleep(2)
                agent[name_agent].client = client
                wait_for_others[name_agent] = False
            else:
                print('------------- Error -------------')
                exc_type, exc_obj, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                print(exc_type, fname, exc_tb.tb_lineno)
                print(exc_obj)
                automate = False
                print('Hit r and then backspace to start from this point')