Example #1
class MultiAgent_DDPG(BaseEnvironment):

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': render2d.FPS
    }

    def __init__(self,
                 env_config,
                 test_mode=False,
                 render_mode='2d',
                 verbose=False):
        """
        The __init__ method declares all class atributes and calls
        the self.reset() to intialize them properly.

        Parameters
        ----------
            env_config : dict
                Configuration parameters for the environment.
                The default values are set in __init__.py
            test_mode : bool
                If test_mode is True, the environment will not be autonatically reset
                due to too low cumulative reward or too large distance from the path.
            render_mode : {'2d', '3d', 'both'}
                Whether to use 2d or 3d rendering. 'both' is currently broken.
            verbose
                Whether to print debugging information.
        """

        self.test_mode = test_mode
        self.render_mode = render_mode
        self.verbose = verbose
        self.config = env_config

        # Setting dimension of observation vector
        self.n_observations = len(
            Vessel.NAVIGATION_FEATURES) + 4 * self.config["n_sectors"]

        self.episode = 0
        self.total_t_steps = 0
        self.t_step = 0
        self.history = []

        # Declaring attributes (path and obstacles are exposed as properties below)
        self.main_vessel = None

        self.reached_goal = None
        self.collision = None
        self.progress = None
        self.cumulative_reward = None
        self.last_reward = None
        self.last_episode = None
        self.rng = None
        self._tmp_storage = None

        self._action_space = gym.spaces.Box(low=np.array([-1, -1]),
                                            high=np.array([1, 1]),
                                            dtype=np.float32)
        self._observation_space = gym.spaces.Box(
            low=np.array([-1] * self.n_observations),
            high=np.array([1] * self.n_observations),
            dtype=np.float32)

        # Initializing rendering
        self.viewer2d = None
        self.viewer3d = None
        if self.render_mode == '2d' or self.render_mode == 'both':
            render2d.init_env_viewer(self)
        if self.render_mode == '3d' or self.render_mode == 'both':
            render3d.init_env_viewer(self,
                                     autocamera=self.config["autocamera3d"])

        self.reset()

    @property
    def path(self):
        return self.main_vessel.path

    @property
    def obstacles(self):
        return self.main_vessel.obstacles

    @property
    def vessel(self):
        return self.main_vessel

    def _generate(self):

        print('In GENERATE in MA')

        self.main_vessel = Vessel(self.config,
                                  width=self.config["vessel_width"])
        prog = 0
        self.path_prog_hist = np.array([prog])
        self.max_path_prog = prog
        self.rewarder = MultiRewarder(self.main_vessel)

        print('Ownship created!')
        self.moving_obstacles = [self.main_vessel]
        self.static_obstacles = []
        self.queued_vessels = []

        # Adding static obstacles (currently disabled):
        # for _ in range(8):
        #     obstacle = CircularObstacle(*helpers.generate_obstacle(self.rng, self.main_vessel.path, self.main_vessel))
        #     self.static_obstacles.append(obstacle)

        self._vessel_count = 1
        # Adding moving obstacles (ships)
        curr_vessel_count = self._vessel_count
        for i in range(curr_vessel_count, curr_vessel_count + 5):
            ship = Vessel(self.config,
                          width=self.config["vessel_width"],
                          index=i,
                          vessel_pos=self.main_vessel.position)
            self.moving_obstacles.append(ship)
            print(f'Ship {i} has been created')
            self._vessel_count += 1

        for ship in self.moving_obstacles:
            other_ships = [
                x for x in self.moving_obstacles if x.index != ship.index
            ]
            ship.obstacles = np.hstack([self.static_obstacles, other_ships])

        print('Exiting GENERATE in MA')

    def step(self, action: np.ndarray) -> (np.ndarray, float, bool, dict):
        """
        Steps the environment by one timestep. Returns observation, reward, done, info.

        Parameters
        ----------
        action : np.ndarray
            [thrust_input, torque_input].

        Returns
        -------
        obs : np.ndarray
            Observation of the environment after the action is performed.
        reward : float
            The reward for performing the action at this timestep.
        done : bool
            If True, the episode has ended, due to either a collision or having reached the goal position.
        info : dict
            Dictionary with data used for reporting or debugging.
        """
        print('IN STEP')
        # Each call to step() advances one vessel: the own-ship when the
        # queue is empty, otherwise the next queued obstacle vessel.
        if len(self.queued_vessels) == 0:
            current_vessel = self.main_vessel
        else:
            current_vessel = self.queued_vessels.pop(0)

        current_index = current_vessel.index
        print(f'Current vessel is ship {current_index}')


        # Map thrust input from [-1, 1] to [0, 1] for compatibility with RL
        # algorithms that require symmetric action spaces
        action[0] = (action[0] + 1) / 2
        current_vessel.step(action)

        reward = self.rewarder.calculate(current_vessel)
        self.cumulative_reward += reward

        vessel_data = self.main_vessel.req_latest_data()
        self.collision = vessel_data['collision']
        self.reached_goal = vessel_data['reached_goal']
        self.progress = vessel_data['progress']

        info = {}
        info['collision'] = self.collision
        info['reached_goal'] = self.reached_goal
        info['progress'] = self.progress

        done = self._isdone()
        self._save_latest_step()

        self.moving_obstacles = self.main_vessel.nearby_vessels

        # Spawning additional moving obstacles (ships) every 150 timesteps
        if self.t_step % 150 == 0:
            curr_vessel_count = self._vessel_count
            for i in range(curr_vessel_count, curr_vessel_count + 5):
                ship = Vessel(self.config,
                              width=self.config["vessel_width"],
                              index=i,
                              vessel_pos=self.main_vessel.position)

                self.moving_obstacles.append(ship)
                print(f'Ship {i} has been created')
                self._vessel_count += 1

            for ship in self.moving_obstacles:
                other_ships = [
                    x for x in self.moving_obstacles if x.index != ship.index
                ]
                ship.obstacles = np.hstack(
                    [self.static_obstacles, other_ships])

        # Round-robin scheduling: once the queue is empty, refill it with all
        # obstacle vessels and let the own-ship (index 0) act next.
        if len(self.queued_vessels) == 0:
            self.queued_vessels = [
                x for x in self.moving_obstacles if x.index != 0
            ]
            next_vessel = self.main_vessel
        else:
            next_vessel = self.queued_vessels[0]
        obs = next_vessel.observe()

        self.t_step += 1
        print('EXITING STEP')

        return (obs, reward, done, info)

    def _update(self):
        valid_ships = [self.main_vessel]
        for ship in self.moving_obstacles:
            if not ship.collision and ship.index != 0:  # and ship.reachable
                valid_ships.append(ship)

        print([x.index for x in valid_ships])

        self.moving_obstacles = valid_ships

        for ship in self.moving_obstacles:
            other_ships = [
                x for x in self.moving_obstacles if x.index != ship.index
            ]
            ship.obstacles = np.hstack([self.static_obstacles, other_ships])

    def observe(self):
        navigation_states = self.main_vessel.navigate(self.path)
        sector_closenesses, sector_velocities, sector_moving_obstacles = self.main_vessel.perceive(
            self.obstacles)

        obs = np.concatenate([
            navigation_states, sector_closenesses, sector_velocities,
            sector_moving_obstacles
        ])
        return obs
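
Below is a minimal usage sketch for the environment above. It is an illustration under assumptions, not part of the original source: it presumes BaseEnvironment follows the standard gym API, and the config values shown are placeholders for the defaults defined in the package's __init__.py (only keys actually referenced by the code above are listed).

import numpy as np

# Hypothetical config values; the real defaults live in the package's __init__.py.
env_config = {
    "n_sectors": 25,
    "vessel_width": 4,
    "autocamera3d": False,
    "t_step_size": 1.0,
}

env = MultiAgent_DDPG(env_config, render_mode='2d')
obs = env.observe()
for _ in range(100):
    # Sample a random [thrust_input, torque_input] action in [-1, 1]^2
    action = env._action_space.sample()
    obs, reward, done, info = env.step(action)
    if done:
        break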
Example #2
class TwoVessel_HeadOn(BaseEnvironment):
    def __init__(self,
                 env_config,
                 test_mode=False,
                 render_mode='2d',
                 verbose=False):
        """
        The __init__ method declares all class atributes and calls
        the self.reset() to intialize them properly.

        Parameters
        ----------
            env_config : dict
                Configuration parameters for the environment.
                The default values are set in __init__.py
            test_mode : bool
                If test_mode is True, the environment will not be autonatically reset
                due to too low cumulative reward or too large distance from the path.
            render_mode : {'2d', '3d', 'both'}
                Whether to use 2d or 3d rendering. 'both' is currently broken.
            verbose
                Whether to print debugging information.
        """

        self.test_mode = test_mode
        self.render_mode = render_mode
        self.verbose = verbose
        self.config = env_config

        # Setting dimension of observation vector
        self.n_observations = len(
            Vessel.NAVIGATION_FEATURES
        ) + 3 * self.config["n_sectors"] + ColavRewarder.N_INSIGHTS

        self.episode = 0
        self.total_t_steps = 0
        self.t_step = 0
        self.history = []

        # Declaring attributes (path and obstacles are exposed as properties below)
        self.main_vessel = None
        self.agent = None

        self.reached_goal = None
        self.collision = None
        self.progress = None
        self.cumulative_reward = None
        self.last_reward = None
        self.last_episode = None
        self.rng = None
        self._tmp_storage = None

        self._action_space = gym.spaces.Box(low=np.array([-1, -1]),
                                            high=np.array([1, 1]),
                                            dtype=np.float32)
        self._observation_space = gym.spaces.Box(
            low=np.array([-1] * self.n_observations),
            high=np.array([1] * self.n_observations),
            dtype=np.float32)

        # Initializing rendering
        self._viewer2d = None
        self._viewer3d = None
        if self.render_mode == '2d' or self.render_mode == 'both':
            render2d.init_env_viewer(self)
        if self.render_mode == '3d' or self.render_mode == 'both':
            render3d.init_env_viewer(self,
                                     autocamera=self.config["autocamera3d"])

        # Loading a pretrained PPO2 policy that controls the obstacle vessel.
        # NOTE: hardcoded absolute path to a local checkpoint.
        self.agent = PPO2.load(
            'C:/Users/amalih/OneDrive - NTNU/github/logs/agents/MultiAgentPPO-v0/1591171914ppo/79288.pkl'
        )

        self.rewarder_dict = {}

        self.reset()
        print('Init done')

    def _generate(self):

        waypoints1 = np.vstack([[0, 0], [0, 500]]).T
        path1 = Path(waypoints1)

        init_pos1 = path1(0)
        init_angle1 = path1.get_direction(0)
        init_state1 = np.hstack([init_pos1, init_angle1])

        self.main_vessel = Vessel(self.config,
                                  init_state=init_state1,
                                  init_path=path1,
                                  width=2)  # overrides self.config["vessel_width"]
        self.main_vessel.path = path1
        self.rewarder_dict[self.main_vessel.index] = ColavRewarder(
            self.main_vessel)
        self.rewarder = self.rewarder_dict[self.main_vessel.index]

        prog = 0
        self.path_prog_hist = np.array([prog])
        self.max_path_prog = prog
        self.moving_obstacles = [self.main_vessel]

        # Adding a moving obstacle on a reciprocal path (head-on encounter)

        waypoints2 = np.vstack([[0, 150], [0, -400]]).T
        path2 = Path(waypoints2)

        init_pos2 = path2(0)
        init_angle2 = path2.get_direction(0)
        init_state2 = np.hstack([init_pos2, init_angle2])

        vessel = Vessel(self.config,
                        init_state=init_state2,
                        init_path=path2,
                        index=1,
                        width=2)  # overrides self.config["vessel_width"]
        self.rewarder_dict[vessel.index] = ColavRewarder(vessel)
        self.moving_obstacles.append(vessel)
        vessel.path = path2

        for vessel in self.moving_obstacles:
            other_vessels = [
                x for x in self.moving_obstacles if x.index != vessel.index
            ]
            vessel.obstacles = np.hstack([other_vessels])

        print('Generated vessels!')

    @property
    def path(self):
        return self.main_vessel.path

    @property
    def obstacles(self):
        return self.main_vessel.obstacles

    @property
    def vessel(self):
        return self.main_vessel

    def step(self, action: np.ndarray) -> (np.ndarray, float, bool, dict):
        """
        Steps the environment by one timestep. Returns observation, reward, done, info.

        Parameters
        ----------
        action : np.ndarray
            [thrust_input, torque_input].

        Returns
        -------
        obs : np.ndarray
            Observation of the environment after the action is performed.
        reward : float
            The reward for performing the action at this timestep.
        done : bool
            If True, the episode has ended, due to either a collision or having reached the goal position.
        info : dict
            Dictionary with data used for reporting or debugging.
        """

        # Map thrust input from [-1, 1] to [0, 1] for compatibility with RL
        # algorithms that require symmetric action spaces
        action[0] = (action[0] + 1) / 2
        if np.isnan(action).any():
            action = np.zeros(action.shape)
        self.main_vessel.step(action)

        for vessel in self.moving_obstacles:
            if vessel.index != 0:
                obs = vessel.observe()
                reward = self.rewarder_dict[vessel.index].calculate()
                insight = self.rewarder_dict[vessel.index].insight()
                obs = np.concatenate([insight, obs])
                action, _states = self.agent.predict(obs, deterministic=True)
                action[0] = (action[0] + 1) / 2
                vessel.step(action)

        # Testing criteria for ending the episode
        done = self._isdone()
        self._save_latest_step()

        # Getting observation vector
        obs = self.observe()
        vessel_data = self.main_vessel.req_latest_data()
        self.collision = vessel_data['collision']
        self.reached_goal = vessel_data['reached_goal']
        self.progress = vessel_data['progress']

        # Receiving agent's reward
        reward = self.rewarder.calculate()
        self.last_reward = reward
        #self.cumulative_reward += reward

        info = {}
        info['collision'] = self.collision
        info['reached_goal'] = self.reached_goal
        info['progress'] = self.progress

        self.t_step += 1

        return (obs, reward, done, info)
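
For reference, a minimal rollout sketch for the head-on scenario above. This is an illustration under assumptions, not part of the original source: it presumes BaseEnvironment follows the standard gym API, and the own-ship checkpoint path is hypothetical. Note that the environment already drives the obstacle vessel internally with the PPO2 agent loaded in __init__; the caller only supplies actions for the own-ship (index 0).

from stable_baselines import PPO2

env = TwoVessel_HeadOn(env_config)  # env_config as documented in __init__
# Hypothetical checkpoint for the own-ship policy; replace with a real path.
model = PPO2.load('logs/agents/TwoVessel_HeadOn/own_ship.pkl')

obs = env.observe()
done = False
while not done:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
print(info)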