Example #1
    def act(self, action: np.ndarray) -> None:
        if self.clip:
            action = np.clip(action, -1, 1)
        if self.longitudinal and self.lateral:
            self.controlled_vehicle.act({
                "acceleration": utils.lmap(action[0], [-1, 1], self.acceleration_range),
                "steering": utils.lmap(action[1], [-1, 1], self.steering_range),
            })
        elif self.longitudinal:
            self.controlled_vehicle.act({
                "acceleration": utils.lmap(action[0], [-1, 1], self.acceleration_range),
                "steering": 0,
            })
        elif self.lateral:
            self.controlled_vehicle.act({
                "acceleration": 0,
                "steering": utils.lmap(action[0], [-1, 1], self.steering_range),
            })
        self.last_action = action
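All of these snippets lean on highway-env's utils.lmap helper to rescale values between ranges. As a minimal sketch of the assumed linear-mapping semantics (not the library's exact source), it maps a value v from interval x onto interval y:

def lmap(v, x, y):
    # Linearly map v from the range x = [x0, x1] to the range y = [y0, y1]
    return y[0] + (v - x[0]) * (y[1] - y[0]) / (x[1] - x[0])

print(lmap(0.5, [-1, 1], [-5, 5]))  # 2.5: an action of 0.5 mapped onto a [-5, 5] m/s^2 acceleration range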
Example #2
    def normalize_obs(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Normalize the observation values.

        For now, assume that the road is straight along the x axis.
        :param pd.DataFrame df: observation data
        """
        if not self.features_range:
            side_lanes = self.env.road.network.all_side_lanes(
                self.observer_vehicle.lane_index)
            self.features_range = {
                "x": [-5.0 * MDPVehicle.SPEED_MAX, 5.0 * MDPVehicle.SPEED_MAX],
                "y": [
                    -AbstractLane.DEFAULT_WIDTH * len(side_lanes),
                    AbstractLane.DEFAULT_WIDTH * len(side_lanes)
                ],
                "vx": [-2 * MDPVehicle.SPEED_MAX, 2 * MDPVehicle.SPEED_MAX],
                "vy": [-2 * MDPVehicle.SPEED_MAX, 2 * MDPVehicle.SPEED_MAX]
            }
        for feature, f_range in self.features_range.items():
            if feature in df:
                df[feature] = utils.lmap(df[feature], [f_range[0], f_range[1]],
                                         [-1, 1])
                if self.clip:
                    df[feature] = np.clip(df[feature], -1, 1)
        return df
Example #3
    def _reward(self, action: int) -> float:
        """
        The vehicle is rewarded for driving at high speed, on lanes to the right, and for avoiding collisions.

        But an additional altruistic penalty is also suffered if any vehicle on the merging lane has a low speed.

        :param action: the action performed
        :return: the reward of the state-action transition
        """
        action_reward = {
            0: self.LANE_CHANGE_REWARD,
            1: 0,
            2: self.LANE_CHANGE_REWARD,
            3: 0,
            4: 0
        }
        reward = self.COLLISION_REWARD * self.vehicle.crashed \
                 + self.RIGHT_LANE_REWARD * self.vehicle.lane_index[2] / 1 \
                 + self.HIGH_SPEED_REWARD * self.vehicle.speed_index / (self.vehicle.SPEED_COUNT - 1)

        # Altruistic penalty
        for vehicle in self.road.vehicles:
            if vehicle.lane_index == ("b", "c", 2) and isinstance(
                    vehicle, ControlledVehicle):
                reward += self.MERGING_SPEED_REWARD * \
                          (vehicle.target_speed - vehicle.speed) / vehicle.target_speed

        return utils.lmap(action_reward[action] + reward, [
            self.COLLISION_REWARD + self.MERGING_SPEED_REWARD,
            self.HIGH_SPEED_REWARD + self.RIGHT_LANE_REWARD
        ], [0, 1])
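For intuition, the final lmap call above rescales the raw reward from its theoretical bounds into [0, 1]. A short sketch with hypothetical reward constants (the real class attributes may differ):

def lmap(v, x, y):
    return y[0] + (v - x[0]) * (y[1] - y[0]) / (x[1] - x[0])

# hypothetical constants, for illustration only
COLLISION_REWARD, MERGING_SPEED_REWARD = -1.0, -0.5
HIGH_SPEED_REWARD, RIGHT_LANE_REWARD = 0.2, 0.1

raw = 0.0  # e.g. no crash, leftmost lane, lowest speed, no altruistic penalty
print(lmap(raw, [COLLISION_REWARD + MERGING_SPEED_REWARD,
                 HIGH_SPEED_REWARD + RIGHT_LANE_REWARD], [0, 1]))  # ~0.83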
Example #4
 def _agent_reward(self, action: int, vehicle: Vehicle) -> float:
     reward = self.config["collision_reward"] * vehicle.crashed \
              + self.HIGH_SPEED_REWARD * (vehicle.speed_index == vehicle.SPEED_COUNT - 1)
     reward = self.ARRIVED_REWARD if self.has_arrived(vehicle) else reward
     if self.config["normalize_reward"]:
         reward = utils.lmap(
             reward, [self.config["collision_reward"], self.ARRIVED_REWARD],
             [0, 1])
     return reward
Example #5
 def _reward(self, action: int) -> float:
     lane_change = action == 0 or action == 2
     reward = self.COLLISION_REWARD * self.vehicle.crashed \
              + self.HIGH_SPEED_REWARD * MDPVehicle.get_speed_index(self.vehicle) / max(MDPVehicle.SPEED_COUNT - 1, 1) \
              + self.LANE_CHANGE_REWARD * lane_change
     return utils.lmap(reward, [
         self.COLLISION_REWARD + self.LANE_CHANGE_REWARD,
         self.HIGH_SPEED_REWARD
     ], [0, 1])
Example #6
    def observe(self) -> np.ndarray:
        if not self.env.road:
            return np.zeros(self.space().shape)

        if self.absolute:
            raise NotImplementedError()
        else:
            # Add nearby traffic
            self.grid.fill(0)
            df = pd.DataFrame.from_records([
                v.to_dict(self.observer_vehicle)
                for v in self.env.road.vehicles
            ])
            # Normalize
            df = self.normalize(df)
            # Fill-in features
            for layer, feature in enumerate(self.features):
                for _, vehicle in df.iterrows():
                    x, y = vehicle["x"], vehicle["y"]
                    # Recover unnormalized coordinates for cell index
                    if "x" in self.features_range:
                        x = utils.lmap(x, [-1, 1], [
                            self.features_range["x"][0],
                            self.features_range["x"][1]
                        ])
                    if "y" in self.features_range:
                        y = utils.lmap(y, [-1, 1], [
                            self.features_range["y"][0],
                            self.features_range["y"][1]
                        ])
                    cell = (int((x - self.grid_size[0, 0]) / self.grid_step[0]),
                            int((y - self.grid_size[1, 0]) / self.grid_step[1]))
                    if 0 <= cell[1] < self.grid.shape[-2] and 0 <= cell[0] < self.grid.shape[-1]:
                        self.grid[layer, cell[1], cell[0]] = vehicle[feature]
            # Clip
            obs = np.clip(self.grid, -1, 1)
            return obs
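The cell lookup in observe() converts a vehicle's de-normalized position into occupancy-grid indices. A minimal sketch with assumed grid parameters (a hypothetical grid spanning [-10, 10] m on both axes with 2 m cells):

import numpy as np

grid_size = np.array([[-10.0, 10.0], [-10.0, 10.0]])  # assumed [min, max] per axis
grid_step = np.array([2.0, 2.0])                      # assumed cell size in metres

x, y = 3.0, -4.5  # unnormalized coordinates of a nearby vehicle
cell = (int((x - grid_size[0, 0]) / grid_step[0]),
        int((y - grid_size[1, 0]) / grid_step[1]))
print(cell)  # (6, 2): column 6, row 2 of the grid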
Example #7
    def _reward(self, action: Action):  # -> float: now we return a list
        """
        The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions.
        :param action: the last action performed
        :return: the corresponding reward
        """

        # Note: the return type changed from float to a list of per-agent rewards
        n_defenders = self.config["n_defenders"]
        n_attackers = self.config["n_attackers"]
        n_dummies = self.config["n_dummies"]
        rewards = []
        for vehicle in self.controlled_vehicles:
        
            neighbours = self.road.network.all_side_lanes(vehicle.lane_index)
            lane = vehicle.target_lane_index[2] if isinstance(vehicle, ControlledVehicle) \
                else vehicle.lane_index[2]
            scaled_speed = utils.lmap(vehicle.speed, self.config["reward_speed_range"], [0, 1])
            reward = \
                + self.config["collision_reward"] * vehicle.crashed \
                + self.RIGHT_LANE_REWARD * lane / max(len(neighbours) - 1, 1) \
                + self.HIGH_SPEED_REWARD * np.clip(scaled_speed, 0, 1)
            #reward = utils.lmap(reward,
            #              [self.config["collision_reward"], self.HIGH_SPEED_REWARD + self.RIGHT_LANE_REWARD],
            #              [0, 1])
            #reward = 0 if not vehicle.on_road else reward
            reward = -1 if not vehicle.on_road else reward
            '''
            scaled_speed = utils.lmap(vehicle.speed, self.config["reward_speed_range"], [0, 1])
            reward = \
                + self.config["collision_reward"] * vehicle.crashed \
                + self.HIGH_SPEED_REWARD * np.clip(scaled_speed, 0, 1)
            #reward = utils.lmap(reward,
            #                    [self.config["collision_reward"], self.HIGH_SPEED_REWARD ],
            #                    [0, 1])
            reward = -1 if not vehicle.on_road else reward
            '''
            rewards.append(reward)

            # save configuration to self._inner_backup_config
            if self.config["enable_backtrack"]:
                self._save_backtrack_configuration(action)
            
            # reward correction based on the last several steps' behavior
            # currently reward is not changed. Just implement backtrack
            if self.vehicle.crashed:
                reward = self._reward_backward_correction(reward)

        return rewards
Example #8
    def normalize(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Normalize the observation values.

        For now, assume that the road is straight along the x axis.
        :param pd.DataFrame df: observation data
        """
        if not self.features_range:
            self.features_range = {
                "vx": [-2 * MDPVehicle.SPEED_MAX, 2 * MDPVehicle.SPEED_MAX],
                "vy": [-2 * MDPVehicle.SPEED_MAX, 2 * MDPVehicle.SPEED_MAX]
            }
        for feature, f_range in self.features_range.items():
            if feature in df:
                df[feature] = utils.lmap(df[feature], [f_range[0], f_range[1]],
                                         [-1, 1])
        return df
Example #9
    def _reward(self, action: Action):  # -> float: now we return a list
        """
        The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions.
        :param action: the last action performed
        :return: the corresponding reward
        """

        # Note: the return type changed from float to a list of per-agent rewards

        rewards = []
        for i, vehicle in enumerate(self.controlled_vehicles):

            neighbours = self.road.network.all_side_lanes(vehicle.lane_index)
            lane = vehicle.target_lane_index[2] if isinstance(vehicle, ControlledVehicle) \
                else vehicle.lane_index[2]
            scaled_speed = utils.lmap(vehicle.speed,
                                      self.config["reward_speed_range"],
                                      [0, 1])
            reward = \
                 self.config["collision_reward"] * vehicle.crashed \
                + self.RIGHT_LANE_REWARD * lane / max(len(neighbours) - 1, 1) \
                + self.HIGH_SPEED_REWARD * np.clip(scaled_speed, 0, 1)
            #reward = utils.lmap(reward,
            #              [self.config["collision_reward"], self.HIGH_SPEED_REWARD + self.RIGHT_LANE_REWARD],
            #              [0, 1])
            #reward = 0 if not vehicle.on_road else reward
            reward = -1 if not vehicle.on_road else reward

            if self.config['task_type'] == 'attack':
                if i >= self.config['n_defenders'] and i < (
                        self.config['n_defenders'] +
                        self.config['n_attackers']):
                    reward *= 0
                    reward = -0.5 if not vehicle.on_road or vehicle.crashed else 0

            rewards.append(reward)

        return rewards