Example no. 1
    def test_penalize_standstill(self):
        """Test the penalize_standstill method."""
        vehicles = VehicleParams()
        vehicles.add("test", num_vehicles=10)

        env_params = EnvParams(
            additional_params={
                "target_velocity": 10,
                "max_accel": 1,
                "max_decel": 1,
                "sort_vehicles": False
            })

        env, _ = ring_road_exp_setup(vehicles=vehicles, env_params=env_params)

        # check the penalty is acknowledging all vehicles
        self.assertEqual(penalize_standstill(env, gain=1), -10)
        self.assertEqual(penalize_standstill(env, gain=2), -20)

        # change the speed of one vehicle
        env.k.vehicle.test_set_speed("test_0", 10)

        # check the penalty is acknowledging all vehicles but one
        self.assertEqual(penalize_standstill(env, gain=1), -9)
        self.assertEqual(penalize_standstill(env, gain=2), -18)
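
The assertions above pin down the behaviour: with ten stopped vehicles the penalty is -gain * 10, and once one vehicle has a nonzero speed it drops out of the count. A minimal sketch consistent with those assertions (a hypothetical reconstruction, not the actual flow.core.rewards.penalize_standstill source):

import numpy as np

def penalize_standstill_sketch(env, gain=1):
    # hypothetical reconstruction from the test above: charge `gain`
    # for every vehicle whose speed is exactly zero
    speeds = np.array(env.k.vehicle.get_speed(env.k.vehicle.get_ids()))
    num_standstill = int(np.sum(speeds == 0))
    return -gain * num_standstill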
Example no. 2
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        if self.env_params.evaluate:
            return -rewards.min_delay_unscaled(self)
        else:
            return (-rewards.min_delay_unscaled(self) +
                    rewards.penalize_standstill(self, gain=0.2))
Example no. 3
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        if rl_actions is None:
            return {}

        # rew_delay = -rewards.min_delay_unscaled(self)
        rew_delay = -0.01 * rewards.delay(self)
        rew_still = rewards.penalize_standstill(self, gain=0.2, threshold=2.0)

        rews = {}
        # FIXME(cathywu) check that rl_ids is consistent with actions
        # provided in rl_actions. If not, why not?
        for rl_id in self.k.vehicle.get_rl_ids():
            if self.env_params.evaluate:
                rews[rl_id] = rew_delay
            elif rl_id in rl_actions:
                max_speed = self.k.vehicle.get_max_speed(rl_id)
                # Note: rl_action has already been applied
                curr_speed = self.k.vehicle.get_speed(rl_id)
                # control cost, also penalizes over-acceleration
                rew_speed = -0.01 * np.abs(curr_speed - max_speed)
                rews[rl_id] = rew_delay + rew_still + rew_speed
            else:
                rews[rl_id] = rew_delay + rew_still
        return rews
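
Example no. 3 passes an extra threshold=2.0 argument to penalize_standstill, which presumably extends the penalty to vehicles moving slower than the threshold rather than only vehicles at exactly zero speed. A sketch under that assumption (the threshold semantics are inferred from the call site, not confirmed by the snippet):

import numpy as np

def penalize_standstill_threshold_sketch(env, gain=1, threshold=0.0):
    # assumed variant: count every vehicle at or below `threshold` m/s
    speeds = np.array(env.k.vehicle.get_speed(env.k.vehicle.get_ids()))
    return -gain * int(np.sum(speeds <= threshold))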
Example no. 4
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        ids = self.k.vehicle.get_ids()
        rl_ids = self.k.vehicle.get_rl_ids()

        # Only count speeds of human vehicles on the edges prior to the
        # 'construction site'; RL vehicles are encouraged to make forward
        # progress everywhere.
        RL_SPEED_GAIN = 0.1
        human_speeds = []
        rl_speeds = []
        for veh_id in ids:
            speed = self.k.vehicle.get_speed(veh_id)
            # skip invalid speed readings (error sentinels)
            if abs(speed) > 10000:
                continue
            if veh_id in rl_ids:
                rl_speeds.append(abs(speed))
            elif self.k.vehicle.get_edge(veh_id) in ("edge3", "edge4"):
                human_speeds.append(abs(speed))

        mean_human_speed = np.mean(human_speeds) if human_speeds else 0
        mean_rl_speed = np.mean(rl_speeds) * RL_SPEED_GAIN if rl_speeds else 0

        return (mean_human_speed + mean_rl_speed -
                abs(rewards.penalize_standstill(self, gain=0.3)))
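
Since penalize_standstill returns a non-positive value (see the assertions in Example no. 1), the abs(...) around it converts the penalty back to a positive magnitude before it is subtracted from the mean-speed terms. A worked example with hypothetical numbers:

# illustrative only: three stopped vehicles, gain=0.3
penalty = -0.3 * 3            # penalize_standstill(self, gain=0.3) -> -0.9
reward_term = -abs(penalty)   # 0.9 is subtracted from the mean-speed terms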
Example no. 5
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        r = 0
        if rl_actions is not None:
            r = -rewards.boolean_action_penalty(rl_actions >= 0.5, gain=2)

        if self.env_params.evaluate:
            r += -rewards.min_delay_unscaled(self)
        else:
            r += (-rewards.min_delay_unscaled(self) +
                  rewards.penalize_standstill(self, gain=0.2))

        print(f"Reward computed: {r}, rl_actions: {rl_actions}")
        return r
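
The boolean_action_penalty term is negated by the caller, so the function presumably returns a positive cost proportional to the number of active (True) actions. A sketch under that assumption (a hypothetical reconstruction, not the real flow.core.rewards implementation):

import numpy as np

def boolean_action_penalty_sketch(discrete_actions, gain=1.0):
    # assumed behaviour: a cost of `gain` per active (True) action,
    # negated by the caller as in the snippet above
    return gain * float(np.sum(discrete_actions))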
Example no. 6
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        if rl_actions is None:
            return {}

        if self.env_params.evaluate:
            rew = -rewards.min_delay_unscaled(self)
        else:
            rew = -rewards.min_delay_unscaled(self) \
                  + rewards.penalize_standstill(self, gain=0.2)

        # each agent receives reward normalized by number of lights
        rew /= self.num_traffic_lights

        rews = {}
        for rl_id in rl_actions.keys():
            rews[rl_id] = rew
        return rews
Example no. 7
    def compute_reward(self, rl_actions, **kwargs):
        """Reward function for the RL agent(s).

        MUST BE implemented in new environments.
        Defaults to 0 for non-implemented environments.

        Parameters
        ----------
        rl_actions : array_like
            actions performed by rl vehicles
        kwargs : dict
            other parameters of interest. Contains a "fail" element, which
            is True if a vehicle crashed, and False otherwise

        Returns
        -------
        reward : float or list of float
        """

        # in the warmup steps
        if rl_actions is None:
            return {}

        rewards = {}
        for rl_id in self.k.vehicle.get_rl_ids():
            if self.env_params.evaluate:
                # reward is speed of vehicle if we are in evaluation mode
                reward = self.k.vehicle.get_speed(rl_id)
            elif kwargs['fail']:
                # reward is 0 if a collision occurred
                reward = 0
            else:

                # reward term that encourages minimization of total delay
                cost1 = min_delay(self)

                # reward term that penalizes vehicle standstill (applies to
                # all vehicles); the higher, the worse
                cost2 = penalize_standstill(self)

                # todo: add selfish penalization for current agent being still

                # Calculate the average delay for the current vehicle (Selfishness)
                cost3 = avg_delay_specified_vehicles(self, rl_id)

                # get the type of the agent (coop or not)
                rl_type = self.k.vehicle.get_type(rl_id)

                # then get the coop weight
                w = self.k.vehicle.type_parameters.get(rl_type).get(
                    'cooperative_weight')

                # estimate the coop part of the reward
                coop_reward = (cost1 + cost2) * w

                # jerk-related reward term, to penalize excessive
                # acceleration/deceleration behavior
                jerk = self.k.vehicle.get_jerk(rl_id)

                # getting scaling factor for jerk
                scaling_factor = self.env_params.additional_params["max_accel"] \
                                 - self.env_params.additional_params["max_decel"]
                scaling_factor /= self.sim_params.sim_step

                # normalized squared jerk: the higher the jerk, the worse
                jerk = -pow(jerk, 2) / pow(scaling_factor, 2)

                # maximum penalization can be 4
                reward = max(Params.baseline - coop_reward - cost3 - jerk, 0)

                if Params.debug:
                    print(termcolor.colored(
                        f"\nReward for agent {rl_id} is: {reward}", "yellow"))

            rewards[rl_id] = reward
        return rewards
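
The jerk term divides the squared jerk by the squared scaling factor derived from the acceleration bounds and the simulation step, keeping its magnitude comparable to the other cost terms. A worked example with purely illustrative parameter values:

# illustrative only: max_accel = 3, max_decel = 1.5, sim_step = 0.5
scaling_factor = (3 - 1.5) / 0.5        # 3.0
jerk_term = -(2.0 ** 2) / (3.0 ** 2)    # an observed jerk of 2.0 gives about -0.44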