Example no. 1
0
 def compute_reward(self, rl_actions, **kwargs):
     """See class definition."""
     n_rl = self.k.vehicle.num_rl_vehicles
     # every second action entry is a lane-change decision; round to the
     # nearest integer and take magnitudes so any non-zero choice counts
     lc_decisions = np.abs(np.round(rl_actions[1::2])[:n_rl])
     velocity_term = rewards.desired_velocity(self)
     progress_term = rewards.rl_forward_progress(self, gain=0.1)
     lc_penalty = rewards.boolean_action_penalty(lc_decisions, gain=1.0)
     return velocity_term + progress_term - lc_penalty
Example no. 2
0
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        # start from the forward-progress term for the RL vehicles
        total = rewards.rl_forward_progress(
            self, gain=self.forward_progress_gain)

        # add the collision component
        total += self.compute_collision_reward(rl_actions)

        # deduct a fixed amount (10% of max reward) for every RL vehicle
        # whose last lane change happened on this very step, discouraging
        # excessive lane changing
        for rl_id in self.k.vehicle.get_rl_ids():
            if self.k.vehicle.get_last_lc(rl_id) == self.time_counter:
                total -= 0.1

        return total
Example no. 3
0
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        # evaluation mode: report the mean speed over every vehicle
        if self.env_params.evaluate:
            all_ids = self.k.vehicle.get_ids()
            return np.mean(self.k.vehicle.get_speed(all_ids))

        # a collision zeroes out the reward
        if kwargs["fail"]:
            return 0

        # reward high system-level velocities
        cost1 = rewards.desired_velocity(self, fail=kwargs["fail"])

        # encourage the RL vehicles to move, normalized by fleet size
        cost2 = rewards.rl_forward_progress(self, gain=1) / (30 * self.num_rl)

        # weights for cost1 and cost2, respectively
        eta1, eta2 = 0.50, 0.50

        # clip at zero so the combined reward is never negative
        return max(eta1 * cost1 + eta2 * cost2, 0)