Example #1
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    if self.env_params.evaluate:
        return -rewards.min_delay_unscaled(self)
    else:
        return (-rewards.min_delay_unscaled(self) +
                rewards.penalize_standstill(self, gain=0.2))
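All of these overrides come from Flow-style traffic environments, where `rewards` refers to Flow's `flow.core.rewards` module. As a rough, self-contained sketch of what the two terms above compute, here is an illustration that operates on plain speed arrays; the array-based signatures are an assumption for readability, since Flow's real functions take the environment object itself:

import numpy as np

def min_delay_unscaled(speeds, v_max):
    """Illustrative only: mean per-vehicle delay, i.e. each vehicle's
    speed shortfall relative to the speed limit, normalized to [0, 1]."""
    speeds = np.asarray(speeds, dtype=float)
    if speeds.size == 0:
        return 0.0
    return float(np.mean((v_max - speeds) / v_max))

def penalize_standstill(speeds, gain=1.0, threshold=0.1):
    """Illustrative only: negative reward proportional to the number of
    vehicles moving slower than `threshold` m/s."""
    speeds = np.asarray(speeds, dtype=float)
    return -gain * float(np.sum(speeds < threshold))

# example: 4 vehicles on a 15 m/s road, one of them stopped
speeds = [12.0, 8.5, 0.0, 14.0]
print(min_delay_unscaled(speeds, v_max=15.0))   # 0.425
print(-min_delay_unscaled(speeds, v_max=15.0)
      + penalize_standstill(speeds, gain=0.2))  # -0.625, the training reward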
Example #2
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    r = 0
    if rl_actions is not None:
        # penalize actions above the 0.5 switching threshold
        r = -rewards.boolean_action_penalty(rl_actions >= 0.5, gain=2)

    if self.env_params.evaluate:
        r += -rewards.min_delay_unscaled(self)
    else:
        r += (-rewards.min_delay_unscaled(self) +
              rewards.penalize_standstill(self, gain=0.2))
    return r
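The extra `boolean_action_penalty` term above discourages frequent phase switching. Assuming it simply charges `gain` per True entry in the thresholded action vector (an assumption about its behavior, not verified Flow source), a minimal sketch:

import numpy as np

def boolean_action_penalty(discrete_actions, gain=1.0):
    # assumption: the penalty is `gain` times the number of True
    # (i.e. "switch phase") entries in the action vector
    return gain * float(np.sum(discrete_actions))

# example: 3 of 5 intersections request a phase switch this step
actions = np.array([0.7, 0.2, 0.9, 0.1, 0.6])
print(-boolean_action_penalty(actions >= 0.5, gain=2))  # -6.0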
Example #3
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    if self.env_params.evaluate:
        return -rewards.min_delay_unscaled(self)
    else:
        # sum of a delay penalty, a switching penalty, and a waiting
        # penalty, combined into one scalar reward
        return (
            -rewards.min_delay_unscaled(self) -
            rewards.boolean_action_penalty(rl_actions >= 0.5, gain=0.01) -
            rewards.waiting_penalty(self, gain=0.01))
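`waiting_penalty` does not appear in the other examples and may be project-specific rather than part of Flow's stock rewards module. A hypothetical sketch, assuming the environment tracks accumulated per-vehicle waiting steps in a `waiting_steps` dict (both the attribute and the formula are assumptions):

def waiting_penalty(env, gain=1.0):
    # hypothetical: charge `gain` per simulation step that vehicles have
    # spent waiting, read from an assumed env.waiting_steps mapping of
    # vehicle id -> accumulated waiting steps
    return gain * float(sum(env.waiting_steps.values()))

The small gains (0.01) presumably keep the two auxiliary penalties from dominating the delay term.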
Example #4
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    if self.test:
        return 0

    return -rewards.min_delay_unscaled(self) \
        - rewards.boolean_action_penalty(rl_actions >= 0.5, gain=1.0)
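Example #4 gates on a `self.test` attribute rather than `env_params.evaluate`; this appears to be a unit-testing flag that zeroes the reward so tests do not depend on simulator state.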
Example #5
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    if rl_actions is None:
        return {}

    if self.env_params.evaluate:
        rew = -rewards.min_delay_unscaled(self)
    else:
        rew = -rewards.min_delay_unscaled(self) \
              + rewards.penalize_standstill(self, gain=0.2)

    # each agent receives the global reward normalized by the number
    # of traffic lights
    rew /= self.num_traffic_lights

    return {rl_id: rew for rl_id in rl_actions.keys()}
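Example #5 returns a per-agent dict rather than a scalar, the usual convention for multi-agent environments (e.g. RLlib's MultiAgentEnv). A minimal sketch of the same pattern in isolation, with hypothetical agent IDs:

def shared_reward(global_rew, agent_ids):
    # hypothetical helper mirroring the pattern above: one global reward
    # split equally, so the per-agent magnitude is grid-size invariant
    per_agent = global_rew / len(agent_ids)
    return {agent_id: per_agent for agent_id in agent_ids}

print(shared_reward(-1.5, ["center0", "center1", "center2"]))
# {'center0': -0.5, 'center1': -0.5, 'center2': -0.5}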
Example #6
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    if self.env_params.evaluate:
        return -rewards.min_delay_unscaled(self)
    else:
        return rewards.desired_velocity(self, fail=kwargs["fail"])
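`desired_velocity` rewards how closely all vehicle speeds track a target velocity, and collapses to 0 on a crash (`fail=True`). An illustrative, array-based sketch of such a reward; Flow's real function reads speeds and the target from the environment, and the exact normalization here is an assumption:

import numpy as np

def desired_velocity(speeds, target_vel, fail=False):
    """Illustrative sketch: reward in [0, 1] that peaks when every
    vehicle travels at `target_vel`, and is 0 on crashes (`fail`)."""
    speeds = np.asarray(speeds, dtype=float)
    if fail or speeds.size == 0:
        return 0.0
    max_cost = np.linalg.norm([target_vel] * speeds.size)
    cost = np.linalg.norm(speeds - target_vel)
    return float(max(max_cost - cost, 0.0) / max_cost)

print(desired_velocity([9.5, 10.0, 10.5], target_vel=10.0))  # ~0.96
print(desired_velocity([9.5, 10.0, 10.5], target_vel=10.0, fail=True))  # 0.0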
Example #7
def compute_reward(self, state, rl_actions, **kwargs):
    """See class definition."""
    if self.env_params.evaluate:
        return -rewards.min_delay_unscaled(self)
    else:
        return rewards.desired_velocity(self, fail=kwargs["fail"])
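Unlike the other examples, this last override also receives the observed `state`; this appears to match an older Env.compute_reward signature, and the extra argument is unused here.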