def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    # Lane-change commands occupy the odd indices of the flat action
    # vector; round each to the nearest integer and take the magnitude so
    # that any non-zero request counts as one lane change.
    num_rl = self.k.vehicle.num_rl_vehicles
    lane_change_acts = np.abs(np.round(rl_actions[1::2])[:num_rl])

    # Reward speed tracking and forward progress of the RL vehicles,
    # minus a unit penalty per requested lane change.
    reward = rewards.desired_velocity(self)
    reward += rewards.rl_forward_progress(self, gain=0.1)
    reward -= rewards.boolean_action_penalty(lane_change_acts, gain=1.0)
    return reward
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    # No learning signal is needed while in test mode.
    if self.test:
        return 0

    # Penalize total delay plus one unit per action component at or
    # above the 0.5 switching threshold.
    delay_penalty = rewards.min_delay_unscaled(self)
    switch_penalty = rewards.boolean_action_penalty(
        rl_actions >= 0.5, gain=1.0)
    return -delay_penalty - switch_penalty
def compute_reward(self, rl_actions, **kwargs):
    """See class definition.

    The reward penalizes switching actions (components >= 0.5) and total
    delay; outside evaluation mode a standstill penalty is added so the
    policy learns to keep vehicles moving.
    """
    r = 0
    if rl_actions is not None:
        # Each action component at or above 0.5 counts as a switch
        # request and incurs the penalty.
        r = -rewards.boolean_action_penalty(rl_actions >= 0.5, gain=2)
    if self.env_params.evaluate:
        r += -rewards.min_delay_unscaled(self)
    else:
        r += (-rewards.min_delay_unscaled(self)
              + rewards.penalize_standstill(self, gain=0.2))
    # NOTE(review): removed a leftover debug `print` that fired on every
    # non-evaluate step (stdout spam, slows training) and its
    # commented-out duplicate in the evaluate branch.
    return r
def compute_reward(self, rl_actions, **kwargs):
    """See class definition.

    In evaluation mode, return only the (negated) unscaled delay.
    Otherwise, additionally penalize switching actions (components
    >= 0.5) and accumulated waiting time.
    """
    if self.env_params.evaluate:
        return -rewards.min_delay_unscaled(self)
    # NOTE(review): removed commented-out debug prints, a commented-out
    # alternative return, and a no-op bare triple-quoted string that was
    # evaluated and discarded on every call.
    return (-rewards.min_delay_unscaled(self)
            - rewards.boolean_action_penalty(rl_actions >= 0.5, gain=0.01)
            - rewards.waiting_penalty(self, gain=0.01))
def test_boolean_action_penalty(self):
    """Test the boolean_action_penalty method."""
    # Each case pairs an action mask with its expected count of True
    # entries; the penalty should scale linearly with the gain.
    cases = [
        ([False, False, False, False, False], 0),
        ([True, False, False, False, False], 1),
        ([True, False, False, True, False], 2),
    ]
    for actions, num_true in cases:
        self.assertEqual(boolean_action_penalty(actions, gain=1), num_true)
        self.assertEqual(
            boolean_action_penalty(actions, gain=2), 2 * num_true)
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    # Discourage unnecessary traffic-light changes: every action
    # component at or above 0.5 is treated as a switch request and
    # penalized one unit.
    switch_requests = rl_actions >= 0.5
    return -rewards.boolean_action_penalty(switch_requests, gain=1.0)