def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    n_rl = self.k.vehicle.num_rl_vehicles

    # Every second entry of the action vector is a lane-change command;
    # round it and take the magnitude so any nonzero command counts as a
    # lane change for the penalty term.
    lc_commands = np.abs(np.round(rl_actions[1::2])[:n_rl])

    # Reward network-level speed and RL forward progress, and penalize
    # each issued lane-change action.
    vel_reward = rewards.desired_velocity(self)
    progress_reward = rewards.rl_forward_progress(self, gain=0.1)
    lc_penalty = rewards.boolean_action_penalty(lc_commands, gain=1.0)

    return vel_reward + progress_reward - lc_penalty
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    # System-level velocity performance, measured as the forward
    # progress of the RL vehicles.
    total = rewards.rl_forward_progress(
        self, gain=self.forward_progress_gain)

    # Add the collision component of the reward.
    total += self.compute_collision_reward(rl_actions)

    # Discourage excessive lane changing: subtract a fixed 0.1 (10% of
    # the max reward) for each RL vehicle that changed lanes this step.
    for veh_id in self.k.vehicle.get_rl_ids():
        if self.k.vehicle.get_last_lc(veh_id) == self.time_counter:
            total -= 0.1

    return total
def compute_reward(self, rl_actions, **kwargs):
    """See class definition.

    In evaluation mode, the reward is the mean speed of all vehicles in
    the network. During training, the reward is a weighted sum of a
    system-level desired-velocity term and an RL forward-progress term,
    clipped below at zero; a step in which a collision occurred
    (``kwargs["fail"]``) yields a reward of 0.
    """
    if self.env_params.evaluate:
        return np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))
    else:
        # return a reward of 0 if a collision occurred
        if kwargs["fail"]:
            return 0

        # reward high system-level velocities
        cost1 = rewards.desired_velocity(self, fail=kwargs["fail"])

        # encourage rl to move; presumably 30 acts as a per-vehicle
        # progress normalizer — TODO confirm against rl_forward_progress
        cost2 = rewards.rl_forward_progress(self, gain=1) / (30 * self.num_rl)

        # weights for cost1 and cost2, respectively (the original
        # comment referenced a nonexistent cost3)
        eta1, eta2 = 0.50, 0.50

        # clip at zero so the reward is never negative
        return max(eta1 * cost1 + eta2 * cost2, 0)