def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    if self.env_params.evaluate:
        return -rewards.min_delay(self)
    else:
        return (500.0 - rewards.min_delay(self) +
                rewards.penalize_standstill(self, gain=0.2))
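
# For context, the standstill term above comes from flow.core.rewards. The
# helper below is only a minimal sketch of what such a penalty typically looks
# like (the name is marked as a sketch on purpose; the body is an assumption,
# not necessarily the exact library implementation): it counts stopped
# vehicles and returns a negative value scaled by `gain`.
def penalize_standstill_sketch(env, gain=1.0):
    """Return -gain * (number of vehicles currently at a standstill)."""
    speeds = env.k.vehicle.get_speed(env.k.vehicle.get_ids())
    num_standstill = sum(1 for v in speeds if v == 0)
    return -gain * num_standstill
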
def test_min_delay(self):
    """Test the min_delay method."""
    # try the case of an environment with no vehicles
    vehicles = VehicleParams()
    env, _ = ring_road_exp_setup(vehicles=vehicles)

    # check that the reward function returns 0 in the case of no vehicles
    self.assertEqual(min_delay(env), 0)

    # try the case of multiple vehicles
    vehicles = VehicleParams()
    vehicles.add("test", num_vehicles=10)
    env, _ = ring_road_exp_setup(vehicles=vehicles)

    # check the min_delay upon reset
    self.assertAlmostEqual(min_delay(env), 0)

    # change the speed of one vehicle
    env.k.vehicle.test_set_speed("test_0", 10)

    # check the min_delay with the new speed
    self.assertAlmostEqual(min_delay(env), 0.0333333333333)
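
# The expected values in this test follow from min_delay returning the
# fraction of "non-delayed" driving, max(max_cost - cost, 0) / max_cost, where
# cost sums (v_top - v) / v_top over vehicles. The sketch below reproduces
# that arithmetic as an illustration (not the library code) and assumes a
# network speed limit of v_top = 30 m/s on the default ring road.
import numpy as np


def min_delay_sketch(env):
    """Approximate the delay-based reward in [0, 1] (1 means no delay)."""
    vel = np.array(env.k.vehicle.get_speed(env.k.vehicle.get_ids()))
    if vel.size == 0:
        return 0  # no vehicles -> no reward
    v_top = 30.0  # assumed ring-road speed limit
    time_step = env.sim_params.sim_step
    max_cost = time_step * vel.size
    cost = time_step * np.sum((v_top - vel) / v_top)
    return max(max_cost - cost, 0) / max_cost


# With 10 stopped vehicles, cost == max_cost, so the reward is 0. Setting one
# vehicle to 10 m/s gives cost = dt * (9 + 20/30) and max_cost = dt * 10, so
# (max_cost - cost) / max_cost = (10 - 9.6667) / 10 = 1/30 ≈ 0.0333, matching
# the assertAlmostEqual above.
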
def compute_reward(self, rl_actions, **kwargs):
    """Reward function for the RL agent(s).

    MUST BE implemented in new environments.
    Defaults to 0 for non-implemented environments.

    Parameters
    ----------
    rl_actions : array_like
        actions performed by rl vehicles
    kwargs : dict
        other parameters of interest. Contains a "fail" element, which
        is True if a vehicle crashed, and False otherwise

    Returns
    -------
    reward : float or list of float
    """
    # in the warmup steps
    if rl_actions is None:
        return {}

    rewards = {}
    for rl_id in self.k.vehicle.get_rl_ids():
        if self.env_params.evaluate:
            # reward is the speed of the vehicle if we are in evaluation mode
            reward = self.k.vehicle.get_speed(rl_id)
        elif kwargs['fail']:
            # reward is 0 if a collision occurred
            reward = 0
        else:
            # reward term that encourages minimization of total delay
            cost1 = min_delay(self)

            # reward term that penalizes vehicle standstill (over all
            # vehicles); the higher it is, the worse
            cost2 = penalize_standstill(self)

            # todo: add selfish penalization for current agent being still
            # average delay of the current vehicle only (selfishness)
            cost3 = avg_delay_specified_vehicles(self, rl_id)

            # get the type of the agent (cooperative or not), then look up
            # its cooperation weight
            rl_type = self.k.vehicle.get_type(rl_id)
            w = self.k.vehicle.type_parameters.get(rl_type).get(
                'cooperative_weight')

            # cooperative part of the reward
            coop_reward = (cost1 + cost2) * w

            # jerk-related factor, penalizing excessive de/acceleration
            # behaviors; the higher the jerk, the worse
            jerk = self.k.vehicle.get_jerk(rl_id)

            # scaling factor for the jerk term
            scaling_factor = self.env_params.additional_params["max_accel"] \
                - self.env_params.additional_params["max_decel"]
            scaling_factor /= self.sim_params.sim_step
            jerk = -pow(jerk, 2) / pow(scaling_factor, 2)

            # maximum penalization can be 4
            reward = max(Params.baseline - coop_reward - cost3 - jerk, 0)

        if Params.debug:
            print(termcolor.colored(
                f"\nReward for agent {rl_id} is: {reward}", "yellow"))

        rewards[rl_id] = reward

    return rewards
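
# The jerk term above normalizes the squared jerk by the largest change in
# acceleration achievable in one simulation step. The standalone sketch below
# reproduces that computation; the function name and the numeric values are
# illustrative assumptions only (in particular, max_decel is taken as a
# negative number so that the scaling factor is non-zero), not values from
# any particular config.
def normalized_jerk_penalty(jerk, max_accel, max_decel, sim_step):
    """Return the squared-jerk penalty, roughly in [-1, 0] for feasible jerks."""
    scaling_factor = (max_accel - max_decel) / sim_step
    return -jerk ** 2 / scaling_factor ** 2


# Example: with max_accel = 3 m/s^2, max_decel = -3 m/s^2 and a 0.1 s step,
# the scaling factor is 60 m/s^3, so a 15 m/s^3 jerk yields a penalty of
# -(15 / 60) ** 2 = -0.0625.
print(normalized_jerk_penalty(jerk=15.0, max_accel=3.0, max_decel=-3.0, sim_step=0.1))
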
def compute_reward(self, rl_actions, **kwargs):
    reward = rewards.min_delay(self)
    return reward