def test_penalize_standstill(self):
    """Test the penalize_standstill method."""
    vehicles = VehicleParams()
    vehicles.add("test", num_vehicles=10)

    env_params = EnvParams(
        additional_params={
            "target_velocity": 10,
            "max_accel": 1,
            "max_decel": 1,
            "sort_vehicles": False
        })

    env, _ = ring_road_exp_setup(vehicles=vehicles, env_params=env_params)

    # check that the penalty acknowledges all vehicles
    self.assertEqual(penalize_standstill(env, gain=1), -10)
    self.assertEqual(penalize_standstill(env, gain=2), -20)

    # change the speed of one vehicle
    env.k.vehicle.test_set_speed("test_0", 10)

    # check that the penalty acknowledges all vehicles but one
    self.assertEqual(penalize_standstill(env, gain=1), -9)
    self.assertEqual(penalize_standstill(env, gain=2), -18)
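
# For reference, the behavior the assertions above encode can be
# reproduced by a minimal sketch of penalize_standstill. Flow's actual
# implementation lives in flow.core.rewards; this is only the shape
# implied by the test.
import numpy as np


def penalize_standstill(env, gain=1):
    """Penalty proportional to the number of standstill vehicles.

    With 10 stopped vehicles and gain=1 this yields -10, matching the
    assertions in the test above.
    """
    vel = np.array(env.k.vehicle.get_speed(env.k.vehicle.get_ids()))
    num_standstill = len(vel[vel == 0])
    return -gain * num_standstill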
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    if self.env_params.evaluate:
        return -rewards.min_delay_unscaled(self)
    else:
        return (-rewards.min_delay_unscaled(self) +
                rewards.penalize_standstill(self, gain=0.2))
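
# min_delay_unscaled returns a non-negative average delay, which is why
# both branches above negate it. The following is only a sketch of the
# shape of such a term, not necessarily Flow's exact implementation.
import numpy as np


def min_delay_unscaled_sketch(env):
    """Average delay per vehicle relative to the network's top speed."""
    vel = np.array(env.k.vehicle.get_speed(env.k.vehicle.get_ids()))
    vel = vel[vel >= 0]  # drop error codes reported as negative speeds
    v_top = max(
        env.k.network.speed_limit(edge)
        for edge in env.k.network.get_edge_list())
    delay = env.sim_params.sim_step * np.sum((v_top - vel) / v_top)
    return delay / max(env.k.vehicle.num_vehicles, 1)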
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    if rl_actions is None:
        return {}

    # rew_delay = -rewards.min_delay_unscaled(self)
    rew_delay = -0.01 * rewards.delay(self)
    rew_still = rewards.penalize_standstill(self, gain=0.2, threshold=2.0)

    rews = {}
    # FIXME(cathywu) check that rl_ids is consistent with actions
    # provided in rl_actions. If not, why not?
    for rl_id in self.k.vehicle.get_rl_ids():
        if self.env_params.evaluate:
            rews[rl_id] = rew_delay
        elif rl_id in rl_actions:
            max_speed = self.k.vehicle.get_max_speed(rl_id)
            # Note: rl_action has already been applied
            curr_speed = self.k.vehicle.get_speed(rl_id)
            # control cost, also penalizes over-acceleration
            rew_speed = -0.01 * np.abs(curr_speed - max_speed)
            rews[rl_id] = rew_delay + rew_still + rew_speed
        else:
            rews[rl_id] = rew_delay + rew_still
    return rews
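
# Note that this snippet passes a threshold keyword to
# penalize_standstill, unlike the gain-only signature exercised by the
# test at the top of this section. Presumably that variant counts
# vehicles below a speed threshold rather than exactly at zero; a
# sketch of the presumed variant:
import numpy as np


def penalize_standstill_thresholded(env, gain=1, threshold=0.0):
    """Presumed variant: penalize vehicles at or below threshold m/s."""
    vel = np.array(env.k.vehicle.get_speed(env.k.vehicle.get_ids()))
    num_slow = int(np.sum(vel <= threshold))
    return -gain * num_slow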
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    ids = self.k.vehicle.get_ids()
    rl_ids = self.k.vehicle.get_rl_ids()

    # Only count speeds of human cars in the edges prior to the
    # 'construction site'; RL vehicles are encouraged to make forward
    # progress regardless of position.
    RL_SPEED_GAIN = 0.1
    human_speeds = []
    rl_speeds = []
    for veh_id in ids:
        if veh_id in rl_ids:
            speed = self.k.vehicle.get_speed(veh_id)
            # skip invalid speed readings (e.g. simulator error codes)
            if abs(speed) > 10000:
                continue
            rl_speeds.append(abs(speed))
        else:
            edge = self.k.vehicle.get_edge(veh_id)
            if edge in ("edge3", "edge4"):
                speed = self.k.vehicle.get_speed(veh_id)
                if abs(speed) > 10000:
                    continue
                human_speeds.append(abs(speed))

    mean_human_speed = np.mean(human_speeds) if human_speeds else 0
    mean_rl_speed = np.mean(rl_speeds) * RL_SPEED_GAIN if rl_speeds else 0

    # penalize_standstill returns a non-positive value; take its
    # magnitude so it is subtracted as a penalty
    standstill_penalty = abs(rewards.penalize_standstill(self, gain=0.3))

    return mean_human_speed + mean_rl_speed - standstill_penalty
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    r = 0
    if rl_actions is not None:
        r = -rewards.boolean_action_penalty(rl_actions >= 0.5, gain=2)

    if self.env_params.evaluate:
        r += -rewards.min_delay_unscaled(self)
    else:
        r += (-rewards.min_delay_unscaled(self) +
              rewards.penalize_standstill(self, gain=0.2))
        # debug output
        print(f"Reward computed: {r}, rl_actions: {rl_actions}")
    return r
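
# boolean_action_penalty charges for each action crossing the 0.5
# switching threshold; with gain=2 at the call site above, every
# switch costs 2. A minimal sketch consistent with that call site:
import numpy as np


def boolean_action_penalty(discrete_actions, gain=1.0):
    """Penalize boolean actions; here, each True marks a light switch."""
    return gain * np.sum(discrete_actions)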
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    if rl_actions is None:
        return {}

    if self.env_params.evaluate:
        rew = -rewards.min_delay_unscaled(self)
    else:
        rew = -rewards.min_delay_unscaled(self) \
            + rewards.penalize_standstill(self, gain=0.2)

    # each agent receives the global reward normalized by the number
    # of traffic lights
    rew /= self.num_traffic_lights

    return {rl_id: rew for rl_id in rl_actions.keys()}
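
# Design note: every agent receives the same globally normalized
# reward, so each traffic light shares credit for network-wide delay.
# This encourages cooperation at the cost of weaker individual credit
# assignment; keying the dict by rl_actions keeps the returned rewards
# aligned with whichever agents acted this step.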
def compute_reward(self, rl_actions, **kwargs):
    """Reward function for the RL agent(s).

    MUST BE implemented in new environments. Defaults to 0 for
    non-implemented environments.

    Parameters
    ----------
    rl_actions : array_like
        actions performed by rl vehicles
    kwargs : dict
        other parameters of interest. Contains a "fail" element, which
        is True if a vehicle crashed, and False otherwise

    Returns
    -------
    reward : float or list of float
    """
    # in the warmup steps
    if rl_actions is None:
        return {}

    rewards = {}
    for rl_id in self.k.vehicle.get_rl_ids():
        if self.env_params.evaluate:
            # reward is the speed of the vehicle if we are in
            # evaluation mode
            reward = self.k.vehicle.get_speed(rl_id)
        elif kwargs['fail']:
            # reward is 0 if a collision occurred
            reward = 0
        else:
            # reward term that encourages minimization of total delay
            cost1 = min_delay(self)

            # reward term that penalizes standstill, computed over all
            # vehicles; the more vehicles at a standstill, the worse
            cost2 = penalize_standstill(self)

            # TODO: add selfish penalization for the current agent
            # being still

            # average delay of the current vehicle (selfishness term)
            cost3 = avg_delay_specified_vehicles(self, rl_id)

            # get the type of the agent (cooperative or not), then
            # look up its cooperative weight
            rl_type = self.k.vehicle.get_type(rl_id)
            w = self.k.vehicle.type_parameters.get(rl_type).get(
                'cooperative_weight')

            # cooperative part of the reward
            coop_reward = (cost1 + cost2) * w

            # jerk-related reward term, to penalize excessive
            # acceleration/deceleration behaviors
            jerk = self.k.vehicle.get_jerk(rl_id)

            # scaling factor for the jerk term: acceleration can change
            # from -max_decel to +max_accel in a single step, so the
            # largest attainable jerk is that range over the step length
            scaling_factor = \
                self.env_params.additional_params["max_accel"] \
                + self.env_params.additional_params["max_decel"]
            scaling_factor /= self.sim_params.sim_step

            # the higher the jerk, the worse
            jerk = -pow(jerk, 2) / pow(scaling_factor, 2)

            # maximum penalization can be 4
            reward = max(Params.baseline - coop_reward - cost3 - jerk, 0)

        if Params.debug:
            # termcolor.colored only builds the string; print it
            print(termcolor.colored(
                f"\nReward for agent {rl_id} is : {reward}", "yellow"))

        rewards[rl_id] = reward
    return rewards
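
# For intuition on the jerk normalization above: the scaling factor is
# the largest attainable jerk, i.e. the full acceleration range
# traversed in one simulation step (this assumes max_decel is stored as
# a positive magnitude, as in the test parameters at the top of this
# section). A quick numeric check with hypothetical values:

# hypothetical parameters for illustration only
max_accel, max_decel, sim_step = 3.0, 3.0, 0.1

# acceleration can change from -max_decel to +max_accel in one step,
# so the largest attainable jerk is the full range over the step length
scaling_factor = (max_accel + max_decel) / sim_step  # 60.0 m/s^3

jerk = 60.0  # worst-case jerk
penalty = -jerk ** 2 / scaling_factor ** 2
print(penalty)  # -1.0: the normalized jerk term lies in [-1, 0]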