def test_average_velocity(self):
    """Check average_velocity under failure, at reset, after a speed change,
    and on an empty network."""
    params = VehicleParams()
    params.add("test", num_vehicles=10)
    env, _ = ring_road_exp_setup(vehicles=params)

    # a failure flag forces the reward to zero regardless of speeds
    self.assertEqual(average_velocity(env, fail=True), 0)

    # immediately after reset every vehicle is stationary
    self.assertEqual(average_velocity(env, fail=False), 0)

    # raise one vehicle to 10 m/s: the mean over 10 vehicles becomes 1
    env.k.vehicle.test_set_speed("test_0", 10)
    self.assertEqual(average_velocity(env, fail=False), 1)

    # a network with no vehicles must also yield zero
    params = VehicleParams()
    env, _ = ring_road_exp_setup(vehicles=params)
    self.assertEqual(average_velocity(env, fail=False), 0)
def compute_reward(self, rl_actions, **kwargs):
    """See class definition.

    Returns the network-wide average velocity; a large constant penalty
    (-600) is subtracted when a failure (e.g. a collision) occurred.
    """
    # PEP 8: test the flag's truthiness rather than comparing `== False`.
    # (The original also carried a stray trailing `'''` token, which would
    # open an unterminated triple-quoted string and break the module.)
    if not kwargs['fail']:
        return rewards.average_velocity(self)
    # heavy penalty so the policy learns to avoid failure states
    return rewards.average_velocity(self) - 600
def compute_reward(self, rl_actions, **kwargs):
    """Return the raw mean speed in evaluation mode, otherwise the
    average network velocity normalized by 30."""
    veh_ids = self.k.vehicle.get_ids()

    if self.env_params.evaluate:
        # evaluation reports the unnormalized mean speed
        return np.mean(self.k.vehicle.get_speed(veh_ids))

    # NOTE(review): an empty network short-circuits to 1.0 — confirm intended
    if not veh_ids:
        return 1.0

    # normalize by 30 — presumably a max-speed scale; TODO confirm
    return rewards.average_velocity(self) / 30
def test_average_velocity(self):
    """Check average_velocity under failure, at reset, and after one
    vehicle's speed is changed."""
    veh = Vehicles()
    veh.add("test", num_vehicles=10)
    env, _ = ring_road_exp_setup(vehicles=veh)

    # the failure flag zeroes the reward
    self.assertEqual(average_velocity(env, fail=True), 0)

    # all vehicles start at rest, so the mean speed is zero
    self.assertEqual(average_velocity(env, fail=False), 0)

    # bump one vehicle to 10 m/s: mean over 10 vehicles becomes 1
    env.vehicles.test_set_speed("test_0", 10)
    self.assertEqual(average_velocity(env, fail=False), 1)
def compute_reward(self, rl_actions, **kwargs):
    """See class definition.

    If ``max_num_vehicles`` is configured and positive, the reward is the
    total network speed divided by the number of vehicles that have not
    yet arrived; otherwise it falls back to the average network velocity.
    """
    # Use .get with a 0 default: the original only assigned `reward` when
    # the 'max_num_vehicles' key was present, so a missing key made
    # `return reward` raise UnboundLocalError.
    max_num_vehicles = self.env_params.additional_params.get(
        'max_num_vehicles', 0)

    if max_num_vehicles > 0:
        num_arrived = self.k.vehicle.get_num_arrived()
        num_remain = max_num_vehicles - num_arrived
        vel = self.k.vehicle.get_speed(self.k.vehicle.get_ids())
        vel_sum = np.sum(vel)
        # epsilon guards against division by zero once all vehicles arrive
        return vel_sum / (num_remain + 1e-6)

    # fallback (also covers a missing or non-positive configuration value)
    return rewards.average_velocity(self)
def compute_reward(self, rl_actions, **kwargs):
    """Return a fixed-weight blend of a constant action penalty and the
    normalized average velocity, duplicated for every RL vehicle."""
    # warmup period: no actions yet, so no reward
    if rl_actions is None:
        return {}

    eta1, eta2 = 0.5, 0.5
    penalty_term = -0.1
    velocity_term = average_velocity(self) / 300
    combined = penalty_term * eta1 + velocity_term * eta2

    # every RL vehicle receives the same global reward
    return {rl_id: combined for rl_id in self.k.vehicle.get_rl_ids()}
def compute_reward(self, rl_actions, **kwargs):
    """See class definition.

    Returns a per-RL-vehicle reward combining a constant action penalty
    with the normalized average network velocity. The weights may be
    overridden via ``env_params.additional_params`` keys "eta1"/"eta2",
    defaulting to 0.9 and 0.1 respectively.
    """
    # warmup period: no actions yet, so no reward
    if rl_actions is None:
        return {}

    params = self.env_params.additional_params
    # Read each weight independently with its own default. The original
    # only checked for "eta1" and then unconditionally indexed "eta2",
    # raising KeyError when eta1 was configured without eta2.
    eta1 = params.get("eta1", 0.9)
    eta2 = params.get("eta2", 0.1)

    reward1 = -0.1
    reward2 = average_velocity(self) / 300
    reward = reward1 * eta1 + reward2 * eta2

    # every RL vehicle receives the same global reward
    return {rl_id: reward for rl_id in self.k.vehicle.get_rl_ids()}
def compute_reward(self, rl_actions, **kwargs):
    # TODO(@evinitsky) we need something way better than this. Something that adds
    # in notions of local reward
    """See class definition."""
    # warmup period: no actions yet, so no reward
    if rl_actions is None:
        return {}

    rewards = {}
    for rl_id in self.k.vehicle.get_rl_ids():
        if self.env_params.evaluate:
            # evaluation mode: reward is the vehicle's own speed
            rewards[rl_id] = self.k.vehicle.get_speed(rl_id)
            continue

        if kwargs['fail']:
            # a collision zeroes the reward
            rewards[rl_id] = 0
            continue

        # term 1: reward high system-level velocities
        velocity_term = average_velocity(self, fail=kwargs['fail'])

        # term 2: penalize time headways below the acceptable minimum
        headway_term = 0
        t_min = 1  # smallest acceptable time headway
        leader = self.k.vehicle.get_leader(rl_id)
        ego_speed = self.k.vehicle.get_speed(rl_id)
        if leader not in ["", None] and ego_speed > 0:
            t_headway = max(
                self.k.vehicle.get_headway(rl_id) / ego_speed, 0)
            headway_term += min((t_headway - t_min) / t_min, 0)

        # weights for the velocity and headway terms, respectively
        eta1, eta2 = 1.00, 0.10
        rewards[rl_id] = max(eta1 * velocity_term + eta2 * headway_term, 0)

    return rewards