Beispiel #1
0
    def test_average_velocity(self):
        """Check average_velocity with ten vehicles and with none."""
        params = VehicleParams()
        params.add("test", num_vehicles=10)
        env, _ = ring_road_exp_setup(vehicles=params)

        # a set fail flag must force the reward to zero
        self.assertEqual(average_velocity(env, fail=True), 0)

        # the average is expected to be zero right after the setup
        self.assertEqual(average_velocity(env, fail=False), 0)

        # one of the ten vehicles at speed 10 is expected to give a mean of 1
        env.k.vehicle.test_set_speed("test_0", 10)
        self.assertEqual(average_velocity(env, fail=False), 1)

        # an environment built without any vehicles must also yield zero
        env, _ = ring_road_exp_setup(vehicles=VehicleParams())
        self.assertEqual(average_velocity(env, fail=False), 0)
Beispiel #2
0
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition.

        Returns ``rewards.average_velocity(self)``, reduced by a fixed
        penalty of 600 when the ``fail`` keyword argument is truthy.

        ``fail`` is evaluated by truthiness rather than compared to
        ``False`` with ``==``, so a falsy non-bool value such as ``None``
        no longer triggers the penalty.

        Raises
        ------
        KeyError
            If ``fail`` is not passed as a keyword argument.
        """
        failed = kwargs['fail']
        reward = rewards.average_velocity(self)
        if failed:
            return reward - 600
        return reward
        '''
Beispiel #3
0
 def compute_reward(self, rl_actions, **kwargs):
     """Return the mean speed in evaluation mode, else a scaled reward.

     Outside evaluation mode the result is 1.0 when the vehicle kernel
     reports no vehicle ids, and ``rewards.average_velocity(self) / 30``
     otherwise.
     """
     if not self.env_params.evaluate:
         # training: a network without vehicles gets the constant 1.0
         if len(self.k.vehicle.get_ids()) == 0:
             return 1.0
         return rewards.average_velocity(self) / 30

     # evaluation: plain mean over the speeds of all current vehicles
     vehicle_ids = self.k.vehicle.get_ids()
     speeds = self.k.vehicle.get_speed(vehicle_ids)
     return np.mean(speeds)
Beispiel #4
0
    def test_average_velocity(self):
        """Check average_velocity on an environment with ten vehicles."""
        fleet = Vehicles()
        fleet.add("test", num_vehicles=10)
        env, _ = ring_road_exp_setup(vehicles=fleet)

        # a set fail flag must force the reward to zero
        self.assertEqual(average_velocity(env, fail=True), 0)

        # the average is expected to be zero right after the setup
        self.assertEqual(average_velocity(env, fail=False), 0)

        # one of the ten vehicles at speed 10 is expected to give a mean of 1
        env.vehicles.test_set_speed("test_0", 10)
        self.assertEqual(average_velocity(env, fail=False), 1)
Beispiel #5
0
 def compute_reward(self, rl_actions, **kwargs):
     """Return a speed-based reward for the current step.

     When ``additional_params`` contains a positive ``max_num_vehicles``,
     the reward is the summed speed of all current vehicles divided by the
     number of vehicles that have not arrived yet (``max_num_vehicles``
     minus the arrived count). In every other case, including a
     ``max_num_vehicles`` that is present but not positive, the reward is
     ``rewards.average_velocity(self)``.

     The non-positive case previously left ``reward`` unassigned and
     raised ``UnboundLocalError`` at the ``return``.
     """
     params = self.env_params.additional_params
     if 'max_num_vehicles' in params and params['max_num_vehicles'] > 0:
         num_arrived = self.k.vehicle.get_num_arrived()
         num_remain = params['max_num_vehicles'] - num_arrived
         vel = self.k.vehicle.get_speed(self.k.vehicle.get_ids())
         # the small epsilon keeps the division defined when num_remain is 0
         return np.sum(vel) / (num_remain + 1e-6)

     # no usable vehicle cap configured: fall back to the average velocity
     return rewards.average_velocity(self)
Beispiel #6
0
    def compute_reward(self, rl_actions, **kwargs):
        """Return one shared reward for every RL vehicle id.

        The shared value is an equally weighted (0.5 / 0.5) sum of a
        constant -0.1 term and ``average_velocity(self) / 300``. An empty
        dict is returned when ``rl_actions`` is None.
        """
        if rl_actions is None:
            return {}

        constant_term, constant_weight = -0.1, 0.5
        speed_weight = 0.5
        speed_term = average_velocity(self) / 300
        shared = constant_term * constant_weight + speed_term * speed_weight

        # every RL vehicle receives the same value
        return {rl_id: shared for rl_id in self.k.vehicle.get_rl_ids()}
Beispiel #7
0
    def compute_reward(self, rl_actions, **kwargs):
        """Return one shared, weighted reward for every RL vehicle id.

        The weights ``eta1`` and ``eta2`` are read from
        ``env_params.additional_params`` when ``eta1`` is present there and
        default to 0.9 and 0.1 otherwise. They weight a constant -0.1 term
        and ``average_velocity(self) / 300`` respectively. An empty dict is
        returned when ``rl_actions`` is None.
        """
        if rl_actions is None:
            return {}

        configured = self.env_params.additional_params
        if "eta1" not in configured:
            constant_weight, speed_weight = 0.9, 0.1
        else:
            # eta2 is only looked up when eta1 has been configured
            constant_weight = configured["eta1"]
            speed_weight = configured["eta2"]

        constant_term = -0.1
        speed_term = average_velocity(self) / 300
        shared = constant_term * constant_weight + speed_term * speed_weight

        # every RL vehicle receives the same value
        return {rl_id: shared for rl_id in self.k.vehicle.get_rl_ids()}
Beispiel #8
0
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition.

        Returns a dict mapping every RL vehicle id to its reward:

        * an empty dict when ``rl_actions`` is None (warmup steps);
        * the vehicle's own speed when ``env_params.evaluate`` is set;
        * 0 for every vehicle when ``kwargs['fail']`` is truthy;
        * otherwise ``max(eta1 * cost1 + eta2 * cost2, 0)``, where ``cost1``
          is ``average_velocity`` for the whole environment and ``cost2``
          is a non-positive penalty applied when the vehicle has a leader,
          is moving, and its time headway (headway / speed) is below
          ``t_min``.
        """
        # TODO(@evinitsky) we need something way better than this. Something
        # that adds in notions of local reward

        # in the warmup steps
        if rl_actions is None:
            return {}

        rl_ids = list(self.k.vehicle.get_rl_ids())
        if not rl_ids:
            # without RL vehicles nothing else is read, including
            # kwargs['fail'], exactly as in the per-vehicle loop before
            return {}

        if self.env_params.evaluate:
            # reward is speed of vehicle if we are in evaluation mode
            return {rl_id: self.k.vehicle.get_speed(rl_id)
                    for rl_id in rl_ids}

        if kwargs['fail']:
            # reward is 0 if a collision occurred
            return {rl_id: 0 for rl_id in rl_ids}

        # reward high system-level velocities; this term does not depend on
        # the individual vehicle, so it is computed once rather than once
        # per RL vehicle
        cost1 = average_velocity(self, fail=kwargs['fail'])

        t_min = 1  # smallest acceptable time headway

        # weights for cost1 and cost2, respectively
        eta1, eta2 = 1.00, 0.10

        rewards = {}
        for rl_id in rl_ids:
            # penalize small time headways
            cost2 = 0

            lead_id = self.k.vehicle.get_leader(rl_id)
            if lead_id not in ["", None]:
                speed = self.k.vehicle.get_speed(rl_id)
                # a stopped vehicle has no defined time headway
                if speed > 0:
                    t_headway = max(
                        self.k.vehicle.get_headway(rl_id) / speed, 0)
                    cost2 += min((t_headway - t_min) / t_min, 0)

            # the combined reward is clipped so it never goes negative
            rewards[rl_id] = max(eta1 * cost1 + eta2 * cost2, 0)
        return rewards