def test_desired_velocity(self):
        """Test the desired_velocity methods."""
        vehicles = Vehicles()
        vehicles.add("test", num_vehicles=10)

        env_params = EnvParams(additional_params={
            "target_velocity": 10,
            "max_accel": 1,
            "max_decel": 1
        })

        env, scenario = ring_road_exp_setup(vehicles=vehicles,
                                            env_params=env_params)

        # check that the fail attribute leads to a zero return
        self.assertEqual(desired_velocity(env, fail=True), 0)

        # check the average speed upon reset
        self.assertEqual(desired_velocity(env, fail=False), 0)

        # change the speed of one vehicle
        env.vehicles.test_set_speed("test_0", 10)

        # check the new average speed
        self.assertAlmostEqual(desired_velocity(env, fail=False), 0.05131670)
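
A quick recomputation of that expected value, assuming the L2-norm formulation of desired_velocity that the test numbers are consistent with (reward = (||v_target|| - ||v - v_target||) / ||v_target|| over all vehicles); the setup below mirrors the test:

import numpy as np

# 10 vehicles, target_velocity = 10, one vehicle moving at 10 m/s.
target = 10.0
speeds = np.array([10.0] + [0.0] * 9)
max_cost = np.linalg.norm(np.full(10, target))   # sqrt(10 * 10**2) ~= 31.6228
cost = np.linalg.norm(speeds - target)           # sqrt(9 * 10**2) = 30.0
print(max(max_cost - cost, 0) / max_cost)        # ~= 0.0513167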
Example #2
    def compute_reward(self, rl_actions, **kwargs):
        """
        :param rl_actions: The output of the PPO network based on the last step's observation for each agent
        :param kwargs: May contain "fail", indicating that a crash occurred
        :return: The individual reward for every agent
        """
        # if a crash occurred everybody gets 0
        if kwargs['fail']:
            return {rl_id: 0 for rl_id in self.k.vehicle.get_rl_ids()}

        # Average outflow over the last 10 steps, divided by 2000 * scaling.
        # TODO: NEXT: try own reward computation
        outflow_reward = 2 / 3 * self.k.vehicle.get_rl_outflow_rate(
            10 * self.sim_step) / (2000.0 * self.scaling)

        rl_agent_rewards = {}
        if rl_actions:
            for rl_id in self.k.vehicle.get_rl_ids():
                # Reward desired velocity in own edge + the total outflow
                edge_num = self.k.vehicle.get_edge(rl_id)
                rl_agent_rewards[rl_id] = 1 / 3 * rewards.desired_velocity(
                    self, edge_list=[edge_num],
                    use_only_rl_ids=True) + outflow_reward

        return rl_agent_rewards
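
The split between the outflow term and the per-edge velocity term above can be illustrated with made-up numbers; the scaling factor, outflow rate, and desired_velocity value below are all hypothetical:

scaling = 1                      # bottleneck scaling factor (hypothetical)
outflow_rate = 1500.0            # veh/hr over the last 10 steps (hypothetical)
outflow_reward = 2 / 3 * outflow_rate / (2000.0 * scaling)   # = 0.5
edge_velocity = 0.8              # desired_velocity on the agent's edge (hypothetical)
print(1 / 3 * edge_velocity + outflow_reward)                # per-agent reward ~= 0.77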
Example #3
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        if self.env_params.evaluate:
            return np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))
        else:
            # return a reward of 0 if a collision occurred
            if kwargs["fail"]:
                return 0

            # reward high system-level velocities
            cost1 = rewards.desired_velocity(self, fail=kwargs["fail"])

            # penalize small time headways
            cost2 = 0
            t_min = 1  # smallest acceptable time headway
            for rl_id in self.rl_veh:
                lead_id = self.k.vehicle.get_leader(rl_id)
                if lead_id not in ["", None] \
                        and self.k.vehicle.get_speed(rl_id) > 0:
                    t_headway = max(
                        self.k.vehicle.get_headway(rl_id) /
                        self.k.vehicle.get_speed(rl_id), 0)
                    cost2 += min((t_headway - t_min) / t_min, 0)
            if "max_inflow" in self.env_params.additional_params.keys():
                #print("max_inflow specified ")
                max_inflow = self.env_params.additional_params["max_inflow"]
            else:
                max_inflow = 2200
            InflowScale = rewards.optimize_inflow(self,
                                                  max_flow=max_inflow,
                                                  timespan=500)
            # weights for cost1, cost2, and cost3, respectively
            eta1, eta2 = 1.00, 0.00
            reward = max(eta1 * cost1 + eta2 * cost2, 0) * InflowScale
            return reward
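
The headway penalty used here (and in several later examples) is easiest to see with concrete numbers; the gap and speed below are made up:

t_min = 1.0        # smallest acceptable time headway, in seconds
headway = 10.0     # gap to the leader, in meters (made up)
speed = 20.0       # speed of the RL vehicle, in m/s (made up)
t_headway = max(headway / speed, 0)              # 0.5 s
print(min((t_headway - t_min) / t_min, 0))       # -0.5; zero once t_headway >= t_min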
Example #4
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        num_rl = self.k.vehicle.num_rl_vehicles
        lane_change_acts = np.abs(np.round(rl_actions[1::2])[:num_rl])
        return (rewards.desired_velocity(self) +
                rewards.rl_forward_progress(self, gain=0.1) -
                rewards.boolean_action_penalty(lane_change_acts, gain=1.0))
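
A sketch of the action layout this example appears to assume: accelerations and lane-change values are interleaved per RL vehicle, so rl_actions[1::2] selects the lane-change components (the numbers below are made up):

import numpy as np

rl_actions = np.array([0.3, 0.9, -0.1, 0.1, 0.5, -0.8])   # (accel, lane change) x 3 vehicles
num_rl = 3
lane_change_acts = np.abs(np.round(rl_actions[1::2])[:num_rl])
print(lane_change_acts)   # [1. 0. 1.] -> two rounded lane changes incur the penalty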
Example #5
    def compute_reward(self, rl_actions, **kwargs):
        current_rl_vehs = self.rl_veh
        edges = []
        for veh_id in current_rl_vehs:
            edge = self.k.vehicle.get_edge(veh_id)
            if edge not in edges:
                edges.append(edge)
        interested_vehs = self.k.vehicle.get_ids_by_edge(edges)
        if len(interested_vehs) > 0:
            cost1 = rewards.desired_velocity(self,
                                             fail=kwargs["fail"],
                                             edge_list=edges)

            # penalize small time headways
            cost2 = 0
            t_min = 1  # smallest acceptable time headway
            for rl_id in self.rl_veh:
                lead_id = self.k.vehicle.get_leader(rl_id)
                if lead_id not in ["", None] \
                        and self.k.vehicle.get_speed(rl_id) > 0:
                    t_headway = max(
                        self.k.vehicle.get_headway(rl_id) /
                        self.k.vehicle.get_speed(rl_id), 0)
                    cost2 += min((t_headway - t_min) / t_min, 0)

            # weights for cost1 and cost2, respectively
            eta1, eta2 = 1.00, 0.10

            return max(eta1 * cost1 + eta2 * cost2, 0)
        else:
            return 0
Example #6
    def compute_reward(self, rl_actions=None, rl_states=None, **kwargs):
        """See class definition."""

        if rl_actions is None or rl_actions == {}:
            return {}, {}

        rew = {}
        info = {}
        cost1 = rewards.desired_velocity(self, fail=kwargs["fail"])
        mean_vel = np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))
        outflow = self.k.vehicle.get_outflow_rate(100)
        if outflow is None:
            outflow = 0.0

        # penalize small time headways
        t_min = self.env_params.additional_params[
            "t_min"]  # smallest acceptable time headway

        for rl_id in self.k.vehicle.get_rl_ids():
            if rl_id in rl_actions:
                state = rl_states[rl_id]
                action = rl_actions[rl_id]
                reward = self.calculate_reward(state, action)
            else:
                reward = 0.0
            # add the other information
            cost2 = 0.0
            current_edge = self.k.vehicle.get_edge(rl_id)
            lead_id = self.k.vehicle.get_leader(rl_id)
            if lead_id not in ["", None] \
                    and self.k.vehicle.get_speed(rl_id) > 0:
                t_headway = max(
                    self.k.vehicle.get_headway(rl_id) /
                    self.k.vehicle.get_speed(rl_id), 0)
                cost2 += min((t_headway - t_min) / t_min, 0.0)

            info.update({
                rl_id: {
                    'cost1': cost1,
                    'cost2': cost2,
                    'mean_vel': mean_vel,
                    "outflow": outflow
                }
            })

            # update reward
            rew.update({rl_id: reward})

            if kwargs["fail"]:
                rew.update({rl_id: 0.0})
                info.update({
                    rl_id: {
                        'cost1': cost1,
                        'cost2': cost2,
                        'mean_vel': mean_vel,
                        "outflow": outflow
                    }
                })

        return rew, info
Example #7
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        # Compute the common reward.
        reward = rewards.desired_velocity(self, fail=kwargs['fail'])

        # Reward is shared by all agents.
        return {key: reward for key in self.k.vehicle.get_rl_ids()}
Example #8
    def get_state(self):
        """See class definition."""
        obs = {}

        # normalizing constants
        max_speed = self.k.network.max_speed()
        target_speed = desired_velocity(self)

        for rl_id in self.k.vehicle.get_rl_ids():
            this_speed = self.k.vehicle.get_speed(rl_id)
            lead_speeds = self.k.vehicle.get_lane_leaders_speed(rl_id)
            lead_headways = self.k.vehicle.get_lane_headways(rl_id)
            follower_speeds = self.k.vehicle.get_lane_followers_speed(rl_id)
            follower_headways = self.k.vehicle.get_lane_tailways(rl_id)

            if len(lead_headways) > 1:
                observation = np.array([
                    this_speed, target_speed, max_speed, lead_headways[0],
                    lead_headways[1], lead_headways[2], lead_speeds[0],
                    lead_speeds[1], lead_speeds[2], follower_headways[0],
                    follower_headways[1], follower_headways[2],
                    follower_speeds[0], follower_speeds[1], follower_speeds[2]
                ])
            else:
                observation = np.array([
                    this_speed, target_speed, max_speed, lead_headways[0],
                    lead_headways[0], lead_headways[0], lead_speeds[0],
                    lead_speeds[0], lead_speeds[0], follower_headways[0],
                    follower_headways[0], follower_headways[0],
                    follower_speeds[0], follower_speeds[0], follower_speeds[0]
                ])

            obs.update({rl_id: observation})

        return obs["rl_0"]
Example #9
    def compute_reward(self, rl_actions, **kwargs):
#        """See class definition."""
#        # return a reward of 0 if a collision occurred
#        if kwargs["fail"]:
#            return 0
#
#        # reward high system-level velocities, but according to their L_2 norm,
#        # which is undesirable, since it encourages increases at high speeds
#        # more than at low speeds and is not the true reward
#        #
#        #return rewards.desired_velocity(self, fail=kwargs["fail"])
#        veh_ids = self.vehicles.get_ids() 
#        vel = np.array(self.vehicles.get_speed(veh_ids))
#        num_vehicles = len(veh_ids)
#
#        if any(vel < -100):
#            return 0.
#
#        target_vel = self.env_params.additional_params['target_velocity']
#        max_reward = np.array([target_vel])
#        print("max_reward " + str(max_reward))
#
#        reward = np.sum(vel) / num_vehicles
#        print("reward " + str(reward))
#        
#        #return reward / max_reward
#        return reward
        """See class definition."""
        # return a reward of 0 if a collision occurred
        if kwargs["fail"]:
            return 0
        
        # reward high system-level velocities
        return rewards.desired_velocity(self, fail=kwargs["fail"])
Example #10
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        if self.env_params.evaluate:
            return np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))
        else:
            # return a reward of 0 if a collision occurred
            if kwargs["fail"]:
                return 0

            # reward high system-level velocities
            cost1 = rewards.desired_velocity(self, fail=kwargs["fail"])

            # penalize small time headways
            cost2 = 0
            t_min = 1  # smallest acceptable time headway
            for rl_id in self.rl_veh:
                lead_id = self.k.vehicle.get_leader(rl_id)
                if lead_id not in ["", None] \
                        and self.k.vehicle.get_speed(rl_id) > 0:
                    t_headway = max(
                        self.k.vehicle.get_headway(rl_id) /
                        self.k.vehicle.get_speed(rl_id), 0)
                    cost2 += min((t_headway - t_min) / t_min, 0)

            # weights for cost1 and cost2, respectively
            eta1, eta2 = 1.00, 0.0

            return max(eta1 * cost1, 0)
Example #11
    def compute_reward(self, state, rl_actions, **kwargs):
        """
        See parent class
        """
        # reward desired velocity
        reward = rewards.desired_velocity(self, fail=kwargs["fail"])

        return reward
Example #12
    def compute_reward(self, state, rl_actions, **kwargs):
        # compute the system-level performance of vehicles from a velocity
        # perspective
        reward = rewards.desired_velocity(self, fail=kwargs["fail"])

        # punish excessive lane changes by reducing the reward by a set value
        # every time an rl car changes lanes (10% of max reward)
        for veh_id in self.vehicles.get_rl_ids():
            if self.vehicles.get_state(veh_id, "last_lc") == self.time_counter:
                reward -= 0.1

        return reward
Example #13
    def compute_reward(self, rl_actions, **kwargs):
        """The agents receives opposing speed rewards.

        The agent receives the class definition reward,
        the adversary receives the negative of the agent reward
        """
        if self.env_params.evaluate:
            reward = np.mean(self.vehicles.get_speed(self.vehicles.get_ids()))
            return {'av': reward, 'adversary': -reward}
        else:
            reward = rewards.desired_velocity(self, fail=kwargs['fail'])
            return {'av': reward, 'adversary': -reward}
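
A minimal check of the zero-sum structure above, using a made-up reward value:

reward = 0.42   # hypothetical shared speed reward
out = {'av': reward, 'adversary': -reward}
assert out['av'] + out['adversary'] == 0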
Example #14
    def compute_reward(self, state, rl_actions, **kwargs):
        vel_reward = rewards.desired_velocity(self, fail=kwargs["fail"])

        # Use a similar weighting of the headway reward as the velocity
        # reward
        max_cost = np.array([self.env_params.additional_params[
                                 "target_velocity"]] *
                            self.vehicles.num_vehicles)
        max_cost = np.linalg.norm(max_cost)
        normalization = self.scenario.length / self.vehicles.num_vehicles
        headway_reward = 0.2 * max_cost * rewards.penalize_headway_variance(
            self.vehicles, self.sorted_extra_data, normalization)
        return vel_reward + headway_reward
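
With made-up ring-road numbers, the weighting applied to the headway term above works out as follows (the target velocity, vehicle count, and ring length are all hypothetical):

import numpy as np

target_velocity = 10.0
num_vehicles = 22
ring_length = 230.0
max_cost = np.linalg.norm(np.full(num_vehicles, target_velocity))   # ~= 46.90
normalization = ring_length / num_vehicles                          # ~= 10.45
print(0.2 * max_cost)   # ~= 9.38, the weight placed on the headway-variance term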
Example #15
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        # Compute the common reward.
        reward = rewards.desired_velocity(self, fail=kwargs['fail'])
        #reward_dict = {key: reward for key in self.k.vehicle.get_rl_ids()}
        max_speed = self.k.network.max_speed()
        reward_dict = {}
        penalty_coef = 0.5
        fixed_reward = 0.05
        for rl_id in self.k.vehicle.get_rl_ids():
            if self.k.vehicle.get_speed(rl_id) < -2 * max_speed:
                # this condition occurs if there is a crash, applies penalty proportionate to size of system
                reward_dict[rl_id] = -penalty_coef * len(
                    self.k.vehicle.get_rl_ids())
            else:
                reward_dict[rl_id] = reward + fixed_reward

        # Reward is shared by all agents.
        return reward_dict
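
Illustrative arithmetic for the crash branch above, with a hypothetical number of RL vehicles and desired_velocity value:

penalty_coef, fixed_reward = 0.5, 0.05
num_rl = 4                                  # hypothetical number of RL vehicles
reward = 0.7                                # hypothetical desired_velocity value
print(-penalty_coef * num_rl)               # -2.0, assigned to a crashed agent
print(reward + fixed_reward)                # 0.75, assigned otherwise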
Example #16
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        if self.env_params.evaluate:
            return np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))
        else:
            # return a reward of 0 if a collision occurred
            if kwargs["fail"]:
                return 0

            # reward high system-level velocities
            cost1 = rewards.desired_velocity(self, fail=kwargs["fail"])

            # encourage rl to move
            cost2 = rewards.rl_forward_progress(self,
                                                gain=1) / (30 * self.num_rl)
            # weights for cost1 and cost2, respectively
            eta1, eta2 = 0.50, 0.50

            return max(eta1 * cost1 + eta2 * cost2, 0)
Example #17
    def compute_reward(self, state, rl_actions, **kwargs):
        """
        See parent class

        The reward function is negative norm of the difference between the
        velocities of each vehicle and the target velocity. Also, a small
        penalty is added for RL lane changes in order to encourage minimizing
        lane-changing actions.
        """
        # compute the system-level performance of vehicles from a velocity
        # perspective
        reward = rewards.desired_velocity(self, fail=kwargs["fail"])

        # punish excessive lane changes by reducing the reward by a set value
        # every time an rl car changes lanes
        for veh_id in self.rl_ids:
            if self.vehicles.get_state(veh_id, "last_lc") == self.timer:
                reward -= 1

        return reward
Example #18
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        # in the warmup steps
        if rl_actions is None:
            return {}

        rewards = {}
        for rl_id in self.k.vehicle.get_rl_ids():
            if self.env_params.evaluate:
                # reward is speed of vehicle if we are in evaluation mode
                reward = self.k.vehicle.get_speed(rl_id)
            elif kwargs['fail']:
                # reward is 0 if a collision occurred
                reward = 0
            else:
                # reward high system-level velocities
                actual_speed = self.k.vehicle.get_speed(rl_id)
                # cost1 = desired_velocity(self, fail=kwargs['fail']) / actual_speed if actual_speed > 0 else 0
                cost1 = (actual_speed /
                         desired_velocity(self, fail=kwargs['fail'])) * 10

                # penalize small time headways
                cost2 = 0
                t_min = 1  # smallest acceptable time headway

                lead_id = self.k.vehicle.get_leader(rl_id)
                if lead_id not in ["", None
                                   ] and self.k.vehicle.get_speed(rl_id) > 0:
                    t_headway = max(
                        self.k.vehicle.get_headway(rl_id) /
                        self.k.vehicle.get_speed(rl_id), 0)
                    cost2 += min((t_headway - t_min) / t_min, 0)

                # weights for cost1 and cost2, respectively
                eta1, eta2 = 1.00, 0.10

                reward = max(eta1 * cost1 + eta2 * cost2, 0)

            rewards[rl_id] = reward
        return rewards
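
With made-up values, the per-vehicle reward above works out as follows; note that cost1 divides the vehicle's speed by the global desired_velocity value (itself in [0, 1]):

actual_speed = 8.0      # m/s (hypothetical)
desired = 0.4           # value returned by desired_velocity(...) (hypothetical)
cost1 = (actual_speed / desired) * 10            # 200.0
cost2 = -0.5                                     # headway penalty, as above
eta1, eta2 = 1.00, 0.10
print(max(eta1 * cost1 + eta2 * cost2, 0))       # 199.95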
Example #19
    def compute_reward(self, rl_actions, **kwargs):
        return_rewards = {}
        # in the warmup steps, rl_actions is None
        if rl_actions:
            # for rl_id, actions in rl_actions.items():
            for rl_id in self.k.vehicle.get_rl_ids():

                reward = 0
                # If there is a collision all agents get no reward
                if not kwargs['fail']:
                    # Reward desired velocity in own edge
                    edge_num = self.k.vehicle.get_edge(rl_id)
                    reward += rewards.desired_velocity(self,
                                                       fail=kwargs['fail'],
                                                       edge_list=[edge_num],
                                                       use_only_rl_ids=True)

                    # Punish own lane changing
                    if rl_id in rl_actions:
                        reward -= abs(rl_actions[rl_id][1])

                return_rewards[rl_id] = reward
        return return_rewards
Example #20
    def test_desired_velocity(self):
        """Test the desired_velocity method."""
        vehicles = VehicleParams()
        vehicles.add("test", num_vehicles=10)

        env_params = EnvParams(
            additional_params={
                "target_velocity": np.sqrt(10),
                "max_accel": 1,
                "max_decel": 1,
                "sort_vehicles": False
            })

        env, _, _ = ring_road_exp_setup(vehicles=vehicles,
                                        env_params=env_params)

        # check that the fail attribute leads to a zero return
        self.assertEqual(desired_velocity(env, fail=True), 0)

        # check the average speed upon reset
        self.assertEqual(desired_velocity(env, fail=False), 0)

        # check the average speed upon reset with a subset of edges
        self.assertEqual(
            desired_velocity(env, edge_list=["bottom"], fail=False), 0)

        # change the speed of one vehicle
        env.k.vehicle.test_set_speed("test_0", np.sqrt(10))

        # check the new average speed
        self.assertAlmostEqual(desired_velocity(env, fail=False),
                               1 - np.sqrt(90) / 10)

        # check the new average speed for a subset of edges
        self.assertAlmostEqual(
            desired_velocity(env, edge_list=["bottom"], fail=False),
            1 - np.sqrt(20) / np.sqrt(30))

        # change the speed of one of the vehicles outside the edge list
        env.k.vehicle.test_set_speed("test_8", 10)

        # check that the new average speed is the same as before
        self.assertAlmostEqual(
            desired_velocity(env, edge_list=["bottom"], fail=False),
            1 - np.sqrt(20) / np.sqrt(30))
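
The expected values in this test follow from the L2-norm formulation of desired_velocity that the numbers are consistent with; the recomputation below is not part of the original test, and the three-vehicle count on the "bottom" edge is inferred from the expected value:

import numpy as np

# reward = (||v_target|| - ||v - v_target||) / ||v_target|| over the selected vehicles
target = np.sqrt(10)

# All 10 vehicles, with test_0 moving at the target speed:
speeds = np.array([target] + [0.0] * 9)
max_cost = np.linalg.norm(np.full(10, target))    # sqrt(10 * 10) = 10
cost = np.linalg.norm(speeds - target)            # sqrt(9 * 10) = sqrt(90)
print(max(max_cost - cost, 0) / max_cost)         # 1 - sqrt(90) / 10

# The three vehicles on the "bottom" edge, with test_0 among them:
speeds = np.array([target, 0.0, 0.0])
max_cost = np.linalg.norm(np.full(3, target))     # sqrt(3 * 10) = sqrt(30)
cost = np.linalg.norm(speeds - target)            # sqrt(2 * 10) = sqrt(20)
print(max(max_cost - cost, 0) / max_cost)         # 1 - sqrt(20) / sqrt(30)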
Example #21
    def compute_reward(self, state, rl_actions, **kwargs):
        # return a reward of 0 if a collision occurred
        if kwargs["fail"]:
            return 0

        # reward high system-level velocities
        cost1 = rewards.desired_velocity(self, fail=kwargs["fail"])

        # penalize small time headways
        cost2 = 0
        t_min = 1  # smallest acceptable time headway
        for rl_id in self.rl_veh:
            lead_id = self.vehicles.get_leader(rl_id)
            if lead_id not in ["", None] \
                    and self.vehicles.get_speed(rl_id) > 0:
                t_headway = max(
                    self.vehicles.get_headway(rl_id) /
                    self.vehicles.get_speed(rl_id), 0)
                cost2 += min(t_headway - t_min, 0)

        # weights for cost1 and cost2, respectively
        eta1, eta2 = 1.00, 0.10

        return eta1 * cost1 + eta2 * cost2
Example #22
    def compute_reward(self, state, rl_actions, **kwargs):
        return rewards.desired_velocity(self, fail=kwargs["fail"])
Example #23
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        if self.env_params.evaluate:
            return -rewards.min_delay_unscaled(self)
        else:
            return rewards.desired_velocity(self, fail=kwargs["fail"])
Example #24
    def compute_reward(self, rl_actions, **kwargs):
        return rewards.desired_velocity(self, edge_list=["edge2", "edge3"])
Example #25
    def compute_reward(self, state, rl_actions, **kwargs):
        if self.env_params.evaluate:
            return rewards.min_delay_unscaled(self)
        else:
            return rewards.desired_velocity(self, fail=kwargs["fail"])
Example #26
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        if self.env_params.evaluate:
            return np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))
        else:
            return rewards.desired_velocity(self, fail=kwargs['fail'])
Example #27
    def compute_reward(self, rl_actions=None, rl_states=None, **kwargs):
        """See class definition."""
        if self.env_params.evaluate:
            return np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))
        else:
            ## return a reward of 0 if a collision occurred
            #if kwargs["fail"]:
            #    return 0

            rew = {}
            info = {}

            scale = self.env_params.additional_params["reward_scale"]
            # weights for cost1, cost2, and cost3, respectively
            eta1 = self.env_params.additional_params["eta1"]
            eta2 = self.env_params.additional_params["eta2"]
            eta3 = self.env_params.additional_params["eta3"]
            FLOW_RATE = self.env_params.additional_params["FLOW_RATE"]

            # reward high system-level velocities
            cost1 = rewards.desired_velocity(self, fail=kwargs["fail"])
            # print("cost1: {}".format(cost1))
            mean_vel = np.mean(
                self.k.vehicle.get_speed(self.k.vehicle.get_ids()))
            outflow = self.k.vehicle.get_outflow_rate(100)
            if outflow is None:
                outflow = 0.0

            # penalize small time headways
            t_min = self.env_params.additional_params[
                "t_min"]  # smallest acceptable time headway
            for rl_id in self.k.vehicle.get_rl_ids():
                cost2 = 0.0
                current_edge = self.k.vehicle.get_edge(rl_id)
                # don't control cars in inflow edge,
                # if 'inflow' in current_edge:
                #     continue

                lead_id = self.k.vehicle.get_leader(rl_id)
                if lead_id not in ["", None] \
                        and self.k.vehicle.get_speed(rl_id) > 0:
                    t_headway = max(
                        self.k.vehicle.get_headway(rl_id) /
                        self.k.vehicle.get_speed(rl_id), 0)
                    cost2 += min((t_headway - t_min) / t_min, 0.0)
                rew.update({
                    rl_id:
                    max(
                        eta1 * cost1 + eta2 * cost2 +
                        eta3 * outflow / FLOW_RATE, 0.0) * scale - 1.0
                })
                info.update({
                    rl_id: {
                        'cost1': cost1,
                        'cost2': cost2,
                        'mean_vel': mean_vel,
                        "outflow": outflow
                    }
                })
                if kwargs["fail"]:
                    rew.update({rl_id: 0.0})
                    info.update({
                        rl_id: {
                            'cost1': cost1,
                            'cost2': cost2,
                            'mean_vel': mean_vel,
                            "outflow": outflow
                        }
                    })

            return rew, info
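
Illustrative arithmetic for the per-agent reward above; the weights, scale, flow rate, and cost values below are all hypothetical:

eta1, eta2, eta3 = 1.0, 0.1, 0.5        # hypothetical weights from additional_params
scale, FLOW_RATE = 1.0, 1900.0          # hypothetical reward scale and nominal inflow
cost1, cost2 = 0.6, -0.3                # desired-velocity term and headway penalty
outflow = 950.0                         # veh/hr over the last 100 seconds
print(max(eta1 * cost1 + eta2 * cost2 + eta3 * outflow / FLOW_RATE, 0.0) * scale - 1.0)
# 0.6 - 0.03 + 0.25 - 1.0 = -0.18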
Example #28
    def compute_reward(self, state, rl_actions, **kwargs):
        if self.env_params.evaluate:
            return np.mean(self.vehicles.get_speed(self.vehicles.get_ids()))
        else:
            return rewards.desired_velocity(self, fail=kwargs["fail"])