Example #1
import random

q_learning_params = dict()
q_learning_params['exp_exp_tradeoff'] = random.uniform(0, 1)  # DONE: Add random seed to enable replication. Only keep exp_exp_tradeoff here.
q_learning_params['learning_rate'] = 0.7  # Learning rate
q_learning_params['gamma'] = 0.5  # Discounting rate
# Exploration parameters
q_learning_params['epsilon'] = 1.0  # Exploration rate
q_learning_params['max_epsilon'] = 1.0  # Exploration probability at start
q_learning_params['min_epsilon'] = 0.01  # Minimum exploration probability
q_learning_params['decay_rate'] = 0.0001  # Exponential decay rate for exploration prob
load_q_table = False

# Reward Parameters:
give_final_reward = False  # bool: if False, no final reward is given; only step-by-step rewards are given.
enable_checks = False
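
# The exploration parameters above suggest an epsilon-greedy schedule with
# exponential decay. A minimal sketch of how such parameters are commonly
# combined (this decay formula is an illustrative assumption, not code from
# this project):
import numpy as np

def epsilon_for_episode(episode, params=q_learning_params):
    # Decay epsilon from max_epsilon toward min_epsilon as episodes pass.
    return params['min_epsilon'] + (
        params['max_epsilon'] - params['min_epsilon']
    ) * np.exp(-params['decay_rate'] * episode)

# e.g. epsilon_for_episode(0) == 1.0; with decay_rate = 0.0001, epsilon only
# approaches min_epsilon after tens of thousands of episodes.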

# Don't forget to initialize SimTime before importing Vehicle.

from Utils.Vehicle import Vehicle
vehicles_list = [Vehicle("LH")]  # NOTE: No error will be produced if some cars are not in this list.
                                 # An error will be produced only when in a not-present ID is requested , Vehicle("RB0"), Vehicle("RB1"), Vehicle("RB2"), Vehicle("RB3")
vehicles_data = dict()  # dict of lists. Key: lane index; value: list of indices for agents in that lane.

num_lanes = 3
lanes_busyness = [0.5, 0.5, 0.5]  # corresponding to lanes: [0, 1, 2] -- i.e.: [bottom-most lane, middle lane, top-most lane]
lanes_busyness_mode = 1  # 0 for placing cars at equal distance, 1 for placing cars every (car_length + minGap + max_speed) with probability = lanes_busyness
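
# A sketch of what lanes_busyness_mode = 1 could mean in practice (the
# function below and its parameters are illustrative assumptions, not this
# project's actual placement code): each candidate slot of length
# (car_length + minGap + max_speed) is occupied with probability
# lanes_busyness[lane].
def place_cars(track_len, car_length, min_gap, max_speed, busyness):
    slot = car_length + min_gap + max_speed  # one candidate position per slot
    positions = []
    for start in range(0, int(track_len), int(slot)):
        if random.uniform(0, 1) < busyness:  # occupy slot with probability busyness
            positions.append(start)
    return positions

# e.g. place_cars(500, 2.0, 1.0, 10, lanes_busyness[0]) occupies roughly half
# of the 39 candidate slots of lane 0.
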
    def reset(self, sumoBinary):
        '''
        :function: Resets the variables needed to start the next training episode and reloads the randomly initialized XML files for the next episode.
        :return: None; the environment is reset in place.

        :Notes:
        * Commented lines are tautologies (do not add new info), kept only for reference to what is inherited from last
        episode run and from initialization.

        :sources: https://www.eclipse.org/lists/sumo-user/msg03016.html (how to reset the SUMO environment from code)
        '''

        # ------------------------------------------------------------------- #
        # 1 :        R E S E T       O L D       V A R I A B L E S
        # ------------------------------------------------------------------- #

        # self.amb_to_change_lane = amb_to_change_lane
        # self.start_pos_for_agents = start_pos_for_agents
        # self.lanes_busyness = lane_busyness_list

        # self.name = self.name
        # self.amb_goal_dist = self.amb_goal_dist
        self.reward = 0.0
        self.emer_start_lane = None

        # self.rel_amb_y_min = self.rel_amb_y_min
        # self.rel_amb_y_max = self.rel_amb_y_max

        # self.count_emergency_vehicles = self.count_emergency_vehicles
        # self.count_ego_vehicles = self.count_ego_vehicles
        # self.agents = self.agents
        # self.emer = self.emer

        self.hidden_state = None
        self.observed_state = None
        self.full_state = None

        # self.Actions = self.Actions
        # self.action_to_string_dict = self.action_to_string_dict

        # ---------------------------------------------------------------------------- #
        # 2 :        R A N D O M L Y      I N I T I A L I Z E       X M L s
        # ---------------------------------------------------------------------------- #
        self.templates_reset(start_pos_for_agents=self.start_pos_for_agents)  # Vehicles list gets reset here

        # ---------------------------------------------------------------------------- #
        # 3 :          I N I T I A T E    S U M O     E N V I R O N M E N T
        #                             and vehicles list
        # ---------------------------------------------------------------------------- #
        traci.load([
            "-c", Sumocfg_DIR, "--tripinfo-output", "tripinfo.xml",
            "--seed", str(Sumo_random_seed), "--quit-on-end", "--start"
        ])

        # Create the real global vehicles list (replacing the temporary/fake one initialized in Config.py with the ambulance only):

        self.vehicles_list = [
            Vehicle("LH",
                    vehicle_params=self.vehicle_params,
                    control_algorithm_name="SUMO_KRAUSS")
        ]  # NOTE: No error is produced if some cars are missing from this list.
        # An error occurs only when a non-present ID is requested.

        agent_list_index = 0

        for lane, num_cars in vehicles_data.items():
            for agent_index in range(num_cars):
                # set control_type according to chosen percentage:
                if random.uniform(0, 1) < self.rl_percent:  # Then, choose the RL algorithm
                    control_type = "Q_LEARNING_SINGLE_AGENT"  # possible values: ["Q_LEARNING_SINGLE_AGENT", "SUMO_KRAUSS"]
                else:  # Then, choose the SUMO algorithm
                    control_type = "SUMO_KRAUSS"

                # The ambulance always comes first in the vehicles list, so agent entries start at index 1.

                self.vehicles_list.append(
                    Vehicle(ID=self.create_vehicle_id(lane, agent_index),
                            vehicle_params=self.vehicle_params,
                            control_algorithm_name=control_type))
                agent_list_index += 1
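
        # With self.rl_percent = 1.0 every agent above is RL-controlled; a
        # value like 0.5 would instead make each agent RL-controlled
        # independently with probability 0.5 (one Bernoulli draw per agent).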

        for vehc in self.vehicles_list:  # initialize all vehicles
            vehc.initialize()

        self.list_of_vehicles = copy.copy(self.vehicles_list)
        # Note: shallow copy keeps references to the original vehicles (deepcopy would duplicate the Vehicle objects).
        self.recount_vehicles()

        # Calculation for optimal time is kept in case the track_len is changed between episodes
        # Optimal number of time steps: the number of steps the ambulance takes at maximum speed.
        self.optimal_time = int(np.round(track_len / self.emer.max_speed))
        self.max_steps = 20 * self.optimal_time
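
        # Worked example (numbers assumed for illustration): with track_len =
        # 500 and self.emer.max_speed = 10, optimal_time = round(500 / 10) =
        # 50 steps and max_steps = 20 * 50 = 1000 steps, i.e. an episode is
        # cut off after 20x the ideal ambulance travel time.
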
q_learning_params['exp_exp_tradeoff'] = random.uniform(0, 1)  # TODO: Add random seed to enable replication. Only keep exp_exp_tradeoff here.
q_learning_params['learning_rate'] = 0.7  # Learning rate
q_learning_params['gamma'] = 0.5  # Discounting rate
# Exploration parameters
q_learning_params['epsilon'] = 1.0  # Exploration rate
q_learning_params['max_epsilon'] = 1.0  # Exploration probability at start
q_learning_params['min_epsilon'] = 0.01  # Minimum exploration probability
q_learning_params['decay_rate'] = 0.0001  # Exponential decay rate for exploration prob
load_q_table = True

# Reward Parameters:
give_final_reward = False  # bool: if False, no final reward is given; only step-by-step rewards are given.

amb_r_w = 1  # ambulance reward weight
agent_acc_r_w = 0.2  # agent acceleration reward weight
agent_chL_r_w = 0.2  # agent lane-change reward weight
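
# A sketch of how these weights could combine into a per-step reward (the
# formula below is an assumption for illustration; the project's actual
# reward function may differ):
def step_reward(amb_progress, agent_acc_term, agent_lane_change_term):
    # Weighted sum: ambulance progress dominates (weight 1); the agent
    # acceleration and lane-change terms contribute with weight 0.2 each.
    return (amb_r_w * amb_progress
            + agent_acc_r_w * agent_acc_term
            + agent_chL_r_w * agent_lane_change_term)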

# Checks parameter
enable_checks = True
# Don't forget to initialize SimTime before importing Vehicle.
from Utils.Vehicle import Vehicle

vehicles_list = [Vehicle("LH"), Vehicle("RB")]

# global variables
step = 0
    def __init__(self,
                 sumoBinary,
                 amb_to_change_lane=ambulance_changes_lane,
                 lane_busyness_list=lanes_busyness,
                 rl_percent_in=1.0,
                 start_pos_for_agents="middle",
                 name="MultiAgent1.0",
                 ambulance_goal_distance=500,
                 rel_amb_y_min=-41,
                 rel_amb_y_max=16):

        self.amb_to_change_lane = amb_to_change_lane
        self.start_pos_for_agents = start_pos_for_agents
        self.lanes_busyness = lane_busyness_list
        self.rl_percent = rl_percent_in

        self.name = name  # NOT USED EXCEPT FOR DISPLAY PURPOSES
        self.amb_goal_dist = ambulance_goal_distance
        self.reward = 0.0
        self.emer_start_lane = None
        self.emer_car_len = 2.0
        self.agent_car_len = 2.0

        self.rel_amb_y_min = rel_amb_y_min
        self.rel_amb_y_max = rel_amb_y_max

        self.agents = []  # Stays as is in multiagent
        self.emer = None  # Stays as is in multiagent

        self.hidden_state = None
        self.observed_state = None
        self.full_state = None

        self.Actions = ["change_left", "change_right", "acc", "no_acc", "dec"]
        self.action_to_string_dict = {
            "change_left": 0,
            "change_right": 1,
            "acc": 2,
            "no_acc": 3,
            "dec": 4
        }  # Must match the order in Actions
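
        # Illustrative consistency check (hypothetical addition): the dict
        # must mirror the list order, so self.Actions[i] recovers the action
        # name for index i and self.action_to_string_dict[name] its index.
        assert all(self.Actions[i] == name
                   for name, i in self.action_to_string_dict.items())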

        self.count_emergency_vehicles = 0  # Temporarily assigned; reassigned in .reset()->recount_vehicles() to avoid calling a non-initialized vehicle
        self.count_ego_vehicles = 0  # Temporarily assigned; reassigned in .reset()->recount_vehicles() to avoid calling a non-initialized vehicle
        # vehicles_data  # dict with: key = lane number, value = number of cars in lane
        self.max_possible_cars = None  # Maximum possible number of cars in a lane given the starting position

        self.optimal_time = 0  # Temporarily assigned; reassigned in .reset() to avoid calling a non-initialized vehicle
        self.max_steps = 10000  # Temporarily assigned; reassigned in .reset() to avoid calling a non-initialized vehicle

        # ---------------------------------------------------------------------------- #
        # 2 :        R A N D O M L Y      I N I T I A L I Z E       X M L s
        #                     and consequently vehicles data
        # ---------------------------------------------------------------------------- #
        self.templates_reset(self.start_pos_for_agents)

        # ---------------------------------------------------------------------------- #
        # 3 :          I N I T I A T E    S U M O     E N V I R O N M E N T
        #                             and vehicles list
        # ---------------------------------------------------------------------------- #
        traci.start([
            sumoBinary, "-c", Sumocfg_DIR, "--tripinfo-output", "tripinfo.xml",
            "--seed",
            str(Sumo_random_seed), "--quit-on-end", "--start"
        ])  # SUMO starts
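
        # traci.start() launches the SUMO process once, here in __init__;
        # subsequent episodes call traci.load() in reset(), which reloads the
        # simulation inside the same SUMO process instead of restarting it.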

        self.vehicle_params = dict()
        self.vehicle_params['Actions'] = self.Actions
        self.vehicle_params['action_to_string_dict'] = self.action_to_string_dict
        self.vehicle_params['rel_amb_y_min'] = self.rel_amb_y_min
        self.vehicle_params['rel_amb_y_max'] = self.rel_amb_y_max
        self.vehicle_params['change_lane_if_amb'] = self.amb_to_change_lane

        self.vehicles_list = [
            Vehicle("LH",
                    vehicle_params=self.vehicle_params,
                    control_algorithm_name="SUMO_KRAUSS")
        ]  # NOTE: No error is produced if some cars are missing from this list.
        # An error occurs only when a non-present ID is requested.

        # Create the real global vehicles list (replacing the temporary/fake one initialized in Config.py with the ambulance only):

        agent_list_index = 0

        for lane, num_cars in vehicles_data.items():
            for agent_index in range(num_cars):
                # set control_type according to chosen percentage:
                if random.uniform(0, 1) < self.rl_percent:  # Then, choose the RL algorithm
                    control_type = "Q_LEARNING_SINGLE_AGENT"  # possible values: ["Q_LEARNING_SINGLE_AGENT", "SUMO_KRAUSS"]
                else:  # Then, choose the SUMO algorithm
                    control_type = "SUMO_KRAUSS"

                # The ambulance always comes first in the vehicles list, so agent entries start at index 1.

                self.vehicles_list.append(
                    Vehicle(ID=self.create_vehicle_id(lane, agent_index),
                            vehicle_params=self.vehicle_params,
                            control_algorithm_name=control_type))
                agent_list_index += 1

        for vehc in self.vehicles_list:  # initialize all vehicles
            vehc.initialize()

        self.list_of_vehicles = copy.copy(self.vehicles_list)
        # Note: shallow copy keeps references to the original vehicles (deepcopy would duplicate the Vehicle objects).
        self.recount_vehicles()
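
# Why copy.copy rather than copy.deepcopy (standalone sketch; DummyVehicle is
# a hypothetical stand-in for Utils.Vehicle.Vehicle): a shallow copy creates a
# new list object whose entries still reference the same vehicle instances, so
# per-vehicle state stays shared between both lists.
import copy

class DummyVehicle:  # stand-in for the project's Vehicle class
    def __init__(self, ID):
        self.ID = ID

originals = [DummyVehicle("LH")]
shallow = copy.copy(originals)
assert shallow is not originals      # independent list objects...
assert shallow[0] is originals[0]    # ...sharing the same vehicle instance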