# --------------------------------------------------------------------------- #
# Q-learning hyper-parameters                                                  #
# --------------------------------------------------------------------------- #
q_learning_params = dict()
q_learning_params['exp_exp_tradeoff'] = random.uniform(0, 1)  # DONE: Add random seed to enable replication. Only keep the exp_exp_tradeoff here.
q_learning_params['learning_rate'] = 0.7    # Learning rate
q_learning_params['gamma'] = 0.5            # Discounting rate

# Exploration parameters
q_learning_params['epsilon'] = 1.0          # Exploration rate
q_learning_params['max_epsilon'] = 1.0      # Exploration probability at start
q_learning_params['min_epsilon'] = 0.01     # Minimum exploration probability
q_learning_params['decay_rate'] = 0.0001    # Exponential decay rate for exploration prob

load_q_table = False

# Reward Parameters:
give_final_reward = False  # bool: if False, no final reward is given. Step by Step reward only is given.

enable_checks = False

# Don't forget to initialize SimTime before importing vehicle
from Utils.Vehicle import Vehicle

vehicles_list = [Vehicle("LH")]
# NOTE: No error will be produced if some cars are not in this list.
#       An error will be produced only when a not-present ID is requested.
# BUGFIX: the fragment below was a bare expression in the original source
# (a syntax error where it sat) — restored as the commented-out extras it
# evidently was:
# , Vehicle("RB0"), Vehicle("RB1"), Vehicle("RB2"), Vehicle("RB3")

vehicles_data = dict()  # dict of lists. Key: Lane index, value: list of indices for agents in this index
num_lanes = 3
lanes_busyness = [0.5, 0.5, 0.5]  # corresponding to lanes: [0, 1, 2] -- i.e.: [bottom-most lane, middle lane, top-most lane]
lanes_busyness_mode = 1  # 0 for placing cars at equal distance, 1 for placing cars every (car_length + minGap + max_speed) with probability = lanes_busyness
def reset(self, sumoBinary):
    '''
    :function: Resets variables necessary to start the next training episode,
        and reloads randomly initialized next-episode XML files.
    :param sumoBinary: SUMO binary path; kept for interface compatibility.
        NOTE(review): not used here — traci.load reuses the already-running
        SUMO instance started in __init__.
    :return: None, but resets environment.
    :Notes:
        * Commented lines are tautologies (do not add new info), kept only for
          reference to what is inherited from the last episode run and from
          initialization.
    :sources: https://www.eclipse.org/lists/sumo-user/msg03016.html
        (how to reset SUMO environment from code)
    '''
    # ------------------------------------------------------------------- #
    #  1 :  R E S E T   O L D   V A R I A B L E S                          #
    # ------------------------------------------------------------------- #
    # self.amb_to_change_lane = amb_to_change_lane
    # self.start_pos_for_agents = start_pos_for_agents
    # self.lanes_busyness = lane_busyness_list
    # self.name = self.name
    # self.amb_goal_dist = self.amb_goal_dist
    self.reward = 0.0
    self.emer_start_lane = None
    # self.rel_amb_y_min = self.rel_amb_y_min
    # self.rel_amb_y_max = self.rel_amb_y_max
    # self.count_emergency_vehicles = self.count_emergency_vehicles
    # self.count_ego_vehicles = self.count_ego_vehicles
    # self.agents = self.agents
    # self.emer = self.emer
    self.hidden_state = None
    self.observed_state = None
    self.full_state = None
    # self.Actions = self.Actions
    # self.action_to_string_dict = self.action_to_string_dict

    # ---------------------------------------------------------------------------- #
    #  2 :  R A N D O M L Y   I N I T I A L I Z E   X M L s                         #
    # ---------------------------------------------------------------------------- #
    self.templates_reset(start_pos_for_agents=self.start_pos_for_agents)  # Vehicles list gets reset here

    # ---------------------------------------------------------------------------- #
    #  3 :  I N I T I A T E   S U M O   E N V I R O N M E N T                       #
    #       and vehicles list                                                       #
    # ---------------------------------------------------------------------------- #
    # BUGFIX: "--start" was passed twice in the original option list; the
    # duplicate has been removed.
    traci.load([
        "-c", Sumocfg_DIR,
        "--tripinfo-output", "tripinfo.xml",
        "--start",
        "--seed", str(Sumo_random_seed),
        "--quit-on-end",
    ])

    # Create the real global vehicles list (temporary/fake: initialized one in
    # Config.py with ambulance only):
    self.vehicles_list = [
        Vehicle("LH",
                vehicle_params=self.vehicle_params,
                control_algorithm_name="SUMO_KRAUSS")
    ]
    # NOTE: No error will be produced if some cars are not in this list.
    #       An error will be produced only when a not-present ID is requested.
    for lane, num_cars in vehicles_data.items():
        for agent_index in range(num_cars):
            # Set control_type according to chosen percentage:
            if random.uniform(0, 1) < self.rl_percent:
                # Then, choose the RL algorithm
                control_type = "Q_LEARNING_SINGLE_AGENT"  # possible values: ["Q_LEARNING_SINGLE_AGENT", "SUMO_KRAUSS"]
            else:
                # Then, choose the SUMO algorithm
                control_type = "SUMO_KRAUSS"
            # NOTE(review): `env` is a module-level global — presumably the
            # singleton environment; verify it refers to this same instance.
            self.vehicles_list.append(
                Vehicle(ID=env.create_vehicle_id(lane, agent_index),
                        vehicle_params=self.vehicle_params,
                        control_algorithm_name=control_type))
            # (removed unused local `agent_list_index`, which was incremented
            # but never read)

    for vehc in self.vehicles_list:  # vehicles initialized
        vehc.initialize()

    # Shallow copy: keeps references to the original Vehicle objects
    # (as opposed to deepcopy, which would duplicate the vehicles).
    self.list_of_vehicles = copy.copy(self.vehicles_list)
    self.recount_vehicles()

    # Calculation for optimal time is kept in case the track_len is changed
    # between episodes. Optimal number of time steps = steps taken by the
    # ambulance at maximum speed.
    self.optimal_time = int(np.round(track_len / self.emer.max_speed))
    self.max_steps = 20 * self.optimal_time
# --------------------------------------------------------------------------- #
# Q-learning hyper-parameters (single literal, same keys/values as before)     #
# --------------------------------------------------------------------------- #
q_learning_params = {
    # TODO: Add random seed to enable replication. Only keep the
    # exp_exp_tradeoff here.
    'exp_exp_tradeoff': random.uniform(0, 1),
    'learning_rate': 0.7,     # Learning rate
    'gamma': 0.5,             # Discounting rate
    # Exploration parameters
    'epsilon': 1.0,           # Exploration rate
    'max_epsilon': 1.0,       # Exploration probability at start
    'min_epsilon': 0.01,      # Minimum exploration probability
    'decay_rate': 0.0001,     # Exponential decay rate for exploration prob
}

load_q_table = True

# Reward Parameters:
give_final_reward = False  # bool: if False, no final reward is given. Step by Step reward only is given.
amb_r_w = 1           # ambulance reward weight
agent_acc_r_w = 0.2   # agent according to acceleration reward weight
agent_chL_r_w = 0.2   # agent according to lane change reward weight

# Checks parameter
enable_checks = True

# Don't forget to initialize SimTime before importing vehicle
from Utils.Vehicle import Vehicle

vehicles_list = [Vehicle("LH"), Vehicle("RB")]

# global variables
step = 0
def __init__(self, sumoBinary, amb_to_change_lane=ambulance_changes_lane,
             lane_busyness_list=lanes_busyness, rl_perecent_in=1.0,
             start_pos_for_agents="middle", name="MultiAgent1.0",
             ambulance_goal_distance=500, rel_amb_y_min=-41, rel_amb_y_max=16):
    """Set up the multi-agent environment and launch the SUMO simulation.

    Initializes episode bookkeeping, the action space, per-vehicle parameters,
    then resets the XML templates and starts SUMO via traci before building
    the full vehicles list.

    :param sumoBinary: path of the SUMO binary handed to ``traci.start``.
    :param amb_to_change_lane: whether the ambulance may change lanes
        (default taken from module-level ``ambulance_changes_lane``).
    :param lane_busyness_list: per-lane busyness values (module default
        ``lanes_busyness``).
    :param rl_perecent_in: fraction of agents controlled by RL (the rest use
        SUMO's Krauss model). NOTE(review): parameter name carries a typo
        ("perecent") but is part of the public interface — do not rename
        without updating callers.
    :param start_pos_for_agents: starting-position scheme for agents.
    :param name: environment name — NOT USED EXCEPT FOR DISPLAY PURPOSES.
    :param ambulance_goal_distance: distance the ambulance must cover.
    :param rel_amb_y_min: minimum relative ambulance y used in the state.
    :param rel_amb_y_max: maximum relative ambulance y used in the state.
    """
    self.amb_to_change_lane = amb_to_change_lane
    self.start_pos_for_agents = start_pos_for_agents
    self.lanes_busyness = lane_busyness_list
    self.rl_percent = rl_perecent_in
    self.name = name  # NOT USED EXCEPT FOR DISPLAY PURPOSES
    self.amb_goal_dist = ambulance_goal_distance
    self.reward = 0.0
    self.emer_start_lane = None
    self.emer_car_len = 2.0   # presumably meters — TODO confirm units
    self.agent_car_len = 2.0  # presumably meters — TODO confirm units
    self.rel_amb_y_min = rel_amb_y_min
    self.rel_amb_y_max = rel_amb_y_max
    self.agents = []   # Stays as is in multiagent
    self.emer = None   # Stays as is in multiagent
    self.hidden_state = None
    self.observed_state = None
    self.full_state = None
    self.Actions = ["change_left", "change_right", "acc", "no_acc", "dec"]
    # Must maintain order in Actions (index i maps to Actions[i]).
    self.action_to_string_dict = {
        "change_left": 0,
        "change_right": 1,
        "acc": 2,
        "no_acc": 3,
        "dec": 4
    }
    # Temporary assigned variable, reassigned in .reset()->recount_vehicles()
    # to avoid calling a non-initialized vehicle.
    self.count_emergency_vehicles = 0
    # Temporary assigned variable, reassigned in .reset()->recount_vehicles()
    # to avoid calling a non-initialized vehicle.
    self.count_ego_vehicles = 0
    # vehicles_data  # dict with: key = lane number, value = number of cars in lane
    self.max_possible_cars = None  # Maximum possible number of cars in lane given starting position
    self.optimal_time = 0     # Temporary; reassigned in .reset() to avoid calling a non-initialized vehicle
    self.max_steps = 10000    # Temporary; reassigned in .reset() to avoid calling a non-initialized vehicle

    # ---------------------------------------------------------------------------- #
    #  2 :  R A N D O M L Y   I N I T I A L I Z E   X M L s                         #
    #       and consequently vehicles data                                          #
    # ---------------------------------------------------------------------------- #
    self.templates_reset(self.start_pos_for_agents)

    # ---------------------------------------------------------------------------- #
    #  3 :  I N I T I A T E   S U M O   E N V I R O N M E N T                       #
    #       and vehicles list                                                       #
    # ---------------------------------------------------------------------------- #
    traci.start([
        sumoBinary, "-c", Sumocfg_DIR, "--tripinfo-output", "tripinfo.xml",
        "--seed", str(Sumo_random_seed), "--quit-on-end", "--start"
    ])  # SUMO starts

    # Shared parameter bundle handed to every Vehicle below.
    self.vehicle_params = dict()
    self.vehicle_params['Actions'] = self.Actions
    self.vehicle_params[
        'action_to_string_dict'] = self.action_to_string_dict
    self.vehicle_params['rel_amb_y_min'] = self.rel_amb_y_min
    self.vehicle_params['rel_amb_y_max'] = self.rel_amb_y_max
    self.vehicle_params['change_lane_if_amb'] = self.amb_to_change_lane

    # Ambulance "LH" always comes first in the vehicles list.
    self.vehicles_list = [
        Vehicle("LH",
                vehicle_params=self.vehicle_params,
                control_algorithm_name="SUMO_KRAUSS")
    ]
    # NOTE: No error will be produced if some cars are not in this list.
    #       An error will be produced only when a not-present ID is requested.
    # Create the real global vehicles list (temporary/fake: initialized one in
    # Config.py with ambulance only):
    agent_list_index = 0
    for lane, num_cars in vehicles_data.items():
        for agent_index in range(num_cars):
            # set control_type according to chosen percentage:
            if random.uniform(
                    0, 1) < self.rl_percent:  # Then, choose RL ALgorithm
                control_type = "Q_LEARNING_SINGLE_AGENT"  # possible values: ["Q_LEARNING_SINGLE_AGENT", "SUMO_KRAUSS"]
            else:  # Then, choose the SUMO Algorithm
                control_type = "SUMO_KRAUSS"  # possible values: ["Q_LEARNING_SINGLE_AGENT", "SUMO_KRAUSS"]
            # The plus one is because the ambulance always comes first in the
            # vehicles list.
            # NOTE(review): `env` is a module-level global — presumably the
            # singleton environment; verify it refers to this same instance.
            self.vehicles_list.append(
                Vehicle(ID=env.create_vehicle_id(lane, agent_index),
                        vehicle_params=self.vehicle_params,
                        control_algorithm_name=control_type))
            agent_list_index += 1

    for vehc in self.vehicles_list:  # vehicles initialized
        vehc.initialize()

    # Shallow copy: keeps references to the original vehicles (as opposed to
    # deepcopy, which would copy vehicles).
    self.list_of_vehicles = copy.copy(
        self.vehicles_list
    )
    self.recount_vehicles()