def reset(self):
    # context subscriptions
    # traci.close(wait=True)
    # Reset before simulationOneStep
    self.clear_metrics()
    if not self.router:
        self.generate_route_file()
    else:
        self.router.generate_route_file(self.route_file)
    # traci.start(self.cmd_lines)
    traci.load(self.load_cmd)
    traci.junction.subscribeContext(
        "0", traci.constants.CMD_GET_VEHICLE_VARIABLE, self.region, [
            traci.constants.VAR_LANE_ID, traci.constants.VAR_ROAD_ID,
            traci.constants.VAR_WAITING_TIME,
            traci.constants.VAR_LANEPOSITION, traci.constants.VAR_SPEED
        ])
    # reset the traffic lights to the action green phases
    while self.last_green_time < self.min_green_time:
        self.simulate_one_step()
        if traci.trafficlight.getPhase('tlstatic') in self.green_phases2index:
            self.last_green_time += 1
        else:
            self.last_green_time = 0
    cx_res = traci.junction.getContextSubscriptionResults("0")
    return self.get_observations(cx_res)
def reset(self):
    self.end = 0
    self.TotalReward = 0
    self.oldDistance = 0
    self.nowDistance = 0
    self.lastTlsTd = "cross_6"
    self.lastdistance = 0.99
    self.x_v = 0
    self.y_v = 0
    traci.load([
        "-c", config_path, "--collision.action", "remove", "--no-step-log",
        "--no-warnings", "--no-duration-log"
    ])
    print("Resetting...")
    # traci.vehicle.add("agent", "agent_route")
    random_direct = random.randint(0, 1)
    random_edge = random.randint(3, 12)
    if random_direct == 0:
        direct = '-'
    else:
        direct = ''
    if random_edge != 12:
        edge = str(random_edge)
    else:
        edge = '0'
    dest_name = direct + "gneE" + edge
    print(dest_name)
    pos = self.trafficPos_mapping[self.cross_mapping[dest_name]]
    self.end_x, self.end_y = pos[0], pos[1]
    traci.vehicle.changeTarget("agent", dest_name)
    self.Route = traci.vehicle.getRoute(self.AgentId)
    print(self.Route)
    traci.vehicle.setColor("agent", (255, 0, 0, 255))
    traci.vehicle.setSpeed("agent", 10)
    traci.gui.trackVehicle('View #0', "agent")
    traci.simulationStep()
    AgentAvailable = False
    while not AgentAvailable:
        traci.simulationStep()
        self.VehicleIds = traci.vehicle.getIDList()
        if self.AgentId in self.VehicleIds:
            AgentAvailable = True
    self.StartTime = traci.simulation.getCurrentTime()
    for vehId in self.VehicleIds:
        traci.vehicle.subscribe(vehId, (tc.VAR_SPEED, tc.VAR_POSITION,
                                        tc.VAR_LANE_INDEX, tc.VAR_DISTANCE))
        traci.vehicle.subscribeLeader(self.AgentId, 50)
        if vehId == self.AgentId:
            traci.vehicle.setSpeedMode(self.AgentId, 0)
            traci.vehicle.setLaneChangeMode(self.AgentId, 0)
    self.state, breaklight, breakstop, wronglane = self.perception()
    return self.state
def _inner_reset(self):
    """
    Resets the environment to its initial state.

    :return: the initial state
    """
    # Change the configuration
    self._choose_random_simulation()
    if self.seed is None:
        self.update_seed()
    # Load the traci configuration
    traci.load(self.sumoCmd[1:])
    # Reset configuration values
    self._setup_basic_environment_values()
    # Run the simulation until the ego can be inserted
    self._select_egos()
    self._get_simulation_environment()
    # Get the initial environment state
    self._refresh_environment()
    # Init the lateral model, setting a starting speed for the ego
    self.state['speed'] = self.desired_speed
    if "continuous" in self.type_as:
        self.lateral_model = LateralModel(self.state,
                                          lane_width=self.lane_width,
                                          dt=self.dt)
    return self._get_observation()
def reset(self):
    ###
    # self.generate_routefile_two_intersections()
    self.generate_routefile()
    traci.load([
        "-c", "one_lane/one_lane4.sumocfg", "--collision.check-junctions",
        "1", "--start", '--no-step-log', 'true'
    ])
    ###
    # traci.load(["-c", "one_intersection_w_priority/one_intersection_w_priority.sumocfg",
    #             "--collision.check-junctions", "1", "--start"])
    run = Assembler(self.carID)
    terminate = False
    while not terminate:
        num_vec = traci.vehicle.getIDList()
        for veh_id in num_vec:
            if veh_id != 'ego':
                traci.vehicle.setLaneChangeMode(veh_id, 512)
        self.output = run.getTensor()
        if self.output is not None:
            terminate = True
        traci.simulationStep()
    return self.getObservation()
def reset(self, gui=False):
    # return obs
    # for node_name in self.nodes_name:
    #     self.nodes[node_name].reset()
    self.cur_episode += 1
    self.cur_step = 0
    # self.close()
    if gui:
        app = 'sumo-gui'
    else:
        app = 'sumo'
    command = ['--start', '-c', self.cfg_sumo]
    # command += ['--seed', str(self.sim_seed)]
    # command += ['--no-step-log', 'True']
    # command += ['--time-to-teleport', '300']
    # command += ['--no-warnings', 'True']
    # command += ['--duration-log.disable', 'True']
    # command += ['--tripinfo-output',
    #             self.output_path + ('%s_%s_trip.xml' % (self.name, self.agent))]
    traci.load(command)
    s = 'Env: init %d node information:\n' % len(self.nodes_name)
    for node_name in self.nodes_name:
        s += node_name + ':\n'
        s += '\tneighbor: %s\n' % str(self.nodes[node_name].neighbor)
    logging.info(s)
    for node_name in self.nodes_name:
        traci.junction.subscribeContext(
            node_name, tc.CMD_GET_VEHICLE_VARIABLE, self.ild_length, [
                tc.VAR_LANE_ID, tc.VAR_LANEPOSITION, tc.VAR_SPEED,
                tc.VAR_WAITING_TIME
            ])
    cx_res = {node_name: traci.junction.getContextSubscriptionResults(node_name)
              for node_name in self.nodes_name}
    return self._get_obs(cx_res)
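For reference (not part of the snippet above): getContextSubscriptionResults returns, per junction, a dict keyed by vehicle ID that maps each subscribed variable code to its value. A minimal sketch of unpacking cx_res, with example_unpack being a hypothetical helper, not part of the source environment:

import traci.constants as tc

def example_unpack(cx_res):
    # cx_res: {node_name: {veh_id: {var_code: value}}}; the inner dict may be
    # empty/None when no vehicle is inside the context radius.
    for node_name, vehicles in cx_res.items():
        for veh_id, values in (vehicles or {}).items():
            speed = values[tc.VAR_SPEED]            # m/s
            waiting = values[tc.VAR_WAITING_TIME]   # s
            print(node_name, veh_id, speed, waiting)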
def simulate_run_simple(speed_s, speed_lc, range_x, wfile, idx, crash_count,
                        no_crash_count):
    traci.load(load_params)
    traci.vehicle.add(vehID='veh_0', routeID='route0', depart=0, pos=0,
                      speed=speed_s)
    traci.vehicle.add(vehID='veh_1', routeID='route0', depart=0, pos=range_x,
                      speed=speed_lc)
    traci.vehicle.moveTo('veh_0', '1to2_0', 0)
    traci.vehicle.moveTo('veh_1', '1to2_0', range_x)
    traci.vehicle.setSpeedMode('veh_1', 12)
    traci.vehicle.setSpeedMode('veh_0', 12)
    for step in np.arange(50):
        traci.simulationStep()
        vehicle_ids = traci.vehicle.getIDList()
        if len(vehicle_ids) > 2:
            exit()
        if len(vehicle_ids) == 2:
            range_x = max(traci.vehicle.getLanePosition(vehicle_ids[0]),
                          traci.vehicle.getLanePosition(vehicle_ids[1])) \
                - min(traci.vehicle.getLanePosition(vehicle_ids[0]),
                      traci.vehicle.getLanePosition(vehicle_ids[1]))
            if range_x <= 0.01:
                return True
    if no_crash_count > 0:
        str_line = (str(idx) + ',crash_prob:' +
                    str(round(crash_count / no_crash_count, 5)) + ',crash:' +
                    str(crash_count) + ',' + str(no_crash_count) + '\n')
        wfile.write(str_line)
    return False
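load_params is defined elsewhere in this script; for traci.load() it must be the SUMO option list without the binary name. A hedged sketch of what it plausibly looks like; the config path and option choices here are assumptions, not from the source:

# Assumed shape of load_params -- options only, no binary name:
load_params = [
    "-c", "highway.sumocfg",       # hypothetical config path
    "--collision.action", "none",  # assumed, so the lane-position range check can observe overlaps
    "--no-step-log", "true",
]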
def reset(self): traci.load([ "-c", config_path, "--collision.action", "remove", "--no-step-log", "--no-warnings", "--no-duration-log" ]) print("reseting") traci.simulationStep() self.VehicleIds = traci.vehicle.getIDList() for vehId in self.VehicleIds: traci.vehicle.subscribe(vehId, (tc.VAR_SPEED, tc.VAR_LANE_INDEX, tc.VAR_DISTANCE, tc.VAR_ANGLE))
def reset(self): traci.load(["-c", "huge_crossing/huge_crossing.sumocfg"]) terminate = False while terminate == False: for i in range(len(num_vec)): if num_vec[i] != 'ego': traci.vehicle.setLaneChangeMode(num_vec[i], 512) self.output = run.getTensor() if self.output is not None: terminate = True traci.simulationStep() return self.getObservation()
def run():
    """execute the TraCI control loop"""
    step = 0
    while traci.simulation.getMinExpectedNumber() > 0 and step < 20000:
        traci.simulationStep()
        if step % 500 == 0:
            print(get_lane_waiting_vehicle_count())
            traci.load([
                "-c", "PoundSign/PoundSign.sumocfg", "--tripinfo-output",
                "tripinfo.xml", "-t"
            ])
        step += 1
    traci.close()
    sys.stdout.flush()
def start_sumo(config_file, already_running):
    """
    Starts or restarts sumo with the given configuration file

    :param config_file: sumo configuration file
    :param already_running: if set to true then the command simply reloads
        the given config file, otherwise sumo is started from scratch
    """
    arguments = ["-c"]
    sumo_cmd = [sumolib.checkBinary('sumo-gui')]
    arguments.append(config_file)
    if already_running:
        traci.load(arguments)
    else:
        sumo_cmd.extend(arguments)
        traci.start(sumo_cmd)
def reset(self):
    del CARS[:]
    del collisions[:]
    del DRIVEN[:]
    del gotReward[:]
    del hasCrashed[:]
    if self.test and len(self.run) != 0:
        self.result.append(list(self.run))
        self.run.clear()
    global counter
    with fileinput.FileInput("data/junction.sumocfg", inplace=True) as file:
        for line in file:
            print(line.replace("junction" + str(counter) + ".rou.xml",
                               "junction" + str(counter + 1) + ".rou.xml"),
                  end='')
    counter += 1
    createRoute.generate_random_routefile(counter)
    traci.load(["-c", self.config_path])
    # Go to the simulation step where our autonomous car gets in action
    for _ in range(DEPART_TIME * 10):
        traci.simulationStep()
    # Setup environment
    # print("CARS: ", CARS)
    for car in CARS:
        try:
            traci.vehicle.setSpeedMode(car, 0)
            traci.vehicle.setSpeed(car, rn.randint(1, 8))
            traci.vehicle.subscribe(
                car, [VAR_SPEED, VAR_DISTANCE, VAR_POSITION, VAR_ANGLE])
        except traci.exceptions.TraCIException:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            # print(car, exc_type, fname, exc_tb.tb_lineno)
            pass
    traci.simulationStep()
    self.set_state(VEH_ID)
    return self.state
def start_sumo(sumo_binary, config_file, already_running):
    """
    Starts or restarts sumo with the given configuration file

    :param sumo_binary: SUMO binary to be called from command line
        (sumo or sumo-gui)
    :param config_file: sumo configuration file
    :param already_running: if set to true then the command simply reloads
        the given config file, otherwise sumo is started from scratch
    :type sumo_binary: str
    :type config_file: str
    :type already_running: bool
    """
    arguments = ["-c"]
    sumo_cmd = [sumolib.checkBinary(sumo_binary)]
    arguments.append(config_file)
    if already_running:
        traci.load(arguments)
    else:
        sumo_cmd.extend(arguments)
        traci.start(sumo_cmd)
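A hedged usage sketch (not from the source): start_sumo is typically called once with already_running=False to spawn SUMO, then with already_running=True between episodes so that traci.load() reuses the running process. "my.sumocfg" is a placeholder path:

import traci

config = "my.sumocfg"  # placeholder
start_sumo("sumo", config, already_running=False)     # first episode: traci.start
for episode in range(5):
    while traci.simulation.getMinExpectedNumber() > 0:
        traci.simulationStep()
    start_sumo("sumo", config, already_running=True)  # later episodes: traci.load
traci.close()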
def _reset(self):
    if self.test and len(self.run) != 0:
        self.result.append(list(self.run))
        self.run.clear()
    traci.load(["-c", self.config_path])
    # Go to the simulation step where our autonomous car gets in action
    for _ in range(DEPART_TIME * 10):
        traci.simulationStep()
    # Setup environment
    traci.vehicle.setSpeedMode(VEH_ID, 0)
    traci.vehicle.setSpeed(VEH_ID, rn.randint(1, 8))
    traci.simulationStep()
    traci.vehicle.subscribe(
        VEH_ID, [VAR_SPEED, VAR_DISTANCE, VAR_POSITION, VAR_ANGLE])
    self.set_state()
    return self.state
def reset(self):
    self.end = 0
    self.TotalReward = 0
    self.oldDistance = 0
    self.nowDistance = 0
    self.lastTlsTd = "cross_6"
    self.lastdistance = 0.99
    self.x_v = 0
    self.y_v = 0
    traci.load([
        "-c", config_path, "--collision.action", "remove", "--no-step-log",
        "--no-warnings", "--no-duration-log"
    ])
    print("Resetting...")
    # traci.vehicle.add("agent", "agent_route")
    traci.vehicle.setColor("agent", (255, 0, 0, 255))
    traci.vehicle.setSpeed("agent", 10)
    # traci.gui.trackVehicle('View #0', "agent")
    traci.simulationStep()
    AgentAvailable = False
    while not AgentAvailable:
        traci.simulationStep()
        self.VehicleIds = traci.vehicle.getIDList()
        if self.AgentId in self.VehicleIds:
            AgentAvailable = True
    self.StartTime = traci.simulation.getCurrentTime()
    for vehId in self.VehicleIds:
        traci.vehicle.subscribe(vehId, (tc.VAR_SPEED, tc.VAR_POSITION,
                                        tc.VAR_LANE_INDEX, tc.VAR_DISTANCE))
        traci.vehicle.subscribeLeader(self.AgentId, 50)
        if vehId == self.AgentId:
            traci.vehicle.setSpeedMode(self.AgentId, 0)
            traci.vehicle.setLaneChangeMode(self.AgentId, 0)
    self.state, breaklight, breakstop, wronglane = self.perception()
    return self.state
def reset(self):
    self.generate_routefile()
    traci.load(["-c", "../mapfiles/one_lane/one_lane.sumocfg",
                "--collision.check-junctions", "1", '--no-step-log', 'true',
                '-W', 'true', "--start", '--time-to-teleport', '60'])
    run = Assembler(self.carID)
    terminate = False
    while not terminate:
        num_vec = traci.vehicle.getIDList()
        for veh_id in num_vec:
            if veh_id != 'ego':
                traci.vehicle.setLaneChangeMode(veh_id, 512)
        self.output = run.getTensor()
        if self.output is not None:
            terminate = True
        traci.simulationStep()
    return self.getObservation()
def reset(self):
    self.end = False
    self.TotalReward = 0
    self.numberOfLaneChanges = 0
    self.numberOfOvertakes = 0
    self.currentTrackingVehId = 'None'
    if self.test and len(self.run) != 0:
        self.result.append(list(self.run))
        self.run.clear()
    traci.load(["-c", config_path])
    print('Resetting the layout')
    traci.simulationStep()
    AutoCarAvailable = False
    # lanes = traci.lane.getIDList()
    while not AutoCarAvailable:
        traci.simulationStep()
        self.VehicleIds = traci.vehicle.getIDList()
        if self.AutoCarID in self.VehicleIds:
            AutoCarAvailable = True
    self.StartTime = traci.simulation.getCurrentTime()
    self.VehicleIds = traci.vehicle.getIDList()
    # Just check that the auto car still exists and that there has not been
    # any collision
    for VehId in self.VehicleIds:
        traci.vehicle.subscribe(VehId, (tc.VAR_SPEED, tc.VAR_POSITION,
                                        tc.VAR_LANE_INDEX, tc.VAR_DISTANCE))
        # Subscribe to information about the vehicle in front of the auto car
        traci.vehicle.subscribeLeader(self.AutoCarID, 50)
        # traci.vehicle.setSpeedMode(VehId, 0)  # Disable car following
        if VehId == self.AutoCarID:
            speedMode = traci.vehicle.getSpeedMode(VehId)
            # Note: this masked speed mode is computed but never passed back
            # via setSpeedMode in the original code.
            speedMode = speedMode & int('11000', 2)
            traci.vehicle.setLaneChangeMode(VehId, 0)  # Disable automatic lane changing
            # Disable following the speed limit:
            # traci.vehicle.setSpeedFactor(VehId, 2)
            # self.AutocarSpeed = 15.00
            # traci.vehicle.setSpeed(VehId, 0)
    self.state = self._findstate()
    # traci.simulationStep()
    return self.state
def reset(self, sumoBinary):
    '''
    :function: Resets the variables necessary to start the next training
        episode and reloads the randomly initialized XML files for it.
    :return: None, but resets the environment.
    :Notes:
        * Commented lines are tautologies (they add no new information);
          they are kept only as a reference to what is inherited from the
          last episode run and from initialization.
    :sources: https://www.eclipse.org/lists/sumo-user/msg03016.html
        (how to reset the SUMO environment from code)
    '''
    # ------------------------------------------------------------------- #
    # 1 : R E S E T   O L D   V A R I A B L E S
    # ------------------------------------------------------------------- #
    # self.name = self.name
    # self.amb_goal_dist = self.amb_goal_dist
    self.reward = 0.0
    self.emer_start_lane = None
    # self.rel_amb_y_min = self.rel_amb_y_min
    # self.rel_amb_y_max = self.rel_amb_y_max
    # self.count_emergency_vehicles = self.count_emergency_vehicles
    # self.count_ego_vehicles = self.count_ego_vehicles
    # self.agents = self.agents
    # self.emer = self.emer
    self.hidden_state = None
    self.observed_state = None
    self.full_state = None
    # self.Actions = self.Actions
    # self.action_to_string_dict = self.action_to_string_dict

    # The optimal time is recalculated in case track_len changed between
    # episodes. Optimal number of time steps: the number of time steps taken
    # by the ambulance at maximum speed.
    self.optimal_time = int(np.round(track_len / self.emer.max_speed))
    self.max_steps = 20 * self.optimal_time

    # ---------------------------------------------------------------------------- #
    # 2 : R A N D O M L Y   I N I T I A L I Z E   X M L s
    # ---------------------------------------------------------------------------- #
    self.templates_reset()  # Vehicles list gets reset here

    # ---------------------------------------------------------------------------- #
    # 3 : I N I T I A T E   S U M O   E N V I R O N M E N T   and vehicles list
    # ---------------------------------------------------------------------------- #
    traci.load([
        "-c", Sumocfg_DIR, "--tripinfo-output", "tripinfo.xml", "--start",
        "--seed", str(Sumo_random_seed)
    ])
    # Create the real global vehicles list (the one initialized in Config.py,
    # with the ambulance only, is temporary/fake).
    # NOTE: No error is produced if some cars are missing from this list;
    # an error occurs only when a non-present ID is requested.
    vehicles_list = [Vehicle("LH")]
    # , Vehicle("RB0"), Vehicle("RB1"), Vehicle("RB2"), Vehicle("RB3")
    for lane, num_cars in vehicles_data.items():
        for agent_index in range(num_cars):
            vehicles_list.append(
                Vehicle(env.create_vehicle_id(lane, agent_index)))
    for vehc in vehicles_list:  # vehicles initialized
        vehc.initialize()
    # Note: copy.copy keeps references to the original vehicles (as opposed
    # to deepcopy, which would copy the vehicles themselves).
    self.list_of_vehicles = copy.copy(vehicles_list)
    self.recount_vehicles()
generate_routefile_random(episode_time, num_vehicles)
traci.load(["--start", "-c", "data/cross.sumocfg",
            "--tripinfo-output", "tripinfo.xml"])
'''
state = getState(transition_time)
action = np.random.choice(np.arange(nA))
new_state = makeMove(action, transition_time)
reward = getReward(state, new_state)
replay_memory.append([state, action, reward, new_state])
print(len(replay_memory))
'''

total_t = 0
for episode in range(num_episode):
    num_vehicles += 1
    generate_routefile()
    # generate_routefile_random(episode_time, num_vehicles)
    traci.load(["--start", "-c", "data/cross.sumocfg",
                "--tripinfo-output", "tripinfo.xml"])
    traci.trafficlight.setPhase("0", 0)
    state = getState(transition_time)
    counter = 0
    stride = 0
    while traci.simulation.getMinExpectedNumber() > 0:
        print("Episode # ", episode)
        # print("Waiting time on lane 1i_0 = ", getWaitingTime("1i_0"))
        print("Inside episode counter", counter)
        counter += 1
        total_t += 1
        # batch_experience = experience[:batch_history]
def sumo_traci_runner(
    simulation_time,
    custom_traci_functions,
    num_iterations,
    scenario_dir,
    simulation_step_size=1,
    init_time_skip=50,
    seed=0,
    run_per_scenario=2,
    **kwargs,
):
    skip = int(1 / simulation_step_size)
    np.random.seed(seed)
    all_simulation_data = []
    traci.init(8813)
    task = kwargs["task"]
    level = kwargs["level"]
    print(task, level)

    def dynamic_vehicle_gen_probability_func(pattern, current_step):
        if kwargs["vehicle_gen_prob"] > 0:
            current_step = current_step // run_per_scenario
            for edge_id in pattern["routes"]:
                if pattern["routes"][edge_id] is None:
                    continue
                pattern["routes"][edge_id]["begin_time_init"]["params"][
                    "probability"
                ] = (
                    kwargs["vehicle_gen_prob"]
                    + current_step * kwargs["dynamic_vehicle_gen_prob"]
                )
        return pattern

    build_scenarios(
        task=f"task{task}",
        level_name=level,
        num_seeds=num_iterations * run_per_scenario,
        has_stopwatcher=False,
        dynamic_pattern_func=dynamic_vehicle_gen_probability_func,
    )
    for j in range(num_iterations * run_per_scenario):
        current_time = time.time()
        current_scenario_dir = scenario_dir + "-flow-" + str(j)
        current_vehicle_gen_prob = (
            kwargs["vehicle_gen_prob"]
            + j // run_per_scenario * kwargs["dynamic_vehicle_gen_prob"]
        )
        # for k in range(run_per_scenario):
        # Reset scenario
        traci.load(
            [
                "-n", current_scenario_dir + "/map.net.xml",
                "-r", current_scenario_dir + "/traffic/all.rou.xml",
                "--seed", str(np.random.randint(0, run_per_scenario)),
            ]
        )
        simulation_data = {}  # Dictionary object to collect data
        accumulated_time = 0
        for i in range(int((simulation_time + init_time_skip) / simulation_step_size)):
            accumulated_time += simulation_step_size
            # One step to initialize everything
            traci.simulationStep(accumulated_time)
            if i > int(init_time_skip / simulation_step_size) and i % skip == 0:
                # print("Sim:", j, "Sim Seed:", k, "Time Step:",
                #       traci.simulation.getTime(), "Num Vehicles:",
                #       len(traci.vehicle.getIDList()))
                for func in custom_traci_functions:
                    func(
                        traci,
                        simulation_data,
                        last_step=i - init_time_skip - 1,
                        **kwargs,
                    )
        simulation_data["vehicle_gen_prob"] = {"All": current_vehicle_gen_prob}
        for route_id in simulation_data["route"]:
            simulation_data["vehicle_gen_prob"][route_id] = current_vehicle_gen_prob
        all_simulation_data.append(simulation_data)
        print(
            "Sim:", j, "with", run_per_scenario, "seeds, completed with",
            time.time() - current_time, "seconds",
        )
    traci.close()  # Reached end of simulation
    return all_simulation_data
def evaluate(agent, render=False):
    traci.start([
        sumoBinary, "-c", "data/test.sumocfg", "--tripinfo-output",
        "tripinfo.xml"
    ])
    traci.trafficlight.setPhase("0", 0)
    AVG_Q_len_perepisode = []
    sum_q_lens = 0
    length_data_avg = []
    count_data = []
    delay_data_avg = []
    delay_data_min = []
    delay_data_max = []
    delay_data_time = []
    current_left_time = 0
    current_top_time = 0
    current_bottom_time = 0
    current_right_time = 0
    overall_lane_qlength = [0, 0, 0, 0]
    num_cycles = 0
    num_qlength_instances = 0
    traci.load([
        "--start", "-c", "data/test.sumocfg", "--tripinfo-output",
        "tripinfo.xml"
    ])
    traci.trafficlight.setPhase("0", 0)
    obs = getState(agent.obs_dim)
    while traci.simulation.getMinExpectedNumber() > 0:
        prev_phase = traci.trafficlight.getPhase("0")
        # print(obs)
        # Predict the action, picking only the greedy (best) one
        action = agent.predict(obs[0].flatten())
        next_obs, qlength, avg_lane_qlength = makeMove(action, transition_time)
        new_phase = traci.trafficlight.getPhase("0")
        # reward = getRewardAbsolute(obs, next_obs)
        # done = True  # to be decided
        print("Previous phase = ", prev_phase)
        print("New phase = ", new_phase)
        vehicleList = traci.vehicle.getIDList()
        num_vehicles = len(vehicleList)
        print("Number of cycles = ", num_cycles)
        if num_vehicles:
            avg = 0
            max_wait = 0
            min_wait = 100
            for veh_id in vehicleList:
                wait_time = traci.vehicle.getAccumulatedWaitingTime(veh_id)
                if wait_time > max_wait:
                    max_wait = wait_time
                if wait_time < min_wait:
                    min_wait = wait_time
                avg += wait_time
            avg /= num_vehicles
            delay_data_avg.append(avg)
            delay_data_max.append(max_wait)
            delay_data_min.append(min_wait)
            length_data_avg.append(qlength)
            count_data.append(num_vehicles)
            delay_data_time.append(traci.simulation.getCurrentTime() / 1000)
            if traci.simulation.getCurrentTime() / 1000 < 2100:
                overall_lane_qlength = list(
                    map(add, overall_lane_qlength, avg_lane_qlength))
                num_qlength_instances += 1
        if prev_phase == 3 and new_phase == 0:
            num_cycles += 1
        if prev_phase == 0:
            current_bottom_time += transition_time
        if prev_phase == 1:
            current_right_time += transition_time
        if prev_phase == 2:
            current_top_time += transition_time
        if prev_phase == 3:
            current_left_time += transition_time
        obs = next_obs
    overall_lane_qlength[:] = [
        x / num_qlength_instances for x in overall_lane_qlength
    ]
    current_right_time /= num_cycles
    current_top_time /= num_cycles
    current_left_time /= num_cycles
    current_bottom_time /= num_cycles
    avg_free_time = [
        current_left_time, current_top_time, current_right_time,
        current_bottom_time
    ]
    plt.plot(delay_data_time, delay_data_avg, 'b-', label='avg')
    # plt.plot(delay_data_time, delay_data_min, 'g-', label='min')
    # plt.plot(delay_data_time, delay_data_max, 'r-', label='max')
    plt.legend(loc='upper left')
    plt.ylabel('Waiting time per minute')
    plt.xlabel('Time in simulation (in s)')
    plt.figure()
    plt.plot(delay_data_time, length_data_avg, 'b-', label='avg')
    plt.legend(loc='upper left')
    plt.ylabel('Average Queue Length')
    plt.xlabel('Time in simulation (in s)')
    plt.figure()
    plt.plot(delay_data_time, count_data, 'b-', label='avg')
    plt.legend(loc='upper left')
    plt.ylabel('Average Number of Vehicles in Map')
    plt.xlabel('Time in simulation (in s)')
    plt.figure()
    label = [
        'Obstacle Lane reward', 'Top Lane w/ traffic', 'Right lane',
        'Bottom lane'
    ]
    index = np.arange(len(label))
    plt.bar(index, avg_free_time, color=['red', 'green', 'blue', 'blue'])
    plt.xlabel('Lane')
    plt.ylabel('Average Green Time per Cycle (in s)')
    plt.xticks(index, label)
    axes = plt.gca()
    axes.set_ylim([0, 60])
    plt.figure()
    plt.bar(index, overall_lane_qlength, color=['red', 'green', 'blue', 'blue'])
    plt.xlabel('Lane')
    plt.ylabel('Average Q-length every 8 seconds')
    plt.xticks(index, label)
    axes = plt.gca()
    axes.set_ylim([0, 20])
    plt.show()
    AVG_Q_len_perepisode.append(sum_q_lens / 702)
    sum_q_lens = 0
def cmc_run(max_iters=100, N_per_iter=1000):
    vel_bimodal_params = (16.47087736, 7.401838, -18.54877962, 16.4562847,
                          -7.41718461, 18.56954167)

    def _gauss(x, mu, sigma, A):
        return A * math.exp(-(x - mu)**2 / 2 / sigma**2)

    def _bimodal(X, mu1, sigma1, A1, mu2, sigma2, A2):
        return [
            _gauss(x, mu1, sigma1, A1) + _gauss(x, mu2, sigma2, A2) for x in X
        ]

    range_inv_pareto_params = 2.25843986

    def _pareto(X, param):
        return [(param * math.pow(.1, param)) / (math.pow(x, param + 1))
                if x >= .1 else 0 for x in X]

    ttc_inv_5_15_pareto_params = 0.00586138
    ttc_inv_15_25_pareto_params = 0.00622818
    ttc_inv_25_35_pareto_params = 0.00707637

    def _exp(X, lambda_param, a):
        return [
            a * (1 / lambda_param) * np.exp(-1 * x / lambda_param) for x in X
        ]

    wfile = open(root_path + 'sumo_cmc_log.out', 'w')
    crash_count = 0
    no_crash_count = 0
    results = []
    min_range = np.inf
    traci.start(sumoCmd)
    X, Y = [], []
    for batch_id in np.arange(max_iters):
        vel_choice_list = np.arange(0, 60, 0.1)
        vel_choice_weights = _bimodal(vel_choice_list, *vel_bimodal_params)
        sum_weights = sum(vel_choice_weights)
        vel_choice_weights = [x / sum_weights for x in vel_choice_weights]
        sample_vel_lc_mps = np.random.choice(vel_choice_list,
                                             p=vel_choice_weights, size=1000)
        sample_range_inverse = np.random.pareto(range_inv_pareto_params,
                                                size=1000)
        sample_ttc_inv = []
        for vel in sample_vel_lc_mps:
            if 0 <= vel < 15:
                ttc_inv_pareto_params = ttc_inv_5_15_pareto_params
            elif 15 <= vel < 25:
                ttc_inv_pareto_params = ttc_inv_15_25_pareto_params
            else:
                ttc_inv_pareto_params = ttc_inv_25_35_pareto_params
            ttc_inv_choice_list = np.arange(0, 4, 0.001)
            ttc_inv_choice_weights = _pareto(ttc_inv_choice_list,
                                             ttc_inv_pareto_params)
            sum_weights = sum(ttc_inv_choice_weights)
            ttc_inv_choice_weights = [
                x / sum_weights for x in ttc_inv_choice_weights
            ]
            sample_ttc_inv.append(
                np.random.choice(ttc_inv_choice_list,
                                 p=ttc_inv_choice_weights))
        range_rate = [
            -x[0] / x[1] for x in zip(sample_ttc_inv, sample_range_inverse)
        ]
        sample_vel_s_mps = [
            x[0] - x[1] for x in zip(sample_vel_lc_mps, range_rate)
        ]
        count_crash_iter = 0
        for run_id in np.arange(N_per_iter):
            traci.load(load_params)
            range_x = 1 / sample_range_inverse[run_id]
            speed_lc = min(55, sample_vel_s_mps[run_id])
            speed_s = min(55, sample_vel_lc_mps[run_id])
            traci.vehicle.add(vehID='veh_0', routeID='route0', depart=0,
                              pos=0, speed=speed_s)
            traci.vehicle.add(vehID='veh_1', routeID='route0', depart=0,
                              pos=range_x, speed=speed_lc)
            traci.vehicle.moveTo('veh_0', '1to2_0', 0)
            traci.vehicle.moveTo('veh_1', '1to2_0', range_x)
            traci.vehicle.setSpeedMode('veh_1', 12)
            traci.vehicle.setSpeedMode('veh_0', 12)
            for step in np.arange(50):
                traci.simulationStep()
                vehicle_ids = traci.vehicle.getIDList()
                if len(vehicle_ids) > 2:
                    exit()
                if len(vehicle_ids) == 2:
                    range_x = max(traci.vehicle.getLanePosition(vehicle_ids[0]),
                                  traci.vehicle.getLanePosition(vehicle_ids[1])) \
                        - min(traci.vehicle.getLanePosition(vehicle_ids[0]),
                              traci.vehicle.getLanePosition(vehicle_ids[1]))
                    if range_x < min_range:
                        min_range = range_x
                    if range_x <= 0.1:
                        count_crash_iter += 1
                        crash_count += 1
                        break
            no_crash_count += 1
            results.append(crash_count / no_crash_count)
            str_line = ('batch:' + str(batch_id) + ',run:' + str(run_id) +
                        ',no-crash:' + str(no_crash_count) + ',crash:' +
                        str(crash_count) + ',min range_x:' + str(min_range) +
                        '\n')
            wfile.write(str_line)
        p_crash_iter = count_crash_iter / 1000
        X.append(batch_id)
        Y.append(p_crash_iter)
    traci.close()
    plt.plot(X, Y)
    plt.show()
    print(list(zip(X, Y)))
    res_file_name = root_path + 'cmc_res_final.list'
    flush_list(res_file_name, list(zip(X, Y)))
def simulate_run(l, *args):
    choice_list = []
    weight_list = []
    pu_list = []
    w_ttc_inv = [[0.1238042, 0.23736926], [0.09011138, 0.31582509],
                 [0.06177499, 0.54435407]]
    w_range_inv = [0.04528787, 0.00955973]
    distracted_policy = False
    U_prime, global_crash_prob_dict, wfile, opt_lambda, out_iter_num = \
        args[0], args[1], args[2], args[3], args[4]
    out_iter_num = 'NA' if out_iter_num is None else out_iter_num
    crash_count = 0
    no_crash_count = 0
    count = 0
    for k, v in U_prime.items():
        _p_0 = (1/3) * (l[0] * np.exp(l[0] * v[0])) / (np.exp(2 * l[0]) - 1) if l[0] != 0 else 1/2
        _p_1 = (1/3) * (l[1] * np.exp(l[1] * v[1])) / (np.exp(2 * l[1]) - 1) if l[1] != 0 else 1/2
        _p_2 = (1/3) * (l[2] * np.exp(l[2] * v[2])) / (np.exp(2 * l[2]) - 1) if l[2] != 0 else 1/2
        p_a = _p_0 + _p_1 + _p_2
        choice_list.append(k)
        weight_list.append(p_a)
        speed_s = k[0]
        range_x = k[2]
        vel_lc = k[1]
        ttc_inv = (speed_s - vel_lc) / k[2]
        if 0 <= speed_s < 15:
            idx = 0
        elif 15 <= speed_s < 25:
            idx = 1
        else:
            idx = 2
        p_u = (eval_vel_s(speed_s) *
               eval_exp(1 / range_x, w_range_inv[0], w_range_inv[1]) *
               eval_exp(ttc_inv, w_ttc_inv[idx][0], w_ttc_inv[idx][1]))
        pu_list.append(p_u)
    print(min(pu_list), max(pu_list), sum(pu_list))
    print(min(weight_list), max(weight_list), sum(weight_list))
    orig_prob_list = list(weight_list)
    sum_weights = sum(weight_list)
    sum_pu_weights = sum(pu_list)
    weight_list = [x / sum_weights for x in weight_list]
    pu_list = [x / sum_pu_weights for x in pu_list]
    import random
    p_u_dict, q_u_dict, vel_lc_range_samples = dict(), dict(), []
    vel_lc_range_sample_index = random.choices(
        population=np.arange(len(choice_list)).tolist(),
        weights=weight_list, k=1000)
    for i in vel_lc_range_sample_index:
        vel_lc_range_samples.append(choice_list[i])
        q_u_dict[choice_list[i]] = weight_list[i]
        p_u_dict[choice_list[i]] = pu_list[i]
    x = [choice_list[i][2] for i in vel_lc_range_sample_index]
    y = [weight_list[i] for i in vel_lc_range_sample_index]
    print(min(y), max(y), sum(y))
    x_y_sorted = sorted(zip(x, y), key=lambda tup: tup[0])
    plt.plot([p[0] for p in x_y_sorted], [p[1] for p in x_y_sorted])
    y = [pu_list[i] for i in vel_lc_range_sample_index]
    print(min(y), max(y), sum(y))
    x_y_sorted = sorted(zip(x, y), key=lambda tup: tup[0])
    plt.plot([p[0] for p in x_y_sorted], [p[1] for p in x_y_sorted])
    plt.show()
    q_u_list = []
    for idx, val in enumerate(vel_lc_range_samples):
        traci.load(load_params)
        speed_lc = min(55, val[1])
        vel_s = min(55, val[0])
        range_x = val[2]
        traci.vehicle.add(vehID='veh_0', routeID='route0', depart=0, pos=0,
                          speed=vel_s)
        traci.vehicle.moveTo('veh_0', '1to2_0', 0)
        traci.vehicle.setSpeedMode('veh_0', 12)
        key_l = list(l)
        if not distracted_policy:
            traci.vehicle.add(vehID='veh_1', routeID='route0', depart=0,
                              pos=range_x, speed=speed_lc)
            traci.vehicle.moveTo('veh_1', '1to2_0', range_x)
            traci.vehicle.setSpeedMode('veh_1', 12)
        else:
            u_prime_reaction = [1 - util_reaction(x)
                                for x in np.arange(0, 3, .1)]
            sample_reaction_l = l[3]
            weights_reaction = [
                (sample_reaction_l * np.exp(sample_reaction_l * u_prime)) /
                (np.exp(2 * sample_reaction_l) - 1)
                if sample_reaction_l != 0 else 1/2
                for u_prime in u_prime_reaction
            ]
            _sum_w_reaction = sum(weights_reaction)
            weights_reaction = [x / _sum_w_reaction for x in weights_reaction]
            sample_reaction_time = int(round(
                np.random.choice(np.arange(0, 3, .1), p=weights_reaction)))
            # Put vehicle 2 in the position it would have reached after
            # sample_reaction_time.
            new_range = range_x - ((vel_s - speed_lc) * sample_reaction_time)
            if new_range > 0:
                range_x = new_range
            traci.vehicle.add(vehID='veh_1', routeID='route0', depart=0,
                              pos=range_x, speed=speed_lc)
            traci.vehicle.moveTo('veh_1', '1to2_0', range_x)
            traci.vehicle.setSpeedMode('veh_1', 12)
        for step in np.arange(50):
            traci.simulationStep()
            vehicle_ids = traci.vehicle.getIDList()
            if len(vehicle_ids) > 2:
                exit()
            if len(vehicle_ids) == 2:
                range_x = max(traci.vehicle.getLanePosition(vehicle_ids[0]),
                              traci.vehicle.getLanePosition(vehicle_ids[1])) \
                    - min(traci.vehicle.getLanePosition(vehicle_ids[0]),
                          traci.vehicle.getLanePosition(vehicle_ids[1]))
                if range_x <= 0.01:
                    crash_count += 1
                    q_u = q_u_dict[val]
                    p_u = p_u_dict[val]
                    q_u_list.append((q_u, p_u))
                    if str(key_l) in global_crash_prob_dict:
                        if distracted_policy:
                            global_crash_prob_dict[str(key_l)].append(
                                (vel_s, speed_lc, range_x,
                                 sample_reaction_time))
                        else:
                            global_crash_prob_dict[str(key_l)].append(
                                (vel_s, speed_lc, range_x))
                    else:
                        if distracted_policy:
                            global_crash_prob_dict[str(key_l)] = [
                                (vel_s, speed_lc, range_x,
                                 sample_reaction_time)]
                        else:
                            global_crash_prob_dict[str(key_l)] = [
                                (vel_s, speed_lc, range_x)]
                    break
        no_crash_count += 1
        if distracted_policy:
            str_line = ('lambda:' + str(key_l) + ',' + str(out_iter_num) +
                        '-' + str(idx) + ',crash_prob:' +
                        str(round(crash_count / no_crash_count, 5)) +
                        ',crash:' + str(crash_count) + ',' +
                        str(no_crash_count) + ',reaction_time:' +
                        str(sample_reaction_time) + ',' +
                        str(sample_reaction_l) + '\n')
        else:
            str_line = ('lambda:' + str(key_l) + ',' + str(out_iter_num) +
                        '-' + str(idx) + ',crash_prob:' +
                        str(round(crash_count / no_crash_count, 5)) +
                        ',crash:' + str(crash_count) + ',' +
                        str(no_crash_count) + '\n')
        wfile.write(str_line)
    p_crash_iter = crash_count / no_crash_count
    if out_iter_num == 'NA':  # was `is 'NA'`: identity comparison with a literal is a bug
        return p_crash_iter
    else:
        return p_crash_iter, q_u_list
def episode(RB_RLAlgorithm=None, Proudhon=None, episode_num=0):
    ########################
    # 1: inits
    done = False  # are we done with the episode or not
    step = 0  # step number
    errors_arrays = np.zeros(7)  # for error checking capability
    if Proudhon is None:
        Proudhon = env(vehicles_list)  # vehicles_list = [LH, RB]
    ## -- ##
    Proudhon.reset()
    traci.load(["-c", Sumocfg_DIR, "--tripinfo-output", "tripinfo.xml"])
    for vehc in vehicles_list:
        vehc.initialize()
    ## -- ##
    if RB_RLAlgorithm is None:
        algo_params = q_learning_params  # from Config.py
        # Algorithm for the RB agent
        RB_RLAlgorithm = RLAlgorithm(
            Proudhon, algo_params=algo_params, load_q_table=load_q_table,
            test_mode_on=vis_update_params['test_mode_on'])

    ########################
    # 2: init measurements
    traci.simulationStep()  # after stepping
    Proudhon.get_emer_start_lane()
    # (communication from vehicle): measure all values from the environment
    getters(vehicles_list)
    # (communication to environment): measure all values into our agents
    Proudhon.measure_full_state()
    # (communication to algorithm/agent):
    new_observed_state_for_this_agent = Proudhon.observed_state[0]
    # Choose an action from the feasible actions
    # (rl_disengagement: chosen action must be None, feasible actions []):
    feasible_actions_for_current_state = Proudhon.get_feasible_actions(
        vehicles_list[1], new_observed_state_for_this_agent)
    chosen_action = RB_RLAlgorithm.pickAction(
        feasible_actions_for_current_state, new_observed_state_for_this_agent)
    # Request the action on the agent:
    RB_RLAlgorithm.applyAction(chosen_action, vehicles_list[1],
                               new_observed_state_for_this_agent)
    episode_reward = 0
    episode_reward_list = []

    ########################
    # 3: MAIN LOOP
    if episode_num % vis_update_params['every_n_episodes'] == 0:
        print(f'E:{episode_num: <{6}}|S:{0: <{4}} | '
              f'epsilon: {RB_RLAlgorithm.epsilon: <{31}} | '
              f'state: {[str(x)[:5] + " " * max(0, 5 - len(str(x))) for x in Proudhon.observed_state[0]]} |')
    while traci.simulation.getMinExpectedNumber() > 0:
        # 3.1: store last states
        amb_last_velocity = Proudhon.emer.spd
        last_observed_state = Proudhon.observed_state
        last_observed_state_for_this_agent = last_observed_state[0]
        # 3.1.1: store last picked action (rl_disengagement, check_related_fcn)
        last_picked_action = chosen_action
        # ----------------------------------------------------------------- #
        # 3.2: M O V E   O N E   S I M U L A T I O N   S T E P
        # ----------------------------------------------------------------- #
        traci.simulationStep()  # actual action applying
        step += 1
        # TODO: turn this into an are_we_ok function
        if vehicles_list[0].getL() != Proudhon.emer_start_lane:
            raise ValueError(
                f"Ambulance changed lane from {Proudhon.emer_start_lane} to "
                f"{vehicles_list[0].getL()} on step {step}."
                f"\nAmbulance should not change lane. Quitting.")
        # ----------------------------------------------------------------- #
        # 3.3: measurements, and check whether we are done
        getters(vehicles_list)
        Proudhon.measure_full_state()
        new_observed_state_for_this_agent = Proudhon.observed_state[0]
        done = Proudhon.are_we_done(full_state=Proudhon.full_state,
                                    step_number=step)
        if (step % vis_update_params['every_n_iters'] == 0
                and episode_num % vis_update_params['every_n_episodes'] == 0):
            # print step info (rl_disengagement: action_chosing_method is
            # modified to be null and Proudhon.reward should equal zero)
            print(f'E:{episode_num: <{6}}|S:{step: <{4}} | '
                  f'reward : {Proudhon.reward},'
                  f'lastAction: {chosen_action : <{12}} | '
                  # f'cumReward: ' + str(episode_reward)[:6] + ' ' * max(0, 6 - len(str(episode_reward))) +
                  f' | state: {[str(x)[:5] + " " * max(0, 5 - len(str(x))) for x in Proudhon.observed_state[0]]}, '
                  f'actionMethod: {RB_RLAlgorithm.action_chosing_method : <{14}}')
        # 3.3.1: checking if we are ok (rl_disengagement: two conditions,
        # rl_enabled or not)
        if enable_checks:
            errors_arrays += are_we_ok(1, True, Proudhon, step,
                                       last_picked_action,
                                       last_observed_state_for_this_agent,
                                       new_observed_state_for_this_agent,
                                       Proudhon)
        # 3.4: reward last step's chosen action (rl_disengagement: reward
        # should be null, reward_list does not append)
        reward = Proudhon.calc_reward(amb_last_velocity, done, step,
                                      last_observed_state_for_this_agent,
                                      new_observed_state_for_this_agent)
        rel_distance = new_observed_state_for_this_agent[4]
        if (enable_rl_dis_engagement
                & (~((rel_distance < Proudhon.rel_amb_y_max)
                     & (rel_distance > Proudhon.rel_amb_y_min)))):
            # If the agent is outside the ambulance window and rl
            # disengagement is enabled, do not add this step's reward
            # (equals None) to the episode reward.
            pass
        else:
            episode_reward += reward  # for history
            episode_reward_list.append(reward)  # for history
        # 3.6: feasibility check for current_state, for the next step
        # (rl_disengagement: empty list [])
        feasible_actions_for_current_state = Proudhon.get_feasible_actions(
            vehicles_list[1], new_observed_state_for_this_agent)
        # 3.5: update q table using backward reward logic (rl_disengagement:
        # should not be updated anyway)
        RB_RLAlgorithm.update_q_table(chosen_action, reward,
                                      new_observed_state_for_this_agent,
                                      last_observed_state_for_this_agent,
                                      feasible_actions_for_current_state)
        if done:  # DO NOT REMOVE THIS (IT BREAKS IF WE ARE DONE)
            if episode_num % vis_update_params['every_n_episodes'] == 0:
                # TODO: move episode_end_reason outside the print check --
                # we might need it elsewhere
                if done == 1:
                    episode_end_reason = "max steps"
                elif done == 2:
                    episode_end_reason = "ambulance goal"
                elif done == 3:
                    # TODO: #TOFIX: What should be the state here?
                    episode_end_reason = "agent goal"
                else:
                    raise ValueError(f"Episode: {episode_num} done is True ={done} but reason not known !")
                print(f'E:{episode_num: <{6}}|S:{step: <{4}} | '
                      f'reward : {Proudhon.reward},'
                      f'lastAction: {chosen_action : <{12}} | '
                      # f'cumReward: ' + str(episode_reward)[:6] + ' ' * max(0, 6 - len(str(episode_reward))) +
                      f' | state: {[str(x)[:5] + " " * max(0, 5 - len(str(x))) for x in Proudhon.observed_state[0]]}, '
                      f'actionMethod: {RB_RLAlgorithm.action_chosing_method : <{14}}')
                # rl_disengagement _modified
                if enable_checks:
                    print("non_requested_lane_change, acc_errors, dec_errors, "
                          "_no_acc_errors, non_requested_speed_change, "
                          "change_left_errors, change_right_errors")
                    print(f"Error_happened_during_simulation {errors_arrays}")
            break
        # 3.7: actually choose an action from the feasible ones, for the
        # next step (rl_disengagement _modified)
        chosen_action = RB_RLAlgorithm.pickAction(
            feasible_actions_for_current_state,
            new_observed_state_for_this_agent)
        # 3.8: request the environment to apply the new action for the next
        # step; the action is not applied here but at #3.2
        # (rl_disengagement _modified)
        RB_RLAlgorithm.applyAction(chosen_action, vehicles_list[1],
                                   new_observed_state_for_this_agent)

    # Episode end
    sys.stdout.flush()
    # 4: update epsilon after the episode is done
    # (DONE: epsilon now updates every episode, not every iteration)
    old_epsilon = RB_RLAlgorithm.epsilon
    RB_RLAlgorithm.epsilon = RB_RLAlgorithm.min_epsilon + \
        (RB_RLAlgorithm.max_epsilon - RB_RLAlgorithm.min_epsilon) * \
        np.exp(-RB_RLAlgorithm.decay_rate * episode_num)
    if episode_num % vis_update_params['every_n_episodes'] == 0:
        print(f'\n\nE:{episode_num: <{6}}| END:{step: <{4}} | '
              f'finalCumReward: ' + str(episode_reward)[:6] + ' ' * max(0, 6 - len(str(episode_reward))) + " | "
              f'reason: {episode_end_reason: <{15}} | '
              f'old_eps: {old_epsilon: <{10}}, '
              f'new_eps: {RB_RLAlgorithm.epsilon: <{10}}')
        print('-' * 157)
        print('=' * 157)
        print('\n')
    if (vis_update_params['print_reward_every_episode']
            and episode_num % vis_update_params['every_n_episodes'] != 0):
        print(f'E:{episode_num: <{6}}| END:{step: <{4}} | '
              f'finalCumReward: ' + str(episode_reward)[:6] + ' ' * max(0, 6 - len(str(episode_reward))) + " | ")
    return RB_RLAlgorithm, Proudhon, episode_reward, episode_reward_list, errors_arrays
import os
import sys

if 'SUMO_HOME' in os.environ:
    tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
    sys.path.append(tools)
    print('success')
else:
    sys.exit("please declare environment variable 'SUMO_HOME'")

import traci

sumo_cmd = [
    os.environ["SUMO_HOME"] + '/bin/sumo-gui', '-c',
    '../map/ramp3/mapDenseSlow.sumo.cfg', '--start', str(True)
]
# The reload command omits the binary: traci.load() takes options only.
sumo_cmd_reload = [
    '-c', '../map/ramp3/mapDenseSlow.sumo.cfg', '--start', str(True)
]

traci.start(sumo_cmd)
for j in range(3):
    for i in range(5):
        traci.simulationStep()
    traci.load(sumo_cmd_reload)
SUMO is free software; you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.
"""
from __future__ import print_function
from __future__ import absolute_import
import os
import subprocess
import sys

sys.path.append(
    os.path.join(os.path.dirname(sys.argv[0]), "..", "..", "..", "..", "..",
                 "tools"))
import traci
import sumolib

sumoBinary = sumolib.checkBinary('sumo')
PORT = sumolib.miscutils.getFreeSocketPort()
sumoProcess = subprocess.Popen("%s -c sumo.sumocfg --remote-port %s" %
                               (sumoBinary, PORT),
                               shell=True,
                               stdout=sys.stdout)
traci.init(PORT)
traci.simulationStep()
traci.load(["sumo.sumocfg"])
traci.simulationStep()
traci.close()
sumoProcess.wait()
def load_sumo(self):
    # Reload the current config; traci.load() takes the option list only,
    # so the binary name at sumo_cmd[0] is dropped.
    traci.load(self.sumo_cmd[1:])
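A minimal sketch, assuming self.sumo_cmd was built for traci.start() with the binary at index 0; the class and argument names here are hypothetical, not from the source:

import sumolib
import traci

class SumoEnv:  # hypothetical wrapper
    def __init__(self, cfg_path="scenario.sumocfg", gui=False):
        binary = sumolib.checkBinary('sumo-gui' if gui else 'sumo')
        self.sumo_cmd = [binary, "-c", cfg_path, "--no-step-log", "true"]
        traci.start(self.sumo_cmd)  # first launch uses the full command

    def load_sumo(self):
        traci.load(self.sumo_cmd[1:])  # reloads pass options only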
PORT = sumolib.miscutils.getFreeSocketPort()
sumoProcess = subprocess.Popen("%s -c sumo.sumocfg --remote-port %s" %
                               (sumoBinary, PORT),
                               shell=True,
                               stdout=sys.stdout)
traci.init(PORT)
traci.simulationStep()
# cause an error and catch it
try:
    traci.vehicle.setRoute("horiz", "dummy")
except traci.TraCIException:
    pass
traci.load(["sumo.sumocfg"])
traci.simulationStep()
traci.close()
sumoProcess.wait()

sumoBinary = sumolib.checkBinary('sumo-gui')
PORT = sumolib.miscutils.getFreeSocketPort()
sumoProcess = subprocess.Popen(
    "%s -S -Q -c sumo.sumocfg -l log.txt --remote-port %s" %
    (sumoBinary, PORT),
    shell=True,
    stdout=sys.stdout)
traci.init(PORT)
for i in range(3):
    traci.simulationStep()
def test_mc_run():
    traci.start(sumoCmd)
    count = 0
    line_num = 0
    data_file = []
    no_crash_count = 0
    crash_count = 0
    results = []
    out_f = open(root_path + 'temp_out', 'w')
    with open(root_path + 'interac_seq_data_smooth.csv', 'r') as csvfile:
        csv_reader = csv.reader(csvfile, delimiter=',')
        for row in csv_reader:
            if int(row[0]) == count:
                data_file.append(row)
            elif int(row[0]) > count:
                count += 1
                print('processing: ' + str(line_num / 1755166 * 100) + '%',
                      'crash:' + str(crash_count) + ',no crash:' +
                      str(no_crash_count),
                      file=out_f)
                if len(data_file) >= 10:
                    print('starting run')
                    traci.load(load_params)
                    '''
                    print('set speed for 0 at', max(0, float(data_file[0][2])))
                    print('set speed for 1 at', max(0, float(data_file[0][3])))
                    print('allowed is', traci.lane.getMaxSpeed('1to2_0'))
                    '''
                    traci.vehicle.add(vehID='veh_0', routeID='route0',
                                      depart=0, pos=0,
                                      speed=max(0, float(data_file[0][2])))
                    traci.vehicle.add(vehID='veh_1', routeID='route0',
                                      depart=0, pos=float(data_file[0][4]),
                                      speed=max(0, float(data_file[0][3])))
                    traci.vehicle.moveTo('veh_1', '1to2_0',
                                         float(data_file[0][4]))
                    traci.vehicle.setSpeedMode('veh_1', 12)
                    traci.vehicle.setSpeedMode('veh_0', 12)
                    for step in np.arange(len(data_file)):
                        vehicle_ids = traci.vehicle.getIDList()
                        traci.simulationStep()
                        traci.vehicle.setSpeed(
                            'veh_1', speed=max(0, float(data_file[step][3])))
                        print(len(vehicle_ids), step + 1,
                              traci.vehicle.getSpeed('veh_0'),
                              traci.vehicle.getSpeed('veh_1'),
                              float(traci.vehicle.getLanePosition('veh_1')) -
                              float(traci.vehicle.getLanePosition('veh_0')))
                        range_x = (float(traci.vehicle.getLanePosition('veh_1')) -
                                   float(traci.vehicle.getLanePosition('veh_0')))
                        if range_x <= 0.001:
                            crash_count += 1
                            results.append((crash_count, no_crash_count))
                            break
                    no_crash_count += 1
                data_file = []
                data_file.append(row)
            line_num += 1
    traci.close()
    print(no_crash_count)
    print(crash_count)
    print(results)
    with open(root_path + 'sumo_mc_results.out', 'a') as wfile:
        str_line = str(no_crash_count) + ',' + str(crash_count) + '\n'
        wfile.write(str_line)
        for r in results:
            # results holds tuples; the original wfile.write(r) would raise
            # TypeError, so stringify first
            wfile.write(str(r))
if len(left_action_memory) == action_memory_size:
    left_action_memory.pop(0)
left_action_memory.append(leftAction)
if len(right_action_memory) == action_memory_size:
    right_action_memory.pop(0)
right_action_memory.append(rightAction)
leftState = newLeftState
rightState = newRightState
traci.load([
    "--start", "-c", "data/cross_2intersections_nosublane.sumocfg",
    "--tripinfo-output", "tripinfo.xml"
])
traci.trafficlight.setPhase("0", 0)
traci.trafficlight.setPhase("10", 0)
AVG_Q_len_perepisode.append(sum_q_lens / 702)
sum_q_lens = 0
print(AVG_Q_len_perepisode)
# import matplotlib.pyplot as plt
#
# plt.plot([x for x in range(num_episode)], [AVG_Q_len_perepisode], 'ro')
# plt.axis([0, num_episode, 0, 10])
# plt.show()
## ----- ##
traci.start([sumoBinary, "-c", Sumocfg_DIR, "--tripinfo-output",
             "tripinfo.xml"])
for vehc in vehicles_list:
    vehc.initialize()
## ----- ##
episode_num = 0
Algorithm_for_RL, environment_for_next_episode, episode_reward, \
    episode_reward_list, errors_arrays = episode()
total_reward_per_episode.append(episode_reward)
# rl_disengagement should not be a concern (empty list)
reward_history_per_episode.append(episode_reward_list)
## --- ##
environment_for_next_episode.reset()
traci.load(["-c", Sumocfg_DIR, "--tripinfo-output", "tripinfo.xml",
            "--start"])
for vehc in vehicles_list:
    vehc.initialize()  # Placed here to set lane change mode. Important!
## --- ##
while episode_num < max_num_episodes:
    episode_num += 1
    Algorithm_for_RL, environment_for_next_episode, episode_reward, \
        episode_reward_list, errors_arrays = episode(
            Algorithm_for_RL, environment_for_next_episode, episode_num)
    total_reward_per_episode.append(episode_reward)
    reward_history_per_episode.append(episode_reward_list)
    cumulative_errors_arrays.append(errors_arrays)
    # rl_disengagement _modified