def test_load(self):
    """Load the sample tll XML next to this test file and check the parsed
    intersections, the per-intersection phase counts and one phase string."""
    phases_path = os.path.join(os.path.dirname(__file__), "test.tll.xml")
    phases = TrafficLightPhases(phases_path)
    self.assertEqual(['0', '1'], phases.getIntersectionIds())
    self.assertEqual(1, phases.getNrPhases('1'))  # one has yellow
    self.assertEqual(4, phases.getNrPhases('0'))  # 4 of 8 have yellow
    self.assertEqual("GGggrrrrGGggrrrr", phases.getPhase("0", 0))
class SumoGymAdapter(Env):
    """
    An adapter that makes Sumo behave as a proper Gym environment.
    At top level, the actionspace and percepts are in a Dict with the
    trafficPHASES as keys.

    @param maxConnectRetries the max number of retries to connect.
        A retry is needed if the randomly chosen port
        to connect to SUMO is already in use.
    """
    _DEFAULT_PARAMETERS = {
        'gui': True,  # gui or not
        'scene': 'four_grid',  # subdirectory in the aienvs/scenarios/Sumo directory where
        'tlphasesfile': 'cross.net.xml',  # file
        'box_bottom_corner': (0, 0),  # bottom left corner of the observable frame
        'box_top_corner': (10, 10),  # top right corner of the observable frame
        'resolutionInPixelsPerMeterX': 1,  # for the observable frame
        'resolutionInPixelsPerMeterY': 1,  # for the observable frame
        'y_t': 6,  # yellow time
        'car_pr': 0.5,  # for automatic route/config generation probability that a car appears
        'car_tm': 2,  # for automatic route/config generation when the first car appears?
        'route_starts': [],  # for automatic route/config generation, ask Rolf
        'route_min_segments': 0,  # for automatic route/config generation, ask Rolf
        'route_max_segments': 0,  # for automatic route/config generation, ask Rolf
        'route_ends': [],  # for automatic route/config generation, ask Rolf
        'generate_conf': True,  # for automatic route/config generation
        'libsumo': False,  # whether libsumo is used instead of traci
        'waiting_penalty': 1,  # penalty for waiting
        'new_reward': False,  # some other type of reward ask Miguel
        'lightPositions': {},  # specify traffic light positions
        'scaling_factor': 1.0,  # for rescaling the reward? ask Miguel
        'maxConnectRetries': 50,  # maximum reattempts to connect by Traci
    }

    def __init__(self, parameters: dict = None):
        """
        @param path where results go, like "Experiment ID"
        @param parameters the configuration parameters.
            gui: whether we show a GUI.
            scenario: the path to the scenario to use
        """
        # BUGFIX: default was a shared mutable {} (mutable-default pitfall);
        # None sentinel keeps the same observable behaviour.
        if parameters is None:
            parameters = {}
        logging.debug(parameters)
        self._parameters = copy.deepcopy(self._DEFAULT_PARAMETERS)
        self._parameters.update(parameters)
        dirname = os.path.dirname(__file__)
        tlPhasesFile = os.path.join(dirname, "../../scenarios/Sumo/",
                                    self._parameters['scene'],
                                    self._parameters['tlphasesfile'])
        self._tlphases = TrafficLightPhases(tlPhasesFile)
        self.ldm = ldm(using_libsumo=self._parameters['libsumo'])
        self._takenActions = {}  # per intersection: history list of phase strings
        self._yellowTimer = {}  # per intersection: remaining yellow steps
        self._chosen_action = None
        self.seed(42)  # in case no seed is given
        self._action_space = self._getActionSpace()
        self.stats_control = Control(self._parameters['tripinfofolder'])
        self.factor_graph = self._parameters['factored_agents']
        self.n_factors = len(list(self.factor_graph.keys()))
        self.factored_coords = self._parameters['factored_coords']
        # self.pixelsPerMeter = self._parameters['pixelsPerMeter']
        self.testseed = list(self._parameters['test_seed'])
        self.seed_cntr = 0  # index into the list of test seeds

    def step(self, actions: dict):
        """Apply the joint light action, advance SUMO one step.

        @param actions dict mapping intersection id -> discrete phase index
        @return (observation, global_reward, done, info)
        """
        self._set_lights(actions)
        self.ldm.step()
        obs = self._observe()
        done = self.ldm.isSimulationFinished()
        global_reward = self._computeGlobalReward()
        # as in openai gym, last one is the info list
        return obs, global_reward, done, []

    def reset_test_cntr(self):
        """Rewind the pointer into the list of test seeds."""
        self.seed_cntr = 0

    def reset(self, episode=None):
        """Close any running LDM and start a fresh SUMO episode.

        @param episode when not None, also log travel-time stats and return
            (observation, average_travel_times, average_travel_time);
            otherwise return only the observation.
        """
        try:
            logging.debug("LDM closed by resetting")
            self.ldm.close()
        except Exception:
            logging.debug(
                "No LDM to close. Perhaps it's the first instance of training")
        if episode is not None:
            average_travel_times, average_travel_time = self.stats_control.log()
            obs = self._restart()
            return obs, average_travel_times, average_travel_time
        return self._restart()

    def _restart(self):
        """Start SUMO, rebuild the factored state, return the first observation."""
        logging.info("Starting SUMO environment...")
        self._startSUMO()
        # TODO: Wouter: make state configurable ("state factory")
        self._state = FactoredLDMMatrixState(
            self.ldm,
            [
                self._parameters['box_bottom_corner'],
                self._parameters['box_top_corner']
            ],
            factored_agents=self.factor_graph,
            factored_coords=self.factored_coords)
        return self._observe()

    # TODO: change the defaults to something sensible
    def render(self, delay=0.0):
        """Print the observation matrix to the terminal (ANSI cursor tricks)."""
        import colorama
        colorama.init()

        def move_cursor(x, y):
            print("\x1b[{};{}H".format(y + 1, x + 1))

        def clear():
            print("\x1b[2J")

        clear()
        move_cursor(100, 100)
        import numpy as np
        np.set_printoptions(linewidth=100)
        print(self._observe())
        time.sleep(delay)

    def seed(self, seed):
        """Seed python's random module.

        NOTE(review): self._seed is then overwritten from the wall clock, so
        the given seed does not control the SUMO seed in this variant —
        preserved as-is; confirm whether that is intentional.
        """
        random.seed(seed)
        self._seed = int(time.time())

    def close(self):
        self.__del__()

    # TODO: Wouter: this needs to return a space and be somehow unified with gym.spaces
    @property
    def observation_space(self):
        return self._state.update_state()

    @property
    def action_space(self):
        return self._action_space

    ########## Private functions ##########################

    def __del__(self):
        logging.debug("LDM closed by destructor")
        # BUGFIX: was `if 'ldm' in locals():` which can never be true inside
        # __del__ (no local named ldm), so the LDM was never closed here.
        if hasattr(self, 'ldm'):
            self.ldm.close()

    def _startSUMO(self):
        """
        Start the connection with SUMO as a subprocess and initialize
        the traci port, generate route file.
        """
        val = 'sumo-gui' if self._parameters['gui'] else 'sumo'
        maxRetries = self._parameters['maxConnectRetries']
        sumo_binary = checkBinary(val)
        self.dirname = os.path.dirname(__file__)
        outfile = self._parameters['tripinfofolder']
        self.out = os.path.join(
            *[self.dirname, "../../test/Stats", outfile, "tripinfo.xml"])
        # Try repeatedly to connect
        while True:
            try:
                # this cannot be seeded
                self._port = random.SystemRandom().choice(
                    list(range(10000, 20000)))
                if self._parameters['test'] == False:
                    self._seed = self._seed + random.randint(0, 276574)
                else:
                    try:
                        self._seed = self.testseed[self.seed_cntr]
                    except IndexError:
                        # ran out of predefined test seeds: fall back to random
                        self._seed = self._seed + random.randint(0, 276574)
                    self.seed_cntr += 1
                self._sumo_helper = SumoHelper(self._parameters, self._port,
                                               int(self._seed))
                conf_file = self._sumo_helper.sumocfg_file
                logging.info("Configuration: " + str(conf_file))
                sumoCmd = [
                    sumo_binary, "-c", conf_file, "--tripinfo-output",
                    self.out, "--seed", str(self._seed)
                ]
                self.ldm.start(sumoCmd, self._port)
            except Exception as e:
                if str(e) == "connection closed by SUMO" and maxRetries > 0:
                    maxRetries = maxRetries - 1
                    continue
                else:
                    raise
            else:
                break
        self.ldm.init(
            waitingPenalty=self._parameters['waiting_penalty'],
            new_reward=self._parameters['new_reward'])  # ignore reward for now
        # used to set boundaries to compute the network space and it computes
        # the states, i can use this to compute states based on the factored graphs.
        self.ldm.setResolutionInPixelsPerMeter(
            self._parameters['resolutionInPixelsPerMeterX'],
            self._parameters['resolutionInPixelsPerMeterY'])
        self.ldm.setPositionOfTrafficLights(self._parameters['lightPositions'])
        if list(self.ldm.getTrafficLights()
                ) != self._tlphases.getIntersectionIds():
            raise Exception(
                "environment traffic lights do not match those in the tlphasesfile "
                + self._parameters['tlphasesfile'] +
                str(self.ldm.getTrafficLights()) +
                str(self._tlphases.getIntersectionIds()))

    def _intToPhaseString(self, intersectionId: str, lightPhaseId: int):
        """
        @param intersectionid the intersection(light) id
        @param lightvalue the PHASES value
        @return the intersection PHASES string eg 'rrGr' or 'GGrG'
        """
        logging.debug("lightPhaseId" + str(lightPhaseId))
        return self._tlphases.getPhase(intersectionId, lightPhaseId)

    def _observe(self):
        """
        Fetches the Sumo state and converts in a proper gym observation.
        The keys of the dict are the intersection IDs (roughly, the trafficLights)
        The values are the state of the TLs
        """
        return self._state.update_state()

    def _computeGlobalReward(self):
        """
        Computes the global reward
        """
        return self._state.update_reward()

    def _getActionSpace(self):
        """
        @returns the actionspace: a dict containing <id,phases> where
        id is the intersection id and value is
        all possible actions for each id as specified in tlphases
        """
        return spaces.Dict({inters:spaces.Discrete(self._tlphases.getNrPhases(inters)) \
                            for inters in self._tlphases.getIntersectionIds()})

    def _set_lights(self, actions: spaces.Dict):
        """
        Take the specified actions in the environment
        @param actions a list of
        """
        for intersectionId in actions.keys():
            action = self._intToPhaseString(intersectionId,
                                            actions.get(intersectionId))
            # Retrieve the action that was taken the previous step
            try:
                prev_action = self._takenActions[intersectionId][-1]
            except KeyError:
                # If KeyError, this is the first time any action was taken for this intersection
                prev_action = action
                self._takenActions.update({intersectionId: []})
                self._yellowTimer.update({intersectionId: 0})

            # Check if the given action is different from the previous action
            if prev_action != action:
                # Either the this is a true switch or coming grom yellow
                action, self._yellowTimer[
                    intersectionId] = self._correct_action(
                        prev_action, action, self._yellowTimer[intersectionId])

            # Set traffic lights
            self.ldm.setRedYellowGreenState(intersectionId, action)
            self._takenActions[intersectionId].append(action)

    def _correct_action(self, prev_action, action, timer):
        """
        Check what we are going to do with the given action based on the
        previous action.
        @return (new_action, timer) the phase string to apply and the
            remaining yellow time
        """
        # Check if the agent was in a yellow state the previous step
        if 'y' in prev_action:
            # Check if this agent is in the middle of its yellow state
            if timer > 0:
                new_action = prev_action
                timer -= 1
            # Otherwise we can get out of the yellow state
            else:
                new_action = self._chosen_action
                if not isinstance(new_action, str):
                    raise Exception("chosen action is illegal")
        # We are switching from green to red, initialize the yellow state
        else:
            self._chosen_action = action
            if self._parameters['y_t'] > 0:
                new_action = prev_action.replace('G', 'y')
                timer = self._parameters['y_t'] - 1
            else:
                new_action = action
                timer = 0
        return new_action, timer
class SumoGymAdapter(Env):
    """
    An adapter that makes Sumo behave as a proper Gym environment.
    At top level, the actionspace and percepts are in a Dict with the
    trafficPHASES as keys.

    @param maxConnectRetries the max number of retries to connect.
        A retry is needed if the randomly chosen port
        to connect to SUMO is already in use.
    """
    # NOTE: duplicate 'seed' and 'maxConnectRetries' entries removed
    # (they repeated the same values, so behaviour is unchanged).
    _DEFAULT_PARAMETERS = {
        'gui': True,  # gui or not
        'scenarios_path': os.path.join(os.path.dirname(__file__), "../../scenarios/Sumo/"),
        'scene': 'four_grid',  # subdirectory in the aienvs/scenarios/Sumo directory where
        'tlphasesfile': None,  # Use None to read phases from net file, otherwise only relative name
        'box_bottom_corner': (0, 0),  # bottom left corner of the observable frame
        'box_top_corner': (10, 10),  # top right corner of the observable frame
        'resolutionInPixelsPerMeterX': 1,  # for the observable frame
        'resolutionInPixelsPerMeterY': 1,  # for the observable frame
        'y_t': 6,  # yellow time
        'generate_conf': True,  # for automatic route/config generation
        'simulation_start_time': '0',  # The start time of the sumo simulation in seconds
        'reward_range': [100],
        'route_generation_method': 'undefined',  # One of ['legacy', 'randomTrips.py', 'activitygen']
        # Options for 'route_generation_method' 'activitygen'
        'activitygen_options': [],  # e.g. ["--end", endtime]
        'stat_file': None,  # stat file used. Leave none for automatic search.
        # Options for 'route_generation_method' 'randomTrips.py'
        'trips_generate_options': [],  # sumo/tools/randomTrips.py additional options. -n, -o, --validate already handled!
        # Custom route and trip generation if route_generation_method is set to 'legacy'
        'car_pr': 0.5,  # for automatic route/config generation probability that a car appears
        'car_tm': 2,  # for automatic route/config generation when the first car appears?
        'route_starts': [],  # for automatic route/config generation, ask Rolf
        'route_min_segments': 0,  # for automatic route/config generation, ask Rolf
        'route_max_segments': 0,  # for automatic route/config generation, ask Rolf
        'route_ends': [],  # for automatic route/config generation, ask Rolf
        'seed': None,  # Used to seed sumo, and to generate the traffic by all generation methods.
        'libsumo': False,  # whether libsumo is used instead of traci
        'waiting_penalty': 1,  # penalty for waiting
        'new_reward': False,  # some other type of reward ask Miguel
        'lightPositions': {},  # specify traffic light positions
        'scaling_factor': 1.0,  # for rescaling the reward? ask Miguel
        'maxConnectRetries': 50,  # maximum reattempts to connect by Traci
        'reward_function': "default",  # options include default, eval and elise
    }

    def __init__(self, parameters: dict = None, init_state=True):
        """
        @param path where results go, like "Experiment ID"
        @param parameters the configuration parameters.
            gui: whether we show a GUI.
            scenario: the path to the scenario to use
        @param init_state when False, skip building the LdmMatrixState
            (self._state stays None until set by a subclass/caller).
        """
        # BUGFIX: default was a shared mutable {} (mutable-default pitfall);
        # None sentinel keeps the same observable behaviour.
        if parameters is None:
            parameters = {}
        logging.debug(parameters)
        self._parameters = copy.deepcopy(self._DEFAULT_PARAMETERS)
        self._parameters.update(parameters)
        self._scenario_path = os.path.join(self._parameters['scenarios_path'],
                                           self._parameters['scene'])
        if self._parameters['tlphasesfile'] is None:
            tlPhasesFile = self._parameters[
                'tlphasesfile'] = self.get_net_file()
        else:
            tlPhasesFile = os.path.join(self._parameters['scenarios_path'],
                                        self._parameters['scene'],
                                        self._parameters['tlphasesfile'])
        self._tlphases = TrafficLightPhases(tlPhasesFile)
        self.ldm = ldm(using_libsumo=self._parameters['libsumo'])
        self._takenActions = {}  # per intersection: last applied phase string
        self._yellowTimer = {}  # per intersection: remaining yellow steps
        self._chosen_action = None
        # BUGFIX: was parameters['seed'], which raises KeyError whenever the
        # caller does not pass 'seed'; the merged parameters always have it.
        self.seed(self._parameters['seed'])  # in case no seed is given
        self._action_space = self._getActionSpace()
        # TODO: Wouter: make state configurable ("state factory")
        if init_state:
            self._state = LdmMatrixState(self.ldm, [
                self._parameters['box_bottom_corner'],
                self._parameters['box_top_corner']
            ], self._parameters["reward_range"], "byCorners")
        else:
            self._state = None
        # Computed when needed instead of in __init__:
        self._observation_space = None

    def update_parameters(self, updated_params: dict):
        """
        Updates the parameters. Please note that some of the parameter
        changes may only be propagated after resetting the environment.
        """
        self._parameters.update(updated_params)

    def set_stat_file(self, stat_file: str):
        """Point route generation at a specific stat file."""
        self.update_parameters({"stat_file": stat_file})

    def _compute_observation_space(self):
        """Start a headless SUMO once to discover the observation frame size."""
        self._startSUMO(gui=False)
        _s = self._observe()
        self.frame_height = _s.shape[0]
        self.frame_width = _s.shape[1]
        return Box(low=0,
                   high=1.0,
                   shape=(self.frame_height, self.frame_width),
                   dtype=np.float32)

    def step(self, actions: dict):
        """Apply the joint light action, advance SUMO one step.

        @param actions dict mapping intersection id -> discrete phase index
        @return (observation, reward, done, info); when reward_range has a
            single entry, the reward is that entry of the reward dict.
        """
        self._set_lights(actions)
        self.ldm.step()
        obs = self._observe()
        done = self.ldm.isSimulationFinished()
        global_reward_list = self._computeGlobalReward(
            self._parameters['reward_function'])
        if len(self._parameters['reward_range']) == 1:
            return obs, global_reward_list[self._parameters['reward_range']
                                           [0]], done, []
        else:
            return obs, global_reward_list, done, []
        # as in openai gym, last one is the info list

    def reset(self):
        """Close any running LDM, restart SUMO and return the first observation."""
        try:
            logging.debug("LDM closed by resetting")
            self.ldm.close()
        except Exception:
            logging.debug(
                "No LDM to close. Perhaps it's the first instance of training")
        logging.debug("Starting SUMO environment...")
        self._startSUMO()
        return self._observe()

    # TODO: change the defaults to something sensible
    def render(self, delay=0.0):
        """Print the observation matrix to the terminal (ANSI cursor tricks)."""
        import colorama
        colorama.init()

        def move_cursor(x, y):
            print("\x1b[{};{}H".format(y + 1, x + 1))

        def clear():
            print("\x1b[2J")

        clear()
        move_cursor(100, 100)
        import numpy as np
        np.set_printoptions(linewidth=100)
        print(self._observe())
        time.sleep(delay)

    def seed(self, seed):
        """Store the seed later passed to SUMO (None means unseeded)."""
        self._seed = seed

    def close(self):
        self.__del__()

    @property
    def observation_space(self):
        # # this is the previous method, which does not take resolution into consideration
        # size = self._state.size()
        # return Box(low=0, high=np.inf, shape=(size[0], size[1]), dtype=np.int32)
        if self._observation_space is None:
            self._observation_space = self._compute_observation_space()
        return self._observation_space

    @property
    def action_space(self):
        return self._action_space

    ########## Private functions ##########################

    def __del__(self):
        logging.debug("LDM closed by destructor")
        # BUGFIX: was `if 'ldm' in locals():` which can never be true inside
        # __del__ (no local named ldm), so the LDM was never closed here.
        if hasattr(self, 'ldm'):
            self.ldm.close()

    def _startSUMO(self, gui=None):
        """
        Start the connection with SUMO as a subprocess and initialize
        the traci port, generate route file.

        @param gui True/False forces gui on/off; None falls back to the
            'gui' parameter.
        """
        val = 'sumo'
        if gui is True:
            val = 'sumo-gui'
        elif gui is None:
            val = 'sumo-gui' if self._parameters['gui'] else 'sumo'
        maxRetries = self._parameters['maxConnectRetries']
        sumo_binary = checkBinary(val)

        # Try repeatedly to connect
        while True:
            try:
                # this cannot be seeded
                self._port = random.SystemRandom().choice(
                    list(range(10000, 20000)))
                self._sumo_helper = SumoHelper(self._parameters, self._port,
                                               self._seed)
                conf_file = self._sumo_helper.sumocfg_file
                logging.debug("Configuration: " + str(conf_file))
                sumoCmd = [sumo_binary, "-c", conf_file, "-W", "-v",
                           "false"]  # shut up SUMO
                if self._seed is not None:
                    sumoCmd += ["--seed", str(self._seed)]
                self.ldm.start(sumoCmd, self._port)
            except Exception as e:
                if str(e) == "connection closed by SUMO" and maxRetries > 0:
                    maxRetries = maxRetries - 1
                    continue
                else:
                    raise
            else:
                break
        self.ldm.init(
            waitingPenalty=self._parameters['waiting_penalty'],
            new_reward=self._parameters['new_reward'])  # ignore reward for now
        self.ldm.setResolutionInPixelsPerMeter(
            self._parameters['resolutionInPixelsPerMeterX'],
            self._parameters['resolutionInPixelsPerMeterY'])
        self.ldm.setPositionOfTrafficLights(self._parameters['lightPositions'])
        if list(self.ldm.getTrafficLights()
                ) != self._tlphases.getIntersectionIds():
            raise Exception(
                "environment traffic lights do not match those in the tlphasesfile "
                + self._parameters['tlphasesfile'] +
                str(self.ldm.getTrafficLights()) +
                str(self._tlphases.getIntersectionIds()))

    def _intToPhaseString(self, intersectionId: str, lightPhaseId: int):
        """
        @param intersectionid the intersection(light) id
        @param lightvalue the PHASES value
        @return the intersection PHASES string eg 'rrGr' or 'GGrG'
        """
        logging.debug("lightPhaseId" + str(lightPhaseId))
        return self._tlphases.getPhase(intersectionId, lightPhaseId)

    def _observe(self):
        """
        Fetches the Sumo state and converts in a proper gym observation.
        The keys of the dict are the intersection IDs (roughly, the trafficLights)
        The values are the state of the TLs
        """
        return self._state.update_state()

    def _computeGlobalReward(self, function):
        """
        Computes the global reward, rescaled by 'scaling_factor'.
        @param function the reward function name forwarded to the state
        @return dict of rewards
        """
        rewards: dict = self._state.update_reward(function)
        for k in rewards.keys():
            rewards[k] = rewards[k] / self._parameters['scaling_factor']
        return rewards

    def _getActionSpace(self):
        """
        @returns the actionspace: a dict containing <id,phases> where
        id is the intersection id and value is
        all possible actions for each id as specified in tlphases
        """
        return spaces.Dict({inters:spaces.Discrete(self._tlphases.getNrPhases(inters)) \
                            for inters in self._tlphases.getIntersectionIds()})

    def _set_lights(self, actions: spaces.Dict):
        """
        Take the specified actions in the environment
        @param actions a list of
        """
        for intersectionId in actions.keys():
            action = self._intToPhaseString(intersectionId,
                                            actions.get(intersectionId))
            # Retrieve the action that was taken the previous step
            try:
                prev_action = self._takenActions[intersectionId]
            except KeyError:
                # If KeyError, this is the first time any action was taken for this intersection
                prev_action = action
                self._takenActions.update({intersectionId: action})
                self._yellowTimer.update({intersectionId: 0})

            # Check if the given action is different from the previous action
            if prev_action != action:
                # Either the this is a true switch or coming grom yellow
                action, self._yellowTimer[
                    intersectionId] = self._correct_action(
                        prev_action, action, self._yellowTimer[intersectionId])

            # Set traffic lights
            self.ldm.setRedYellowGreenState(intersectionId, action)
            self._takenActions[intersectionId] = action

    def _correct_action(self, prev_action, action, timer):
        """
        Check what we are going to do with the given action based on the
        previous action.
        @return (new_action, timer) the phase string to apply and the
            remaining yellow time
        """
        # Check if the agent was in a yellow state the previous step
        if 'y' in prev_action:
            # Check if this agent is in the middle of its yellow state
            if timer > 0:
                new_action = prev_action
                timer -= 1
            # Otherwise we can get out of the yellow state
            else:
                new_action = self._chosen_action
                if not isinstance(new_action, str):
                    raise Exception("chosen action is illegal")
        # We are switching from green to red, initialize the yellow state
        else:
            self._chosen_action = action
            if self._parameters['y_t'] > 0:
                new_action = prev_action.replace('G', 'y')
                timer = self._parameters['y_t'] - 1
            else:
                new_action = action
                timer = 0
        return new_action, timer

    # Returns full path
    def get_net_file(self):
        """Locate the single *.net.xml file in the scenario directory."""
        net_files = glob.glob(self._scenario_path + '/*.net.xml')
        assert len(
            net_files
        ) == 1, f"Expected exactly one netfile, but netfiles: {net_files}"
        return net_files[0]