class Beacon1DEnv(HeroEnv):
    """1xN corridor task: the hero ('H') starts at the left end and must walk
    right to a beacon ('B'); reaching it yields reward 1.0 and ends the
    episode. Cells beyond the beacon are filled with walls ('W')."""

    # Hero (object index 0) overlapping a 'B': reward 1.0, episode done.
    STATE_MAP = {(0, 'B'): (0, 1.0, True, None)}
    ACTION_MAP = Direction.left_right()

    def create_world(self):
        self.game_state['hero'] = self.reset_world()
        return self.world

    def reset_world(self):
        """Build the corridor and return the hero GridObject."""
        self.world = GridWorld((10, 1))
        # BUG FIX: the original referenced an undefined name `level`
        # (NameError at runtime). Read it from the instance instead,
        # defaulting to 0 (beacon at the rightmost cell, no walls).
        locations = self.generate_instance_positions(
            instance=getattr(self, 'level', 0))
        hero = self.world.add_object(
            GridObject('H', (0, 0), Color.green, render_preference=1))
        # positions[-1] is the leftmost of the generated pack -> the beacon.
        self.world.add_object(
            GridObject('B', locations.pop(), Color.darkOrange))
        # Remaining positions (to the right of the beacon) become walls.
        while locations:
            self.world.add_object(
                GridObject('W', locations.pop(), Color.white))
            # wall.collides_with(hero)  # would block the hero's way; not
            # needed right now since the episode ends at the beacon and no
            # wall transitions are defined.
        return hero

    def generate_instance_positions(self, instance=0):
        """Return `instance + 1` cell positions packed against the right edge
        of the corridor, ordered from rightmost to leftmost."""
        positions = []
        for t in range(instance + 1):
            positions.append((self.world.grid_size.x - (t + 1), 0))
        return positions
class MoveToBeaconEnv(HeroEnv):
    """10x10 grid task: the hero ('H') must walk onto a beacon ('B') placed
    at a random cell distinct from the hero's; reaching it yields reward 1.0
    and ends the episode."""

    # Hero (object index 0) overlapping a 'B': reward 1.0, episode done.
    STATE_MAP = {(0, 'B'): (0, 1.0, True, None)}
    ACTION_MAP = Direction.cardinal()

    def create_world(self):
        self.game_state['hero'] = self.reset_world()
        return self.world

    def reset_world(self):
        """Place hero and beacon at distinct uniformly random cells of a
        fresh 10x10 grid; return the hero GridObject."""
        self.world = GridWorld((10, 10))
        # NOTE: removed two unused "quadrant" draws the original computed and
        # discarded; they only perturbed the global RNG stream (seeded
        # trajectories will therefore differ from the old code).
        hero_pos = self.generate_random_position()
        beacon_pos = self.generate_random_position()
        while beacon_pos == hero_pos:  # resample until the two cells differ
            beacon_pos = self.generate_random_position()
        hero = self.world.add_object(
            GridObject('H', hero_pos, Color.green, render_preference=1))
        self.world.add_object(
            GridObject('B', beacon_pos, Color.darkOrange))
        return hero

    def generate_random_position(self):
        """Uniformly random (x, y) cell within the grid bounds."""
        x = np.random.randint(0, self.world.grid_size.x)
        y = np.random.randint(0, self.world.grid_size.y)
        return (x, y)
def reset_world(self):
    """Build a fresh Freeway grid: the frog at bottom-center, a goal row
    across the top, and each traffic row pre-filled with cars whose spacing
    is drawn from `self.get_relative_time()`.

    Also initializes `game_state["step_next_car"]` with the step at which
    each traffic row spawns its next car. Returns the hero GridObject.

    NOTE(review): the number and order of `get_relative_time()` calls drives
    the RNG stream — do not reorder statements in this method.
    """
    self.world = GridWorld((self.size_x, self.size_y))
    self.game_state["hero"] = GridObject(
        'F', (int(self.size_x / 2), self.size_y - 1), rgb=Color.green) #frog
    self.world.add_object(self.game_state["hero"])
    # Goal row: one 'G' object per column along y = 0.
    for i in range(self.size_x):
        self.world.add_object(GridObject('G', (i, 0), rgb=Color.blue)) #goal
    # One spawn-timer slot per traffic row (rows 1 .. size_y - 2).
    self.game_state["step_next_car"] = [None] * (self.size_y - 2)
    for i in range(self.size_y - 2):
        current_car_pos = 0
        #fill grid with cars
        # Cars are placed left-to-right with random gaps until the cursor
        # falls off the right edge of the grid.
        while True:
            current_car_pos += self.get_relative_time() + 1
            if current_car_pos < self.size_x:
                self.world.add_object(
                    GridObject('C', (current_car_pos, i + 1), rgb=Color.red))
            else:
                break
        #get step at which a new car will be generated, for each row i
        self.game_state["step_next_car"][i] = self.get_relative_time() + 1
    return self.game_state["hero"]
def __init__(self, n_actions, max_moves=None, pixel_size=(84, 84),
             using_immutable_states=False, fixed_init_state=False):
    """Set up the action/observation spaces and the internal state.

    n_actions: size of the discrete action space.
    max_moves: episode step limit (None = unlimited); must be positive.
    pixel_size: (width, height) of the rendered RGB observation.
    using_immutable_states: if True, clone/restore skip the deepcopy.
    fixed_init_state: if True, the same initial state is reused on reset.
    """
    self.max_moves = max_moves
    assert self.max_moves is None or self.max_moves > 0
    self.pixel_size = tuple(pixel_size)
    self.using_immutable_states = using_immutable_states
    self.fixed_init_state = fixed_init_state
    self.action_space = Discrete(n_actions)
    obs_shape = self.pixel_size + (3,)  # RGB channels last
    self.observation_space = Box(0, 255, shape=obs_shape, dtype=np.uint8)
    self.world = GridWorld()
    # Start "done" so that stepping before the first reset is rejected.
    self._state = {"done": True}
def reset_world(self):
    """Place the hero ('H') and the beacon ('B') at distinct uniformly
    random cells of a fresh 10x10 grid; return the hero GridObject."""
    self.world = GridWorld((10, 10))
    # NOTE: removed two unused "quadrant" draws the original computed and
    # discarded; they only perturbed the global RNG stream (seeded
    # trajectories will therefore differ from the old code).
    hero_pos = self.generate_random_position()
    beacon_pos = self.generate_random_position()
    while beacon_pos == hero_pos:  # resample until the two cells differ
        beacon_pos = self.generate_random_position()
    hero = self.world.add_object(
        GridObject('H', hero_pos, Color.green, render_preference=1))
    self.world.add_object(
        GridObject('B', beacon_pos, Color.darkOrange))
    return hero
def reset_world(self):
    """Build the 1x10 corridor: hero ('H') at (0, 0), beacon ('B') at the
    last generated position, walls ('W') at the rest; return the hero.

    NOTE(review): presumably positions[-1] is the leftmost of the pack and
    the walls sit to the beacon's right — confirm against
    generate_instance_positions.
    """
    self.world = GridWorld((10, 1))
    # BUG FIX: the original referenced an undefined name `level` (NameError
    # at runtime). Read it from the instance instead, defaulting to 0.
    locations = self.generate_instance_positions(
        instance=getattr(self, 'level', 0))
    hero = self.world.add_object(
        GridObject('H', (0, 0), Color.green, render_preference=1))
    self.world.add_object(
        GridObject('B', locations.pop(), Color.darkOrange))
    # Every remaining position becomes a wall.
    while locations:
        self.world.add_object(
            GridObject('W', locations.pop(), Color.white))
        # wall.collides_with(hero)  # would block the hero's way; not needed
        # right now since the episode ends at the beacon and no wall
        # transitions are defined.
    return hero
def create_world_from_string_map(str_map, colors, hero_mark):
    """Build a GridWorld from an ASCII map.

    str_map: list of equal-length strings; '.' marks an empty cell, any
        other character places a GridObject named after that character.
    colors: mapping from object character to its RGB color.
    hero_mark: the character identifying the hero object.

    Returns (hero, world). Raises AssertionError if a character has no
    color defined or if the hero mark never appears in the map.
    """
    world = GridWorld((len(str_map[0]), len(str_map)))
    hero = None
    for y, row in enumerate(str_map):
        for x, mark in enumerate(row):
            if mark == '.':  # empty cell
                continue
            # `mark` already is str_map[y][x]; the original re-indexed the
            # map and tested membership via `.keys()` -- both redundant.
            assert mark in colors, "Please define a color for object %s" % mark
            o = GridObject(name=mark, pos=(x, y), rgb=colors[mark])
            if mark == hero_mark:
                o.render_preference = 1  # draw the hero on top
                hero = o
            world.add_object(o)
    assert hero is not None, "Hero could not be loaded. Hero mark not in string map?"
    return hero, world
class GridEnv(gym.Env):
    """
    This class should not be instantiated
    It models a game based on colored squares/rectangles in a 2D space
    """

    def __init__(self, n_actions, max_moves=None, pixel_size=(84, 84),
                 using_immutable_states=False, fixed_init_state=False):
        # max_moves: episode step limit (None = unlimited); must be positive.
        self.max_moves = max_moves
        assert self.max_moves is None or self.max_moves > 0
        self.pixel_size = tuple(pixel_size)
        self.fixed_init_state = fixed_init_state
        self.using_immutable_states = using_immutable_states
        self.action_space = Discrete(n_actions)
        # Observations are rendered RGB images, channels last.
        self.observation_space = Box(0, 255, shape=self.pixel_size + (3,),
                                     dtype=np.uint8)
        self.world = GridWorld()
        self._state = {"done": True}  # We are forced to reset

    def seed(self, seed):
        """Seed the (global numpy) RNG and return the seed."""
        np.random.seed(seed)  # TODO: use own random state instead of global one, allow seed=None
        return seed

    def step(self, action):
        """Advance one move; returns (obs, reward, done, info)."""
        assert not self._state["done"], "The environment needs to be reset."
        next_state, r, done, info = self.get_next_state(self._state["state"], action)
        moves = self._state["moves"] + 1
        # Enforce the optional step limit regardless of the game outcome.
        if self.max_moves is not None and moves >= self.max_moves:
            done = True
        obs = self.world.render(self.get_gridstate(next_state), size=self.pixel_size)
        self._state = {"state": next_state, "moves": moves, "done": done}
        return (obs, r, done, info)

    def reset(self):
        """Start a new episode and return the first observation."""
        if self.fixed_init_state:
            # Lazily compute the initial state once, then reuse it forever.
            try:
                init_state = self.init_state
            except AttributeError:
                init_state = self.init_state = self.get_init_state()
            self.restore_state({"state": init_state, "moves": 0, "done": False})
        else:
            self._state = {"state": self.get_init_state(), "moves": 0, "done": False}
        obs = self.world.render(self.get_gridstate(self._state["state"]), size=self.pixel_size)
        return obs

    def render(self, size=None):
        """Display the current state in an image viewer; returns whether the
        viewer window is still open."""
        if size is None:
            size = self.pixel_size
        img = self.world.render(self.get_gridstate(self._state["state"]), size=size)
        # Lazily create the viewer on first use (EAFP on self.viewer).
        try:
            self.viewer.imshow(img)
        except AttributeError:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)
        return self.viewer.isopen

    def clone_state(self):
        """Snapshot the internal state (deepcopy unless states are immutable)."""
        if self.using_immutable_states:
            return self._state
        return deepcopy(self._state)

    def restore_state(self, internal_state):
        """Restore a snapshot produced by clone_state."""
        if self.using_immutable_states:
            self._state = internal_state
        else:
            self._state = deepcopy(internal_state)

    def get_char_matrix(self):
        """Character-matrix view of the current grid state."""
        return self.world.get_char_matrix(self.get_gridstate(self._state["state"]))  #.view(np.uint32)

    def get_init_state(self):
        """
        To be implemented by child classes.
        It will be called at each environment reset if fixed_init_state is False, or only once at
        initialization otherwise.
        """
        raise NotImplementedError

    def get_next_state(self, state, action):
        """
        To be implemented by child classes.
        Returns a tuple: reward, episode_done, info_dict
        """
        raise NotImplementedError()

    def get_gridstate(self, state):
        """
        To be implemented by child classes.
        :return: iterable of grid objects
        """
        raise NotImplementedError()

    def __repr__(self):
        # Printable char-matrix once a state exists; generic repr before reset.
        if 'state' in self._state.keys():
            return "\n".join([" ".join(row) for row in self.get_char_matrix()])
        else:
            return super(GridEnv, self).__repr__()

    def __del__(self):
        # Close the viewer if one was ever created.
        try:
            self.viewer.close()
        except AttributeError:
            pass
class FreewayEnv(HeroEnv):
    """Freeway-style game: a frog ('F') crosses rows of rightward-moving
    cars ('C') to reach the goal row ('G') at the top. Hitting a car gives
    -1.0, reaching the goal +1.0; in both cases the frog is reset to its
    starting cell, and the episode ends only in "collision" mode."""

    # Actions: no-op, up, down.
    ACTION_MAP = [None, Direction.N, Direction.S]

    def __init__(self, size, obs_type="image", avg_cars=0.2, episode_end="moves"):
        # episode_end="moves": fixed 100-step episodes; "collision": episode
        # ends on any car hit or goal reach.
        assert episode_end in ("moves", "collision")
        max_moves = None if episode_end == "collision" else 100
        assert size >= 3 # At least one row for starting point, one for goal and one for cars.
        avg_cars_per_step = avg_cars
        self.size_x = self.size_y = size
        self.mean_relative_time = 1 / avg_cars_per_step # mean waiting steps before generating a new car, at every row.
        # Hero (index 0) touching a car / the goal row; the callback resets
        # the hero to its start position in both cases.
        self.STATE_MAP = {
            (0, 'C'): (0, -1.0, episode_end == "collision", self.reset_hero),
            (0, 'G'): (0, 1.0, episode_end == "collision", self.reset_hero)
        }
        super(FreewayEnv, self).__init__(max_moves, obs_type)

    def reset_hero(self, env, c):
        """Put the frog back at the bottom-center starting cell."""
        self.game_state["hero"].pos = Point(int(self.size_x / 2), self.size_y - 1)

    def reset_world(self):
        """Build a fresh grid: frog at bottom-center, goal row at the top,
        each traffic row pre-filled with randomly spaced cars, and the spawn
        timers in game_state["step_next_car"] initialized."""
        self.world = GridWorld((self.size_x, self.size_y))
        self.game_state["hero"] = GridObject(
            'F', (int(self.size_x / 2), self.size_y - 1), rgb=Color.green) #frog
        self.world.add_object(self.game_state["hero"])
        # Goal row: one 'G' per column along y = 0.
        for i in range(self.size_x):
            self.world.add_object(GridObject('G', (i, 0), rgb=Color.blue)) #goal
        self.game_state["step_next_car"] = [None] * (self.size_y - 2)
        for i in range(self.size_y - 2):
            current_car_pos = 0
            #fill grid with cars
            # Cars placed left-to-right with random gaps until the cursor
            # falls off the right edge.
            while True:
                current_car_pos += self.get_relative_time() + 1
                if current_car_pos < self.size_x:
                    self.world.add_object(
                        GridObject('C', (current_car_pos, i + 1), rgb=Color.red))
                else:
                    break
            #get step at which a new car will be generated, for each row i
            self.game_state["step_next_car"][i] = self.get_relative_time() + 1
        return self.game_state["hero"]

    def move_cars(self):
        """Shift every car one cell east, drop cars that leave the grid, and
        spawn new cars in rows whose timer matches the current move count."""
        # Move cars
        cars_to_remove = []
        for o in self.world.objects:
            if o.name == 'C':
                if not self.move(
                        o, Direction.E
                ):  #if we cannot move, it's because we reached the right edge
                    cars_to_remove.append(o)
        # Remove the ones that were getting out of the grid
        for car in cars_to_remove:
            self.world.objects.remove(car)
        # Add new cars
        for i in range(self.size_y - 2):
            if self.game_state["step_next_car"][i] == self.game_state["moves"]:
                self.world.add_object(
                    GridObject('C', (0, i + 1), rgb=Color.red))
                # Schedule this row's next spawn relative to the current move.
                self.game_state["step_next_car"][i] = self.get_relative_time(
                ) + self.game_state["moves"] + 1

    def get_relative_time(self):
        """
        Poisson process: Gives the relative time at which an event is generated, sampled from the
        exponential distribution: F(x) = 1 - e^(-l*x)
        The next timestep is given by the inverse: x = -ln(U) / l, with the rate l=1/mean_relative_time.
        Returns: relative time
        """
        return int(
            round(-np.log(1.0 - np.random.rand()) * self.mean_relative_time)
        )  # 1-rand with lambda=rate.because random.random returns a value in [0,1) and we want a value in (0,1], to avoid log(0)

    def update_world(self):
        # Advance traffic before the generic world update.
        self.move_cars()
        return super(FreewayEnv, self).update_world()