Example #1
0
    class Beacon1DEnv(HeroEnv):
        """One-dimensional beacon task played on a 10x1 grid.

        The hero 'H' starts in the leftmost cell (0, 0). The beacon 'B' sits
        ``level`` cells to the left of the rightmost cell, and every cell to
        the right of the beacon is filled with a wall 'W'.

        ``level`` is a free variable: it is not defined in this class and is
        resolved from the surrounding scope.
        """

        # NOTE(review): the meaning of the key/value tuples is defined by
        # HeroEnv, which is not visible here.
        STATE_MAP = {(0, 'B'): (0, 1.0, True, None)}
        ACTION_MAP = Direction.left_right()

        def create_world(self):
            """Build a fresh world, store the hero in the game state, return the world."""
            hero = self.reset_world()
            self.game_state['hero'] = hero
            return self.world

        def reset_world(self):
            """Recreate the grid with hero, beacon and walls; return the hero object."""
            self.world = GridWorld((10, 1))
            pending = self.generate_instance_positions(instance=level)

            hero = self.world.add_object(
                GridObject('H', (0, 0), Color.green, render_preference=1))

            # The last generated position is the one furthest from the right
            # edge: that cell receives the beacon.
            self.world.add_object(
                GridObject('B', pending.pop(), Color.darkOrange))

            # Whatever is left lies to the right of the goal and becomes walls.
            # The walls are not registered as colliding with the hero: the
            # episode ends at the goal, so the hero never reaches them.
            while pending:
                self.world.add_object(
                    GridObject('W', pending.pop(), Color.white))

            return hero

        def generate_instance_positions(self, instance=0):
            """Return ``instance + 1`` cells on row 0, listed from the right edge leftwards."""
            return [(self.world.grid_size.x - (offset + 1), 0)
                    for offset in range(instance + 1)]
Example #2
0
class MoveToBeaconEnv(HeroEnv):
    """Hero 'H' and beacon 'B' are dropped on distinct random cells of a 10x10 grid."""

    # NOTE(review): the meaning of the key/value tuples is defined by HeroEnv,
    # which is not visible here.
    STATE_MAP = {(0, 'B'): (0, 1.0, True, None)}
    ACTION_MAP = Direction.cardinal()

    def create_world(self):
        """Build a fresh world, store the hero in the game state, return the world."""
        hero = self.reset_world()
        self.game_state['hero'] = hero
        return self.world

    def reset_world(self):
        """Recreate the grid with a randomly placed hero and beacon; return the hero."""
        self.world = GridWorld((10, 10))

        # These two quadrant draws are never used for placement below. They
        # are kept so the global NumPy RNG advances exactly as it always did.
        hero_quadrant = np.random.randint(4)
        remaining_quadrants = list(set(range(4)) - {hero_quadrant})
        np.random.choice(remaining_quadrants)

        hero_pos = self.generate_random_position()
        # Re-draw the beacon until it does not land on the hero's cell.
        while True:
            beacon_pos = self.generate_random_position()
            if beacon_pos != hero_pos:
                break

        hero = self.world.add_object(
            GridObject('H', hero_pos, Color.green, render_preference=1))
        self.world.add_object(
            GridObject('B', beacon_pos, Color.darkOrange))
        return hero

    def generate_random_position(self):
        """Draw a uniformly random (x, y) cell inside the current world."""
        size = self.world.grid_size
        # x is drawn before y; the order matters for reproducibility under a seed.
        return (np.random.randint(0, size.x), np.random.randint(0, size.y))
Example #3
0
    def reset_world(self):
        """Rebuild the grid: frog on the bottom row, goal along the top row and
        randomly spaced cars on every row in between. Returns the frog object.
        """
        self.world = GridWorld((self.size_x, self.size_y))

        # Frog starts in the middle of the last row.
        frog = GridObject('F', (int(self.size_x / 2), self.size_y - 1),
                          rgb=Color.green)
        self.game_state["hero"] = frog
        self.world.add_object(frog)

        # The whole first row is goal.
        for column in range(self.size_x):
            self.world.add_object(GridObject('G', (column, 0), rgb=Color.blue))

        # Rows 1 .. size_y - 2 are traffic lanes.
        lane_count = self.size_y - 2
        self.game_state["step_next_car"] = [None] * lane_count
        for lane in range(lane_count):
            # Pre-fill the lane: keep placing cars at random gaps until the
            # next one would fall outside the grid.
            car_x = self.get_relative_time() + 1
            while car_x < self.size_x:
                self.world.add_object(
                    GridObject('C', (car_x, lane + 1), rgb=Color.red))
                car_x += self.get_relative_time() + 1
            # Step at which this lane will spawn its next car.
            self.game_state["step_next_car"][lane] = self.get_relative_time() + 1

        return self.game_state["hero"]
Example #4
0
 def __init__(self, n_actions, max_moves=None, pixel_size=(84, 84), using_immutable_states=False, fixed_init_state=False):
     """
     Store the configuration and set up action/observation spaces.

     :param n_actions: number of discrete actions
     :param max_moves: positive move limit, or None for no limit
     :param pixel_size: 2-element size; observations have shape pixel_size + (3,)
     :param using_immutable_states: stored as-is on the instance
     :param fixed_init_state: stored as-is on the instance
     """
     self.max_moves = max_moves
     assert max_moves is None or max_moves > 0

     self.using_immutable_states = using_immutable_states
     self.fixed_init_state = fixed_init_state

     self.pixel_size = tuple(pixel_size)
     image_shape = self.pixel_size + (3,)  # three color channels, values 0..255
     self.action_space = Discrete(n_actions)
     self.observation_space = Box(0, 255, shape=image_shape, dtype=np.uint8)

     self.world = GridWorld()
     # Start flagged as "done": We are forced to reset
     self._state = {"done": True}
Example #5
0
 def reset_world(self):
     """Recreate the 10x10 grid with a randomly placed hero and beacon; return the hero."""
     self.world = GridWorld((10, 10))

     # These two quadrant draws are never used for placement below. They
     # are kept so the global NumPy RNG advances exactly as it always did.
     hero_quadrant = np.random.randint(4)
     remaining_quadrants = list(set(range(4)) - {hero_quadrant})
     np.random.choice(remaining_quadrants)

     hero_pos = self.generate_random_position()
     # Re-draw the beacon until it does not land on the hero's cell.
     while True:
         beacon_pos = self.generate_random_position()
         if beacon_pos != hero_pos:
             break

     hero = self.world.add_object(
         GridObject('H', hero_pos, Color.green, render_preference=1))
     self.world.add_object(
         GridObject('B', beacon_pos, Color.darkOrange))
     return hero
Example #6
0
        def reset_world(self):
            """Recreate the 10x1 grid with hero, beacon and walls; return the hero.

            ``level`` is a free variable resolved outside this method.
            """
            self.world = GridWorld((10, 1))
            pending = self.generate_instance_positions(instance=level)

            hero = self.world.add_object(
                GridObject('H', (0, 0), Color.green, render_preference=1))

            # The last generated position receives the beacon.
            self.world.add_object(
                GridObject('B', pending.pop(), Color.darkOrange))

            # Add walls to the right of the goal, consuming the remaining
            # positions from the end of the list. The walls are not registered
            # as colliding with the hero: the episode ends at the goal.
            while pending:
                self.world.add_object(
                    GridObject('W', pending.pop(), Color.white))

            return hero
Example #7
0
def create_world_from_string_map(str_map, colors, hero_mark):
    """Build a GridWorld from a character map.

    :param str_map: sequence of strings, one per row; the grid width is taken
        from the first row. A '.' marks an empty cell, any other character
        becomes a GridObject named after that character.
    :param colors: mapping from object character to its color
    :param hero_mark: character identifying the hero; if it occurs more than
        once, the last occurrence is the one returned
    :return: tuple (hero, world)
    :raises AssertionError: if a character has no color or no hero was found
    """
    world = GridWorld((len(str_map[0]), len(str_map)))
    hero = None

    for y, row in enumerate(str_map):
        for x, mark in enumerate(row):
            if mark == '.':
                continue  # empty cell, nothing to place

            assert mark in colors, "Please define a color for object %s"%mark
            obj = GridObject(name=mark, pos=(x, y), rgb=colors[mark])
            if mark == hero_mark:
                obj.render_preference = 1
                hero = obj
            world.add_object(obj)

    assert hero is not None, "Hero could not be loaded. Hero mark not in string map?"
    return hero, world
Example #8
0
class GridEnv(gym.Env):
    """
        This class should not be instantiated
        It models a game based on colored squares/rectangles in a 2D space

        Subclasses must implement get_init_state, get_next_state and get_gridstate.
    """

    def __init__(self, n_actions, max_moves=None, pixel_size=(84, 84), using_immutable_states=False, fixed_init_state=False):
        """
        :param n_actions: size of the discrete action space
        :param max_moves: positive limit on moves per episode, or None for no limit
        :param pixel_size: size passed to the world renderer; observations have shape pixel_size + (3,)
        :param using_immutable_states: if True, clone_state/restore_state hand over the state dict
            itself instead of a deep copy
        :param fixed_init_state: if True, the initial state is computed once (on the first reset)
            and reused for every later reset
        """
        self.max_moves = max_moves
        assert self.max_moves is None or self.max_moves > 0
        self.pixel_size = tuple(pixel_size)
        self.fixed_init_state = fixed_init_state
        self.using_immutable_states = using_immutable_states
        self.action_space = Discrete(n_actions)
        self.observation_space = Box(0, 255, shape=self.pixel_size + (3,), dtype=np.uint8)
        self.world = GridWorld()
        self._state = {"done": True}  # We are forced to reset

    def seed(self, seed=None):
        """
        Seed NumPy's global random generator.
        :param seed: value forwarded to np.random.seed; None lets NumPy choose fresh entropy
        :return: the seed that was passed in
        """
        np.random.seed(seed)  # TODO: use own random state instead of global one
        return seed

    def step(self, action):
        """
        Advance the episode by one action.
        :return: tuple (observation image, reward, done, info)
        """
        assert not self._state["done"], "The environment needs to be reset."
        next_state, r, done, info = self.get_next_state(self._state["state"], action)
        moves = self._state["moves"] + 1
        # The move limit ends the episode even if the game itself has not finished.
        if self.max_moves is not None and moves >= self.max_moves:
            done = True
        obs = self.world.render(self.get_gridstate(next_state), size=self.pixel_size)
        self._state = {"state": next_state,
                       "moves": moves,
                       "done": done}
        return (obs, r, done, info)

    def reset(self):
        """
        Start a new episode and return its first observation image.
        """
        if self.fixed_init_state:
            # Lazily compute and cache the initial state on the first reset.
            try:
                init_state = self.init_state
            except AttributeError:
                init_state = self.init_state = self.get_init_state()

            # restore_state copies the cached state (unless states are immutable),
            # so playing an episode cannot modify the cached initial state.
            self.restore_state({"state": init_state,
                                "moves": 0,
                                "done": False})
        else:
            self._state = {"state": self.get_init_state(),
                           "moves": 0,
                           "done": False}

        obs = self.world.render(self.get_gridstate(self._state["state"]), size=self.pixel_size)
        return obs

    def render(self, size=None):
        """
        Show the current state in an image viewer window.
        :param size: render size; defaults to pixel_size
        :return: the viewer's isopen flag
        """
        if size is None:
            size = self.pixel_size
        img = self.world.render(self.get_gridstate(self._state["state"]), size=size)
        # Create the viewer on first use only. An AttributeError raised inside
        # imshow is no longer mistaken for "viewer missing".
        viewer = getattr(self, "viewer", None)
        if viewer is None:
            from gym.envs.classic_control import rendering
            viewer = self.viewer = rendering.SimpleImageViewer()
        viewer.imshow(img)
        return viewer.isopen

    def clone_state(self):
        """
        Return the internal state dict (a deep copy unless using_immutable_states is set).
        """
        if self.using_immutable_states:
            return self._state
        return deepcopy(self._state)

    def restore_state(self, internal_state):
        """
        Replace the internal state dict (with a deep copy unless using_immutable_states is set).
        """
        if self.using_immutable_states:
            self._state = internal_state
        else:
            self._state = deepcopy(internal_state)

    def get_char_matrix(self):
        """
        Return the world's character matrix for the current state.
        """
        return self.world.get_char_matrix(self.get_gridstate(self._state["state"])) #.view(np.uint32)

    def get_init_state(self):
        """
        To be implemented by child classes. It will be called at each environment reset if fixed_init_state is False,
        or only once, on the first reset, otherwise.
        """
        raise NotImplementedError()

    def get_next_state(self, state, action):
        """
        To be implemented by child classes. Returns a tuple: next_state, reward, episode_done, info_dict
        """
        raise NotImplementedError()

    def get_gridstate(self, state):
        """
        To be implemented by child classes.
        :return: iterable of grid objects
        """
        raise NotImplementedError()

    def __repr__(self):
        # Before the first reset there is no "state" entry to draw.
        if 'state' in self._state:
            return "\n".join([" ".join(row) for row in self.get_char_matrix()])
        else:
            return super(GridEnv, self).__repr__()

    def __del__(self):
        # Best effort: there may be no viewer if render() was never called.
        try:
            self.viewer.close()
        except AttributeError:
            pass
Example #9
0
class FreewayEnv(HeroEnv):
    """Freeway-like game on a square grid.

    The frog 'F' starts in the middle of the bottom row, the top row is goal
    'G', and cars 'C' travel east along every row in between.
    """

    ACTION_MAP = [None, Direction.N, Direction.S]

    def __init__(self,
                 size,
                 obs_type="image",
                 avg_cars=0.2,
                 episode_end="moves"):
        """
        :param size: side length of the square grid (at least 3)
        :param obs_type: forwarded to the parent constructor
        :param avg_cars: average number of cars generated per step on each row
        :param episode_end: "moves" (episode limited to 100 moves) or
            "collision" (no move limit; hitting a car or the goal ends it)
        """
        assert episode_end in ("moves", "collision")
        ends_on_contact = episode_end == "collision"
        max_moves = None if ends_on_contact else 100

        assert size >= 3  # At least one row for starting point, one for goal and one for cars.

        self.size_x = self.size_y = size
        # Mean waiting steps before generating a new car, at every row.
        self.mean_relative_time = 1 / avg_cars

        # Touching a car costs 1, touching the goal earns 1; either way the
        # frog is sent back to its start cell by reset_hero.
        self.STATE_MAP = {
            (0, 'C'): (0, -1.0, ends_on_contact, self.reset_hero),
            (0, 'G'): (0, 1.0, ends_on_contact, self.reset_hero)
        }
        super(FreewayEnv, self).__init__(max_moves, obs_type)

    def reset_hero(self, env, c):
        """Move the frog back to the middle of the bottom row (arguments are unused)."""
        start = Point(int(self.size_x / 2), self.size_y - 1)
        self.game_state["hero"].pos = start

    def reset_world(self):
        """Rebuild the grid with frog, goal row and pre-filled traffic; return the frog."""
        self.world = GridWorld((self.size_x, self.size_y))

        # Frog starts in the middle of the last row.
        frog = GridObject('F', (int(self.size_x / 2), self.size_y - 1),
                          rgb=Color.green)
        self.game_state["hero"] = frog
        self.world.add_object(frog)

        # The whole first row is goal.
        for column in range(self.size_x):
            self.world.add_object(GridObject('G', (column, 0), rgb=Color.blue))

        # Rows 1 .. size_y - 2 are traffic lanes.
        lane_count = self.size_y - 2
        self.game_state["step_next_car"] = [None] * lane_count
        for lane in range(lane_count):
            # Pre-fill the lane: keep placing cars at random gaps until the
            # next one would fall outside the grid.
            car_x = self.get_relative_time() + 1
            while car_x < self.size_x:
                self.world.add_object(
                    GridObject('C', (car_x, lane + 1), rgb=Color.red))
                car_x += self.get_relative_time() + 1
            # Step at which this lane will spawn its next car.
            self.game_state["step_next_car"][lane] = self.get_relative_time() + 1

        return self.game_state["hero"]

    def move_cars(self):
        """Advance all cars one cell east, drop those leaving the grid, spawn due cars."""
        # A car that cannot move east has reached the right edge.
        leaving = [obj for obj in self.world.objects
                   if obj.name == 'C' and not self.move(obj, Direction.E)]

        # Remove the ones that were getting out of the grid.
        for car in leaving:
            self.world.objects.remove(car)

        # Spawn a car at the left edge of every lane whose scheduled step is
        # now, then schedule that lane's next car.
        current_step = self.game_state["moves"]
        schedule = self.game_state["step_next_car"]
        for lane in range(self.size_y - 2):
            if schedule[lane] != current_step:
                continue
            self.world.add_object(
                GridObject('C', (0, lane + 1), rgb=Color.red))
            schedule[lane] = self.get_relative_time() + current_step + 1

    def get_relative_time(self):
        """
        Sample the waiting time until the next event of a Poisson process.

        Inter-arrival times follow the exponential distribution
        F(x) = 1 - e^(-l*x); inverting it gives x = -ln(U) / l with rate
        l = 1 / mean_relative_time.
        Returns: the sampled relative time, rounded to an integer
        """
        # np.random.rand() lies in [0, 1); using 1 - rand gives (0, 1] and
        # therefore never feeds 0 to the logarithm.
        uniform = 1.0 - np.random.rand()
        waiting = -np.log(uniform) * self.mean_relative_time
        return int(round(waiting))

    def update_world(self):
        """Move the traffic first, then run the parent's world update."""
        self.move_cars()
        return super(FreewayEnv, self).update_world()