Code example #1
File: environment.py  Project: epignatelli/helx
    def __init__(self, env):
        super().__init__(env)

        # Replace the nested "image" space with a 2-D grid of object indices,
        # one uint8 cell per tile of the full grid.
        current = self.env.observation_space["image"]  # original image Box (unused in the lines shown)
        self.observation_space.spaces["image"] = spaces.Box(
            low=min(OBJECT_TO_IDX.values()),
            high=max(OBJECT_TO_IDX.values()),
            shape=(self.env.width, self.env.height),
            dtype="uint8",
        )
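Only the constructor is shown above; the matching observation method is not part of the snippet. A minimal sketch of what it might look like, assuming the wrapper is a gym ObservationWrapper that keeps only the object-index channel of the image so the result matches the Box declared in __init__:

    def observation(self, obs):
        # Hypothetical companion method -- not in the original snippet.
        # Keep only channel 0 (object indices) so the image matches the
        # (width, height) uint8 Box declared above.
        obs["image"] = obs["image"][:, :, 0].astype("uint8")
        return obs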
Code example #2
    def __init__(self, env):
        super().__init__(env)
        # Tighten the per-channel upper bounds of the "image" Box: channel 0
        # holds object indices, channel 1 colour indices, and channel 2 is
        # capped at 3 (state/direction values).
        os = self.observation_space.spaces['image']
        os.high[:, :, 0] = max(OBJECT_TO_IDX.values())
        if os.high.shape[2] >= 2:
            os.high[:, :, 1] = max(COLOR_TO_IDX.values())
        if os.high.shape[2] >= 3:
            os.high[:, :, 2] = 3
        self.observation_space.spaces['image'] = os
Code example #3
    def __init__(self, env):
        super().__init__(env)
        # Build one Discrete space per (row, column, channel) cell of the image:
        # channel 0 -> object index, channel 1 -> colour index, channel 2 -> state.
        sz = self.observation_space.spaces['image'].shape
        npo = np.zeros(sz, dtype=object)  # np.object is removed in recent NumPy
        for i in range(sz[0]):
            for j in range(sz[1]):
                for k in range(sz[2]):
                    if k == 0:
                        n = max(OBJECT_TO_IDX.values()) + 1
                    elif k == 1:
                        n = max(COLOR_TO_IDX.values()) + 1
                    elif k == 2:
                        n = 4
                    else:
                        raise Exception("Bad k")

                    npo[i, j, k] = Discrete(n)
        ospace = tuple(npo.flat)

        # Cumulative offsets so each flattened cell/channel gets its own
        # index range.
        sz = np.cumsum([o.n for o in ospace])
        sz = sz - sz[0]
        self.sz = sz
        self.observation_space = ospace
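Only the constructor is shown; how self.sz is consumed is not part of the snippet. A rough sketch of one plausible observation method, under the assumption that the offsets are meant to shift each flattened image entry into its own global index range:

    def observation(self, obs):
        # Hypothetical companion method -- not in the original snippet.
        # Flatten the (H, W, 3) image and add the per-position offsets so
        # every cell/channel value lands in a distinct range of indices.
        flat = np.asarray(obs["image"], dtype=np.int64).flatten()
        return flat + self.sz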
Code example #4
# Map (dx, dy) heading vectors to MiniGrid's direction codes
# (0 = east, 1 = south, 2 = west, 3 = north).
DIR_TO_NUM = {
    (1, 0): 0,
    (0, -1): 3,
    (-1, 0): 2,
    (0, 1): 1,
}
NUM_TO_DIR = {v: k for k, v in DIR_TO_NUM.items()}

# Subset of MiniGrid's action indices (pickup and drop are omitted).
ACTION_TO_NUM_MAPPING = {
    'left': 0,
    'right': 1,
    'forward': 2,
    'toggle': 5,
    'done': 6,
}
IDX_TO_OBJECT = {v: k for k, v in OBJECT_TO_IDX.items()}

class PlanAgent:
    # Agent that has a plan! :D
    def __init__(self, env):
        # Given the env, read off parameters such as grid size and agent details.
        # The env is expected to be wrapped so that the agent's location and
        # direction are exposed.
        self.env = env.env
        self.agent_view_size = self.env.agent_view_size
        self.width = self.env.width
        self.height = self.env.height

        self.epsilon = 0
        self.numobjects = len(OBJECT_TO_IDX) - 1
        self.numcolors = len(COLOR_TO_IDX)
        self.numstates = len(STATE_TO_IDX)
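DIR_TO_NUM maps a (dx, dy) heading vector back to a discrete direction code. A sketch of how PlanAgent might use it, assuming the wrapped env exposes MiniGrid's usual dir_vec property; the agent_heading helper below is hypothetical and not part of the original code:

    def agent_heading(self):
        # Hypothetical helper -- converts the agent's current heading vector
        # into the discrete code used by DIR_TO_NUM / NUM_TO_DIR.
        return DIR_TO_NUM[tuple(self.env.dir_vec)]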
Code example #5
    def __init__(
        self,
        grid_size: Optional[int] = None,
        width: Optional[int] = None,
        height: Optional[int] = None,
        max_steps: int = 100,
        see_through_walls: bool = False,
        seed: int = 1337,
    ):
        if grid_size:
            assert width is None and height is None
            width = grid_size
            height = grid_size

        # Override MiniGridEnv actions
        self.actions = MiniGridSimple.CardinalActions

        self.move_actions = [
            self.actions.right,
            self.actions.down,
            self.actions.left,
            self.actions.up,
        ]

        self.action_space = spaces.Discrete(len(self.actions))

        # self.encoding_range = len(MINIMAL_OBJECT_TO_IDX.keys())
        self.encoding_range = len(OBJECT_TO_IDX.keys())

        # Agent position ranges over the grid's width and height; using
        # width/height keeps this valid even when only width and height
        # (rather than grid_size) are given.
        self.agent_pos_observation = spaces.Tuple(
            [spaces.Discrete(width),
             spaces.Discrete(height)])

        self.observation_space = spaces.Dict({
            'agent_pos':
            self.agent_pos_observation,
        })

        # Renderer object used to render the whole grid (full-scale)
        self.grid_render = None

        # Renderer used to render observations (small-scale agent view)
        self.obs_render = None

        # Environment configuration
        self.width = width
        self.height = height
        self.max_steps = max_steps

        self.see_through_walls = see_through_walls

        # Starting position and direction for the agent
        self.start_pos = None
        self.start_dir = None

        self._done = False

        # Initialize the RNG
        self.seed(seed=seed)

        # Rendering
        self.render_rgb = True
        self.CELL_PIXELS = CELL_PIXELS
        self.render_shape = (self.width * self.CELL_PIXELS,
                             self.height * self.CELL_PIXELS, 3)

        # Initialize the state
        self.reset()
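A hypothetical usage sketch, assuming this constructor belongs to the MiniGridSimple class referenced above and that the class is importable:

env = MiniGridSimple(grid_size=8, max_steps=50, seed=0)
obs = env.reset()
print(env.action_space)       # Discrete space over the CardinalActions enum
print(env.observation_space)  # Dict space containing the 'agent_pos' Tuple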
Code example #6
    def one_hot(self, obs):

        one_hot_obs = {}

        # One-hotify direction
        NUM_DIRECTIONS = 4
        direction = np.zeros(NUM_DIRECTIONS)
        direction[obs["direction"]] = 1
        one_hot_obs["direction"] = direction

        # One-hotify mission
        # TODO: This is tricky!  Missions can be of arbitrary length, so we could either
        #       (a) One-hot encode each token sequentially and return a variable-length array
        #       (b) Use a fixed-size mission vector where shorter missions are zero-padded
        #       (c) Assume we'll only use levels with a particular mission structure
        #       The code currently does (c).  We assume there's at most one object mentioned.
        #       But at some point we should probably switch to the more generalizable version using the babyai
        #       repo's preprocessor InstructionsPreprocessor in file utils/format.py
        obj_id = np.zeros((len(OBJECT_TO_IDX.keys())))
        colors_id = np.zeros((len(COLOR_TO_IDX.keys()) + 1))
        found_color = False
        for key, index in COLOR_TO_IDX.items():
            if key in obs["mission"]:
                colors_id[index] = 1
                found_color = True
        if not found_color:
            colors_id[-1] = 1
        for key, index in OBJECT_TO_IDX.items():
            if key in obs["mission"]:
                obj_id[index] = 1
        one_hot_obs["mission"] = np.concatenate([obj_id, colors_id], axis=0)

        # One-hotify grid observation
        num_objects = len(OBJECT_TO_IDX.keys())
        num_colors = len(COLOR_TO_IDX.keys())
        num_states = 3  # open, closed or locked (STATE_TO_IDX in gym-minigrid)
        # Observation space is [height, width, 3]
        image = obs["image"]
        height, width, _ = image.shape
        height_index = np.repeat(np.arange(height), width).reshape(height, width)
        width_index = np.tile(np.arange(width), height).reshape(height, width)

        # First layer has object IDs
        obj_ids = np.zeros((height, width, num_objects))
        obj_ids[height_index, width_index, image[:,:,0]] = 1

        # Second layer has color IDs
        color_ids = np.zeros((height, width, num_colors))
        color_ids[height_index, width_index, image[:,:,1]] = 1

        # Third layer has state IDs
        state_ids = np.zeros((height, width, num_states))
        state_ids[height_index, width_index, image[:,:,2]] = 1

        image = np.concatenate([obj_ids, color_ids, state_ids], axis=2).transpose((2, 0, 1))

        # Zero-pad the spatial dimensions up to the next power of 2.
        channels, height, width = image.shape
        pow_2_height = int(2 ** np.ceil(np.log2(height)))
        pow_2_width = int(2 ** np.ceil(np.log2(width)))
        pow_2_image = np.zeros((channels, pow_2_height, pow_2_width))
        pow_2_image[:, :height, :width] = image

        one_hot_obs["image"] = pow_2_image

        return one_hot_obs
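The grid one-hot above relies on NumPy fancy indexing with the precomputed height_index/width_index arrays. A small self-contained check of that trick on a toy 2x2 grid with 3 possible values:

import numpy as np

# Scattering 1s at (row, col, value) turns an integer grid into a one-hot volume.
grid = np.array([[0, 2], [1, 0]])
h, w = grid.shape
rows = np.repeat(np.arange(h), w).reshape(h, w)
cols = np.tile(np.arange(w), h).reshape(h, w)
one_hot = np.zeros((h, w, 3))
one_hot[rows, cols, grid] = 1
assert one_hot[0, 1, 2] == 1 and one_hot.sum() == h * w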