def sample_state(self):
        """
        produces a observation with one of each object
        :return obs: a sample observation
        :return agent_position: position of the agent within the observation
        """
        objects = [self.one_hot(i - 1) for i in range(1, 9)]
        grid = objects + [[
            0 for _ in range(
                self.observation_vector_space.spaces['observation'].shape[2])
        ] for _ in range(self.num_rows * self.num_cols - len(objects))]
        random.shuffle(grid)

        state = np.asarray(grid, dtype=int).reshape(
            self.observation_vector_space.spaces['observation'].shape)
        while np.argmax(state[self.agent_start[0]][self.agent_start[1]]) in [
                3, 4, 5, 6
        ]:
            # don't start agent on rock, tree, house or bread
            np.random.shuffle(state)
        agent_encoding = self.one_hot(8)
        state[self.agent_start[0]][self.agent_start[1]] += agent_encoding
        agent_position = coord(self.agent_start[0], self.agent_start[1],
                               self.num_rows - 1, self.num_cols - 1)

        return state, agent_position
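The helper one_hot is called above but not shown. A minimal sketch of what it presumably does, assuming it returns a binary vector over the observation's channel dimension (this is a reconstruction inferred from the call sites, not the library's actual code):

    def one_hot(self, idx):
        # hypothetical reconstruction: a binary vector across the channel
        # dimension with a 1 at position idx (index 8 marks the agent above)
        depth = self.observation_vector_space.spaces['observation'].shape[2]
        return [1 if i == idx else 0 for i in range(depth)]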
Example #2
    def sample_state(self):
        """
        produces a observation with one of each object
        :return obs: a sample observation
        :return agent_position: position of the agent within the observation
        """
        objects = [self.one_hot(i - 1) for i in range(1, 10)]
        grid = objects + [[
            0 for _ in range(
                self.observation_space.spaces['observation'].shape[2])
        ] for _ in range(self.num_rows * self.num_cols - len(objects))]
        random.shuffle(grid)

        state = np.asarray(grid, dtype=int).reshape(
            self.observation_space.spaces['observation'].shape)

        # channel 8 holds the agent's one-hot encoding, so argmax over the
        # channel axis equals 8 exactly at the agent's cell
        agent_position = coord(int(np.where(np.argmax(state, axis=2) == 8)[0]),
                               int(np.where(np.argmax(state, axis=2) == 8)[1]),
                               self.num_rows - 1, self.num_cols - 1)

        return state, agent_position
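This variant places the agent's one-hot channel (index 8) directly in the grid and recovers its coordinates with np.where over an argmax. A standalone illustration of that lookup on a toy grid (the channel index comes from the snippet; the shapes are made up for the demo):

    import numpy as np

    # toy 2x2 grid with 9 channels; place the agent (channel 8) at row 1, col 0
    state = np.zeros((2, 2, 9), dtype=int)
    state[1][0][8] = 1

    rows, cols = np.where(np.argmax(state, axis=2) == 8)
    print(int(rows[0]), int(cols[0]))  # -> 1 0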
Example #3
    def __init__(self,
                 size=(10, 10),
                 fixed_init_state=None,
                 fixed_goal=None,
                 tasks_to_ignore=None,
                 store_gif=False,
                 render_flipping=False,
                 max_steps=300,
                 task_list=TASK_LIST,
                 pos_rewards=False):
        """
        change the following parameters to create a custom environment

        :param size: size of the grid world
        :param fixed_init_state: a fixed initial observation to reset to
        :param fixed_goal: a fixed list of tasks for the agent to achieve
        :param tasks_to_ignore: a list of tasks to ignore when calculating reward
        :param store_gif: whether or not to store every episode as a gif in a /renders/ subdirectory
        :param render_flipping: set to true if only specific episodes need to be rendered
        :param max_steps: max number of steps the agent can take
        :param task_list: list of possible tasks
        """
        self.metadata = {'render.modes': ['human', 'Non']}

        self.num_rows, self.num_cols = size

        self.max_steps = max_steps

        self.task_list = list(task_list)  # copy so removals don't mutate the shared default list
        if tasks_to_ignore:
            for task in tasks_to_ignore:
                self.task_list.remove(task)
        self.pos_rewards = pos_rewards

        self.observation_space = spaces.Dict({
            'observation':
            spaces.Box(low=0,
                       high=1,
                       shape=(self.num_rows, self.num_cols,
                              len(OBJECTS) + 1 + len(PICKUPABLE)),
                       dtype=int),
            'desired_goal':
            spaces.Box(low=0,
                       high=1,
                       shape=(1, len(self.task_list)),
                       dtype=int),
            'achieved_goal':
            spaces.Box(low=0,
                       high=1,
                       shape=(1, len(self.task_list)),
                       dtype=int),
        })
        # TODO: wrapper that flattens to regular env, wrapper that changes desired goal to dict of rewards, reward wrapper

        self.fixed_goal = fixed_goal
        if self.fixed_goal:
            self.desired_goal = np.zeros(shape=(1, len(self.task_list)),
                                         dtype=int)
            for goal in list(self.fixed_goal):  # iterate over a copy; removing while iterating skips items
                if goal not in self.task_list:
                    self.fixed_goal.remove(goal)
                    continue
                self.desired_goal[0][self.task_list.index(goal)] = 1
        else:
            self.desired_goal = np.random.randint(2,
                                                  size=(1,
                                                        len(self.task_list)))

        self.achieved_goal = self.observation_space.spaces['achieved_goal'].low

        self.fixed_init_state = fixed_init_state

        if self.fixed_init_state is not None:
            self.obs = copy.deepcopy(self.fixed_init_state)
            # locate the agent cell: channel 8 is the agent's one-hot index
            self.agent_pos = coord(
                int(np.where(np.argmax(self.obs, axis=2) == 8)[0]),
                int(np.where(np.argmax(self.obs, axis=2) == 8)[1]),
                self.num_rows - 1, self.num_cols - 1)
        else:
            self.obs, self.agent_pos = self.sample_state()

        self.observation = {
            'observation': self.obs,
            'desired_goal': self.desired_goal,
            'achieved_goal': self.achieved_goal
        }
        self.init_observation = copy.deepcopy(self.observation)

        self.ACTIONS = [
            coord(-1, 0, name='up'),
            coord(0, 1, name='right'),
            coord(1, 0, name='down'),
            coord(0, -1, name='left'),
            'pickup',
            'drop'
        ]

        self.action_space = spaces.Discrete(len(self.ACTIONS))

        self.reward = self.calculate_rewards()

        self.store_gif = store_gif

        self.render_flipping = render_flipping
        self.env_id = None
        self.fig, self.ax, self.ims = None, None, None
        self.ep_no = 0
        self.step_num = 0
        if self.store_gif:
            self.allow_gif_storage()
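A hedged usage sketch for the constructor above. The class name CraftingWorldEnv is an assumption (the snippet does not show it); the task names are taken from the task checks in imagine_obs below:

    # hypothetical usage; the class name is assumed, parameters are from __init__
    env = CraftingWorldEnv(size=(8, 8),
                           fixed_goal=['EatBread', 'ChopTree'],
                           max_steps=200,
                           store_gif=False)
    print(env.action_space)   # Discrete(6)
    obs = env.observation     # dict: 'observation', 'desired_goal', 'achieved_goal'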
Example #4
    def reset(self, render_next=False):
        """
        reset the environment
        """
        # save episode as gif
        if self.store_gif and self.step_num != 0:
            anim = animation.ArtistAnimation(self.fig,
                                             self.ims,
                                             interval=100000,
                                             blit=False,
                                             repeat_delay=1000)
            anim.save('renders/env{}/episode_{}_({}).gif'.format(
                self.env_id, self.ep_no, self.step_num),
                      writer=animation.PillowWriter(),
                      dpi=100)

        if self.render_flipping is True:
            self.store_gif = render_next

        if self.fixed_goal:
            self.desired_goal = np.zeros(shape=(1, len(self.task_list)),
                                         dtype=int)
            for goal in self.fixed_goal:
                self.desired_goal[0][self.task_list.index(goal)] = 1
        else:
            self.desired_goal = np.random.randint(2,
                                                  size=(1,
                                                        len(self.task_list)))

        self.achieved_goal = self.observation_space.spaces['achieved_goal'].low

        if self.fixed_init_state is not None:
            self.obs = copy.deepcopy(self.fixed_init_state)
            self.agent_pos = coord(
                int(np.where(np.argmax(self.obs, axis=2) == 8)[0]),
                int(np.where(np.argmax(self.obs, axis=2) == 8)[1]),
                self.num_rows - 1, self.num_cols - 1)
        else:
            self.obs, self.agent_pos = self.sample_state()

        self.observation = {
            'observation': self.obs,
            'desired_goal': self.desired_goal,
            'achieved_goal': self.achieved_goal
        }

        self.init_observation = copy.deepcopy(self.observation)

        self.reward = self.calculate_rewards()

        if self.step_num != 0:  # don't increment episode number if resetting after init
            self.ep_no += 1

        self.step_num = 0

        # reset gif
        plt.close('all')
        if self.store_gif:
            self.fig, self.ax = plt.subplots(1)
            self.ims = []
            self.__render_gif()
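For context, a minimal episode loop around reset(). It assumes a gym-style step() returning (observation, reward, done, info), which this excerpt does not show:

    # hypothetical rollout; step()'s exact signature is an assumption
    env.reset()
    for _ in range(env.max_steps):
        action = env.action_space.sample()  # random choice over the 6 actions
        observation, reward, done, info = env.step(action)
        if done:
            break
    env.reset(render_next=False)  # also saves the episode gif when store_gif is set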
    def imagine_obs(self):
        """
        construct the state that would result from completing each task in the desired goal
        """
        init_objects = {
            obj: self.get_objects(code,
                                  self.init_observation_vector['observation'])
            for code, obj in enumerate(OBJECTS)
        }
        agent_pos = self.agent_pos
        final_objects = copy.deepcopy(init_objects)

        tasks = {
            self.task_list[idx]: value
            for idx, value in enumerate(self.desired_goal_vector[0])
        }
        for key, value in tasks.items():
            if value == 1:
                if key == 'MakeBread':
                    final_objects = self.__convert_item(
                        final_objects, 'wheat', 'bread')
                if key == 'EatBread':
                    final_objects = self.__convert_item(final_objects, 'bread')
                if key == 'ChopTree':
                    final_objects = self.__convert_item(
                        final_objects, 'tree', 'sticks')
                if key == 'ChopRock':
                    final_objects = self.__convert_item(final_objects, 'rock')

        occupied_spaces = []
        for i in final_objects.values():
            occupied_spaces += i

        moving_tasks = {
            'MoveAxe': 'axe',
            'MoveHammer': 'hammer',
            'MoveSticks': 'sticks'
        }
        for key, value in moving_tasks.items():
            if tasks.get(key) == 1:
                current_location = random.choice(final_objects[value])
                # rejection-sample an unoccupied cell for the moved item
                while True:
                    new_location = [
                        random.randint(0, self.num_rows - 1),
                        random.randint(0, self.num_cols - 1)
                    ]
                    if new_location not in occupied_spaces:
                        final_objects[value].remove(current_location)
                        occupied_spaces.remove(current_location)
                        final_objects[value].append(new_location)
                        occupied_spaces.append(new_location)
                        break

        for key, value in tasks.items():
            if value == 1:
                if key == 'BuildHouse':
                    final_objects = self.__convert_item(
                        final_objects, 'sticks', 'house')

                if key == 'GoToHouse':
                    new_agent_pos = random.choice(final_objects['house'])
                    agent_pos = coord(new_agent_pos[0], new_agent_pos[1],
                                      self.num_rows - 1, self.num_cols - 1)

        return self.__object_list_to_state(final_objects, agent_pos)
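imagine_obs leans on the private helper __convert_item, which is not shown. A minimal sketch of its assumed semantics, inferred from the call sites above: with a target, every source item becomes a target item; without one, the source items are simply removed:

    # hypothetical reconstruction of __convert_item, inferred from its call sites
    def __convert_item(self, objects, source, target=None):
        if target is not None:
            objects[target] = objects[target] + objects[source]  # e.g. wheat -> bread
        objects[source] = []                                     # e.g. eaten bread disappears
        return objects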