def test_list(self):
    """should give every task its own `can_end` flag when a list is provided."""
    flags = [True, False, True]
    tasklist = TaskList(self.tasks, can_end=flags)
    self.tasklist = tasklist

    for position, task in enumerate(tasklist.tasks):
        # The list is positional: the i-th flag is expected for the i-th task.
        check.equal(tasklist._get_task_can_end(task), flags[position])
def test_list(self):
    """should give every task its own weight when a list is provided."""
    weights = [0.2, 0.1, 5]
    tasklist = TaskList(self.tasks, weights=weights)
    self.tasklist = tasklist

    for position, task in enumerate(tasklist.tasks):
        # The list is positional: the i-th weight is expected for the i-th task.
        actual = tasklist._get_task_weight(task)
        check.equal(actual, weights[position])
def test_all(self):
    """should return True only if all dones are True if early_stopping is 'all'."""
    tests = TaskList(None, early_stopping="all")

    # A single unfinished task must keep the stacked result False.
    tests.dones = [True, False, True]
    check.is_false(tests._stacked_dones())

    # Only when every task is done does the stacked result become True.
    tests.dones = [True, True, True]
    check.is_true(tests._stacked_dones())
def test_dict(self):
    """should look weights up by task name when a dict is provided."""
    task_names = [task.name for task in self.tasks]
    weights_by_name = dict(zip(task_names, [0.2, 0.1, 5]))

    tasklist = TaskList(self.tasks, weights=weights_by_name)
    self.tasklist = tasklist

    for task in tasklist.tasks:
        actual = tasklist._get_task_weight(task)
        check.equal(actual, weights_by_name[task.name])
def test_dict(self):
    """should look `can_end` flags up by task name when a dict is provided."""
    task_names = [task.name for task in self.tasks]
    flags_by_name = dict(zip(task_names, [True, False, True]))

    tasklist = TaskList(self.tasks, can_end=flags_by_name)
    self.tasklist = tasklist

    for task in tasklist.tasks:
        actual = tasklist._get_task_can_end(task)
        check.equal(actual, flags_by_name[task.name])
def test_call(self, mocker: MockerFixture):
    """should return accumulated rewards and done on call."""
    # Replace the per-task helpers with constant stubs so that only the
    # accumulation done by TaskList.__call__ is exercised.
    stubs = (
        ("crafting.task.TaskList._get_task_weight", 1),
        ("crafting.task.TaskList._get_task_can_end", True),
        ("crafting.task.TaskList._stacked_dones", True),
    )
    for target, constant in stubs:
        # Bind the constant as a default to avoid late-binding in the loop.
        mocker.patch(target, lambda *args, _result=constant: _result)

    task_list = TaskList(self.tasks)
    outcome = task_list(self.observation, self.previous_observation, self.action)
    reward, done = outcome

    check.equal(reward, 10.2)
    check.is_true(done)
class TestTaskListGetTaskWeight:
    """TaskList._get_task_weight"""

    def setup_method(self):
        """Setup dummy tasks.

        Uses pytest's native per-test hook name: the nose-style ``setup``
        spelling is deprecated by pytest and no longer run by recent
        versions, which would leave ``self.tasks`` undefined in the tests.
        """
        self.world = DummyWorld()
        self.task_observe_123 = Task("obs_123", self.world)
        self.task_prev_observe_312 = Task("prev_obs_312", self.world)
        self.task_action_observe_213 = Task("action_213", self.world)
        self.tasks = [
            self.task_observe_123,
            self.task_prev_observe_312,
            self.task_action_observe_213,
        ]

    def test_list(self):
        """should assign weights correctly if tasks_weights is a list."""
        expected_tasks_weights = [0.2, 0.1, 5]
        self.tasklist = TaskList(self.tasks, weights=expected_tasks_weights)
        for i, task in enumerate(self.tasklist.tasks):
            # A list of weights is matched to tasks by position.
            value = self.tasklist._get_task_weight(task)
            expected = expected_tasks_weights[i]
            check.equal(value, expected)

    def test_dict(self):
        """should assign weights correctly if tasks_weights is a dict."""
        expected_tasks_weights = {
            task.name: weight
            for task, weight in zip(self.tasks, [0.2, 0.1, 5])
        }
        self.tasklist = TaskList(self.tasks, weights=expected_tasks_weights)
        for task in self.tasklist.tasks:
            # A dict of weights is matched to tasks by task name.
            value = self.tasklist._get_task_weight(task)
            expected = expected_tasks_weights[task.name]
            check.equal(value, expected)

    def test_none(self):
        """should assign weights of 1 if tasks_weights is None."""
        self.tasklist = TaskList(self.tasks)
        for task in self.tasklist.tasks:
            value = self.tasklist._get_task_weight(task)
            check.equal(value, 1)
class TestTaskListGetTaskCanEnd:
    """TaskList._get_task_can_end"""

    def setup_method(self):
        """Setup dummy tasks.

        Uses pytest's native per-test hook name: the nose-style ``setup``
        spelling is deprecated by pytest and no longer run by recent
        versions, which would leave ``self.tasks`` undefined in the tests.
        """
        self.world = DummyWorld()
        self.task_observe_123 = Task("obs_123", self.world)
        self.task_prev_observe_312 = Task("prev_obs_312", self.world)
        self.task_action_observe_213 = Task("action_213", self.world)
        self.tasks = [
            self.task_observe_123,
            self.task_prev_observe_312,
            self.task_action_observe_213,
        ]

    def test_list(self):
        """should assign `can_end` correctly if tasks_can_end is a list."""
        expected_tasks_can_end = [True, False, True]
        self.tasklist = TaskList(self.tasks, can_end=expected_tasks_can_end)
        for i, task in enumerate(self.tasklist.tasks):
            # A list of flags is matched to tasks by position.
            value = self.tasklist._get_task_can_end(task)
            check.equal(value, expected_tasks_can_end[i])

    def test_dict(self):
        """should assign `can_end` correctly if tasks_can_end is a dict."""
        expected_tasks_can_end = {
            task.name: can_end
            for task, can_end in zip(self.tasks, [True, False, True])
        }
        self.tasklist = TaskList(self.tasks, can_end=expected_tasks_can_end)
        for task in self.tasklist.tasks:
            # A dict of flags is matched to tasks by task name.
            value = self.tasklist._get_task_can_end(task)
            expected = expected_tasks_can_end[task.name]
            check.equal(value, expected)

    def test_none(self):
        """should assign False to all if tasks_can_end is None."""
        self.tasklist = TaskList(self.tasks)
        for task in self.tasklist.tasks:
            check.is_false(self.tasklist._get_task_can_end(task))
def test_any(self):
    """should return True as soon as one done is True if early_stopping is 'any'."""
    task_list = TaskList(None, early_stopping="any")

    # At least one finished task is enough.
    task_list.dones = [True, False, True]
    some_done = task_list._stacked_dones()
    check.is_true(some_done)

    # No finished task at all must give False.
    task_list.dones = [False, False, False]
    none_done = task_list._stacked_dones()
    check.is_false(none_done)
def __init__(
    self,
    world: "World",
    player: Player,
    name: str = "Crafting",
    max_step: int = 500,
    verbose: int = 0,
    observe_legal_actions: bool = False,
    tasks: List[Union[str, "Task"]] = None,
    fail_penalty: float = 9,
    timestep_penalty: float = 1,
    moving_penalty: float = 9,
    seed: int = None,
):
    """Build a generic Crafting environment.

    Args:
        world: The world containing items, crafts and zones.
        player: The player containing an inventory and a position.
        name: Name given to the environment.
        max_step: The maximum number of steps until done.
        verbose: Verbosity level. {0: quiet, 1: print actions results}.
        observe_legal_actions: If True, add legal actions to observations.
        tasks: List of tasks, or an already built TaskList which is then
            used as-is.
        fail_penalty: Reward penalty for each non-successful action.
        timestep_penalty: Reward penalty for each timestep.
        moving_penalty: Reward penalty for moving to an other zone.
        seed: Seed passed to the environment RNG and to both spaces.

    """
    self.name = name

    # Independent copies are kept for the live state and for the pristine
    # initial state, so the caller's objects are never shared.
    self.initial_world = deepcopy(world)
    self.world = deepcopy(world)
    self.initial_player = deepcopy(player)
    self.player = deepcopy(player)

    # Anything that is not already a TaskList gets wrapped into one.
    self.tasks = tasks if isinstance(tasks, TaskList) else TaskList(tasks=tasks)

    # Reward penalties
    self.moving_penalty = moving_penalty
    self.timestep_penalty = timestep_penalty
    self.fail_penalty = fail_penalty

    # Other properties
    self.verbose = verbose
    self.observe_legal_actions = observe_legal_actions
    self.max_step = max_step
    # NOTE(review): reset() restarts this counter at 0, not 1 - confirm intent.
    self.steps = 1

    n_items = self.world.n_items
    n_zones = self.world.n_zones
    n_zone_properties = self.world.n_zone_properties

    # Action space: (get_item or use_recipe or move_to_zone)
    n_actions = self.world.n_foundable_items + self.world.n_recipes + n_zones
    self.action_space = spaces.Discrete(n_actions)

    # Observation space: one entry per item (unbounded above), then one per
    # zone and one per zone property (both bounded to [0, 1]).
    lower_bounds = [0] * n_items + [0] * n_zones + [0] * n_zone_properties
    upper_bounds = [np.inf] * n_items + [1] * n_zones + [1] * n_zone_properties
    self.observation_space = spaces.Box(
        low=np.array(lower_bounds),
        high=np.array(upper_bounds),
        dtype=np.float32,
    )

    if self.observe_legal_actions:
        # Observations become (observation, legal_actions) pairs.
        self.legal_actions_space = spaces.MultiBinary(self.action_space.n)
        self.observation_space = spaces.Tuple(
            (self.observation_space, self.legal_actions_space)
        )

    # Human-readable label of every observation entry, in the same order.
    item_labels = [str(item) for item in self.world.items]
    zone_labels = [str(zone) for zone in self.world.zones]
    property_labels = [str(prop) for prop in self.world.zone_properties]
    self.observation_legend = np.concatenate(
        (item_labels, zone_labels, property_labels)
    )

    # Rendering (window is created lazily on first render)
    self.render_variables = None

    # Seeding
    self.rng_seeds = self.seed(seed)
    self.action_space.seed(seed)
    self.observation_space.seed(seed)
class CraftingEnv(gym.Env):
    """Generic Crafting Environment.

    Discrete actions are laid out contiguously: first one action per
    foundable item (get), then one per recipe (craft), then one per zone
    (move). Observations concatenate the player's inventory content, a
    one-hot encoding of the player's zone and the properties of that zone.
    """

    metadata = {"render.modes": ["rgb_array"], "video.frames_per_second": 10}

    def __init__(
        self,
        world: "World",
        player: Player,
        name: str = "Crafting",
        max_step: int = 500,
        verbose: int = 0,
        observe_legal_actions: bool = False,
        tasks: List[Union[str, "Task"]] = None,
        fail_penalty: float = 9,
        timestep_penalty: float = 1,
        moving_penalty: float = 9,
        seed: int = None,
    ):
        """Generic Crafting Environment.

        Args:
            world: The world containing items, crafts and zones.
            player: The player containing an inventory and a position.
            name: Name given to the environment.
            max_step: The maximum number of steps until done.
            verbose: Verbosity level. {0: quiet, 1: print actions results}.
            observe_legal_actions: If True, add legal actions to observations.
            tasks: List of tasks, or an already built TaskList which is then
                used as-is.
            fail_penalty: Reward penalty for each non-successful action.
            timestep_penalty: Reward penalty for each timestep.
            moving_penalty: Reward penalty for moving to an other zone.
            seed: Seed passed to the environment RNG and to both spaces.

        """
        self.name = name

        # World (live copy + pristine copy restored by reset)
        self.world = deepcopy(world)
        self.initial_world = deepcopy(world)

        # Player (live copy + pristine copy restored by reset)
        self.player = deepcopy(player)
        self.initial_player = deepcopy(player)

        # Tasks: anything that is not already a TaskList gets wrapped.
        if not isinstance(tasks, TaskList):
            self.tasks = TaskList(tasks=tasks)
        else:
            self.tasks = tasks

        # Reward penalties
        self.fail_penalty = fail_penalty
        self.timestep_penalty = timestep_penalty
        self.moving_penalty = moving_penalty

        # Other properties
        self.max_step = max_step
        # NOTE(review): reset() restarts this counter at 0, not 1 - confirm
        # which starting value is intended.
        self.steps = 1
        self.verbose = verbose
        self.observe_legal_actions = observe_legal_actions

        # Action space
        # (get_item or use_recipe or move_to_zone)
        self.action_space = spaces.Discrete(
            self.world.n_foundable_items
            + self.world.n_recipes
            + self.world.n_zones
        )

        # Observation space
        # (inventory content per item, one_hot_zone, zone_properties)
        self.observation_space = spaces.Box(
            low=np.array(
                [0 for _ in range(self.world.n_items)]
                + [0 for _ in range(self.world.n_zones)]
                + [0 for _ in range(self.world.n_zone_properties)]
            ),
            high=np.array(
                [np.inf for _ in range(self.world.n_items)]
                + [1 for _ in range(self.world.n_zones)]
                + [1 for _ in range(self.world.n_zone_properties)]
            ),
            dtype=np.float32,
        )

        if self.observe_legal_actions:
            # Observations become (observation, legal_actions) pairs.
            self.legal_actions_space = spaces.MultiBinary(self.action_space.n)
            self.observation_space = spaces.Tuple(
                (self.observation_space, self.legal_actions_space)
            )

        # Human-readable label of every observation entry, in order.
        self.observation_legend = np.concatenate(
            (
                [str(item) for item in self.world.items],
                [str(zone) for zone in self.world.zones],
                [str(prop) for prop in self.world.zone_properties],
            )
        )

        # Rendering (window is created lazily on first rgb_array render)
        self.render_variables = None

        # Seeding
        self.rng_seeds = self.seed(seed)
        self.action_space.seed(seed)
        self.observation_space.seed(seed)

    def seed(self, seed: int = None) -> List[int]:
        """Seed the environment for random reproducibility.

        Args:
            seed (int, optional): Seed given to the environment's
                ``np.random.RandomState``. Defaults to None.

        Returns:
            List[int]: Single-element list holding the given seed as-is
                (so ``[None]`` when no seed was given).

        """
        self.np_random = np.random.RandomState(seed)
        return [seed]

    def action(self, action_type: str, identification: int) -> int:
        """Return action_id from action type and identifier.

        Args:
            action_type: One of {'get', 'craft', 'move'}.
            identification: Id of the item, recipe or zone.

        Returns:
            The corresponding discrete action ID.

        """
        return self.world.action(action_type, identification)

    def action_from_id(self, action_id: int) -> str:
        """Return the action type and object from a discrete action ID.

        Args:
            action_id: A discrete action ID.

        Return:
            The action type and object concerned by the action, as given by
            the world.

        """
        return self.world.action_from_id(action_id)

    def step(
        self, action: np.ndarray
    ) -> Tuple[Union[np.ndarray, Tuple[np.ndarray, np.ndarray]], float, bool, dict]:
        """Perform one action and return its outcome.

        Args:
            action: Discrete action ID in [0, n_foundable_items + n_recipes
                + n_zones).

        Returns:
            observation: New observation, paired with the legal actions mask
                when `observe_legal_actions` is True.
            reward: Tasks reward minus the timestep penalty, the moving
                penalty (for any move action) and the fail penalty (for an
                unsuccessful action).
            done: True if `max_step` was reached or the tasks are done.
            infos: Dict with 'env_step', 'action_is_legal' and 'tasks_done'.

        Raises:
            ValueError: If the action is outside of the action range.

        """
        first_recipe_action = self.world.n_foundable_items
        first_zone_action = first_recipe_action + self.world.n_recipes
        n_actions = first_zone_action + self.world.n_zones

        # Reject invalid actions before touching any state: otherwise a
        # negative ID would silently index items from the end and a too
        # large ID would leave `success` unbound further down.
        if not 0 <= action < n_actions:
            raise ValueError(
                f"Invalid action {action}: expected an ID in [0, {n_actions})."
            )

        previous_observation = self.get_observation()
        reward = 0

        if action < first_recipe_action:
            # Get an item
            item_slot = action
            item = self.world.foundable_items[item_slot]
            tool = self.player.choose_tool(item)
            n_found = self.player.search_for(item, tool)
            success = n_found > 0
            if self.verbose > 0:
                status_msg = "SUCCEDED" if success else "FAILED"
                print(f"{status_msg} at getting {item}[{n_found}] with {tool}")
        elif action < first_zone_action:
            # Craft a recipe
            recipe_slot = action - first_recipe_action
            recipe = self.world.recipes[recipe_slot]
            success = self.player.craft(recipe)
            if self.verbose > 0:
                status_msg = "SUCCEDED" if success else "FAILED"
                print(f"{status_msg} at crafting {recipe}")
        else:
            # Change zone
            zone_slot = action - first_zone_action
            zone = self.world.zones[zone_slot]
            success = self.player.move_to(zone)
            # The moving penalty applies whether or not the move succeeded.
            reward -= self.moving_penalty
            if self.verbose > 0:
                status_msg = "SUCCEDED" if success else "FAILED"
                print(f"{status_msg} at moving to {zone}")

        # Synchronise world zone with player zone
        zone_slot = self.world.zone_id_to_slot[self.player.zone.zone_id]
        self.world.zones[zone_slot] = self.player.zone

        # Obtain new observation
        observation = self.get_observation()

        # Tasks
        tasks_reward, tasks_done = self.tasks(
            observation, previous_observation, action
        )
        reward += tasks_reward

        reward -= self.timestep_penalty
        if not success:
            reward -= self.fail_penalty

        self.player.score += int(reward)

        # Termination
        done = self.steps >= self.max_step or tasks_done

        # Infos
        action_is_legal = self.action_masks()
        infos = {
            "env_step": self.steps,
            "action_is_legal": action_is_legal,
            "tasks_done": tasks_done,
        }

        self.steps += 1

        if self.observe_legal_actions:
            observation = (observation, action_is_legal)

        return observation, reward, done, infos

    def add_task(self, task: "Task", weight: float = 1.0, can_end: bool = False):
        """Add a new task to the Crafting environment.

        Args:
            task (Task): Task to be added, must be an instance of Task.
            weight (float, optional): Weight of this task rewards.
                Defaults to 1.0.
            can_end (bool, optional): If True, this task could make the env
                done when completed. See TaskList early_stopping for more
                details. Defaults to False.

        """
        self.tasks.add(task, weight, can_end)

    def action_masks(self) -> np.ndarray:
        """Return the legal actions, in the same order as the action IDs."""
        can_get = np.array(
            [
                self.player.can_get(item, self.player.choose_tool(item))
                for item in self.world.foundable_items
            ]
        )
        can_craft = np.array(
            [self.player.can_craft(recipe) for recipe in self.world.recipes]
        )
        can_move = np.array(
            [self.player.can_move_to(zone) for zone in self.world.zones]
        )
        return np.concatenate((can_get, can_craft, can_move))

    def get_observation(self) -> np.ndarray:
        """Return the current observation.

        The observation concatenates the inventory content, the one-hot
        encoded zone of the player and the properties of that zone.
        """
        one_hot_zone = np.zeros(self.world.n_zones, np.float32)
        zone_slot = self.world.zone_id_to_slot[self.player.zone.zone_id]
        one_hot_zone[zone_slot] = 1

        inventory_content = self.player.inventory.content

        # Properties absent from the current zone are left at 0.
        zone_properties = np.zeros(self.world.n_zone_properties)
        for i, prop in enumerate(self.world.zone_properties):
            if prop in self.player.zone.properties:
                zone_properties[i] = self.player.zone.properties[prop]

        observation = np.concatenate(
            (inventory_content, one_hot_zone, zone_properties), axis=-1
        )
        return observation

    def reset(self) -> np.ndarray:
        """Restore the initial world and player, reset tasks and observe.

        Returns:
            The first observation, paired with the legal actions mask when
            `observe_legal_actions` is True.

        """
        self.steps = 0
        self.player = deepcopy(self.initial_player)
        self.world = deepcopy(self.initial_world)
        self.tasks.reset()

        observation = self.get_observation()
        if self.observe_legal_actions:
            observation = (observation, self.action_masks())
        return observation

    def render(self, mode="rgb_array"):
        """Render the environment.

        Args:
            mode: 'rgb_array' returns the rendered screen as an array,
                'console' returns the player as a string, 'human' is not
                implemented. Other modes are delegated to the parent class.

        Raises:
            NotImplementedError: If mode is 'human'.

        """
        if mode == "human":  # for human interaction
            raise NotImplementedError
        if mode == "console":  # for console print
            return str(self.player)
        if mode == "rgb_array":
            # The rendering window is only created on first use.
            if self.render_variables is None:
                self.render_variables = create_window(self)
            update_rendering(
                env=self,
                fps=self.metadata["video.frames_per_second"],
                **self.render_variables,
            )
            rgb_array = surface_to_rgb_array(self.render_variables["screen"])
            return rgb_array
        return super().render(mode=mode)  # just raise an exception

    def __call__(self, action):
        """Shortcut for `step(action)`."""
        return self.step(action)
def test_none(self):
    """should fall back to a weight of 1 for every task without weights."""
    tasklist = TaskList(self.tasks)
    self.tasklist = tasklist

    for task in tasklist.tasks:
        check.equal(tasklist._get_task_weight(task), 1)
def test_call_none_task(self):
    """should return (0, False) when called without any task."""
    empty_task_list = TaskList(None)

    outcome = empty_task_list(
        self.observation,
        self.previous_observation,
        self.action,
    )
    reward, done = outcome

    check.equal(reward, 0)
    check.is_false(done)
def test_init_raise_not_task(self):
    """should raise TypeError when an element is not a crafting.Task."""
    expected_message = r".*must be.*crafting.Task.*"
    # A plain string slipped among real tasks must be rejected.
    invalid_tasks = [self.task_observe_123, "task_str"]

    with pytest.raises(TypeError, match=expected_message):
        TaskList(invalid_tasks)
def test_init(self):
    """should instantiate without raising when given valid tasks."""
    TaskList(tasks=self.tasks)
def test_raise_othervalue(self):
    """should raise ValueError for an early_stopping outside ('any', 'all')."""
    expected_message = r"Unknown value for early_stopping.*"
    task_list = TaskList(None, early_stopping="x")
    task_list.dones = [True, False, True]

    # The error is expected when stacking dones.
    with pytest.raises(ValueError, match=expected_message):
        task_list._stacked_dones()
def test_none(self):
    """should report that no task can end the env without `can_end`."""
    tasklist = TaskList(self.tasks)
    self.tasklist = tasklist

    for task in tasklist.tasks:
        cannot_end = tasklist._get_task_can_end(task)
        check.is_false(cannot_end)