def step(self, observation, info: Dict[str, Any], detailed_commands=False):
    """
    Choose the next high level command for the given observation and translate it.

    :param observation: observation text from the environment.
    :param info: info dictionary; stored on ``self.info`` and forwarded to
        ``_get_inventory_and_description``.
    :param detailed_commands: if true, additionally return a dict that maps every possible
        high level command to its ``command_to_action`` translation.
    :return: ``(cmds, learning_info)``, or ``(cmds, learning_info, hl2ll)`` when
        ``detailed_commands`` is set. ``cmds`` holds the translation of the chosen command.
    """
    self.info = info
    # provisional flag; it is overwritten further down once the command has been chosen
    self.reading = 'and start reading' in observation

    # retrieve the inventory and description text for this step
    self.inventory, self.description = self._get_inventory_and_description(observation, info)

    # one cleaned entry per inventory line; lines containing 'carrying' are skipped
    carried = []
    for entry in self.inventory.strip().split('\n'):
        if 'carrying' in entry:
            continue
        carried.append(self.remove_articles(entry.strip()))

    self.recipe = self._get_recipe(observation)
    location = Navigator.extract_location(self.description)
    nav_commands = self.navigator.get_navigational_commands(self.description)

    items = None
    if self._know_recipe():
        # score the recipe against the inventory, then update the needed utils
        items, utils = self.item_scorer(recipe=self.recipe, inventory=self.inventory)
        self._update_util_locations(self.description, utils, location)

    state_description = self.build_state_description(
        self.description, items, location, observation, carried)
    possible_commands = self.get_commands(
        self.description, items, location, carried, nav_commands)

    # ask the model for the next command
    score, prob, value, high_level_command, index = self.model(
        state_description, possible_commands)

    def _translate(hl_cmd):
        # shared translation call for the chosen command and for the optional full mapping
        return self.command_to_action(command=hl_cmd,
                                      items=items,
                                      inventory=carried,
                                      description=self.description)

    cmds = flist()
    cmds.append(_translate(high_level_command))

    learning_info = LearningInfo(score=score,
                                 prob=prob,
                                 value=value,
                                 action=high_level_command,
                                 index=index,
                                 possible_actions=possible_commands)

    # bookkeeping for the next step
    self.reading = (high_level_command == 'examine cookbook')
    self.step_count += 1
    self.cmd_memory.append(high_level_command)

    if not detailed_commands:
        return cmds, learning_info

    hl2ll = {}
    for hl_cmd in possible_commands:
        hl2ll[hl_cmd] = _translate(hl_cmd)
    return cmds, learning_info, hl2ll
def step(self, observation, info: Dict[str, Any], detailed_commands=False):
    """
    Choose the next high level command for the given observation and translate it.

    :param observation: observation from the environment
    :param info: info dictionary from the environment.
    :param detailed_commands: if true, additionally return a dict that maps every possible
        high level command to its ``command_to_action`` translation.
    :return: One or multiple low level cmds that correspond to a single high level action and
        the model infos needed for the A2C learning; with ``detailed_commands`` the
        high-level-to-low-level mapping is appended as a third element.
    """
    self.info = info
    # provisional flag; it is overwritten further down once the command has been chosen
    self.reading = 'and start reading' in observation

    # retrieve the information about the inventory and description
    # (different approaches for different HCPs)
    self.inventory, self.description = self._get_inventory_and_description(observation, info)

    # one cleaned entry per inventory line; lines containing 'carrying' are skipped
    carried = []
    for entry in self.inventory.strip().split('\n'):
        if 'carrying' in entry:
            continue
        carried.append(self.remove_articles(entry.strip()))

    self.recipe = self._get_recipe(observation)
    location = Navigator.extract_location(self.description)

    # advance self.state with the new description; the previous command is passed
    # along only once at least one command has been recorded
    room_text = self.description.strip()
    if len(self.cmd_memory) != 0:
        self.state.step(room_text,
                        prev_action=self.cmd_memory[-1],
                        pruned=self.params['pruned'])
    else:
        self.state.step(room_text, pruned=self.params['pruned'])

    # TODO: total_frames is hard-coded to 0 ("have to update this somehow later"),
    # so the scheduler is always queried at frame 0
    total_frames = 0
    epsilon = self.e_scheduler.value(total_frames)
    # the candidate commands come from self.kg.act here; the navigator / get_commands
    # based generation is disabled in this version
    state_embedding, possible_commands = self.kg.act(self.state, epsilon)

    items = None
    if self._know_recipe():
        # Invoke the neural model to determine from the recipe and inventory which items we
        # need to pickup and what actions need to be performed on them to satisfy the recipe.
        items, utils = self.item_scorer(recipe=self.recipe, inventory=self.inventory)
        # update the needed utils
        self._update_util_locations(self.description, utils, location)

    # build the representation of the current game state
    state_description = self.build_state_description(
        self.description, items, state_embedding, observation, carried)

    # ask the model for the next command
    score, prob, value, high_level_command, index = self.model(
        state_description, possible_commands)

    def _translate(hl_cmd):
        # translate a high level command to a (set of) low level commands
        return self.command_to_action(command=hl_cmd,
                                      items=items,
                                      inventory=carried,
                                      description=self.description)

    cmds = flist()
    cmds.append(_translate(high_level_command))

    # save the learning info necessary for the A2C update of the model
    learning_info = LearningInfo(score=score,
                                 prob=prob,
                                 value=value,
                                 action=high_level_command,
                                 index=index,
                                 possible_actions=possible_commands)

    # bookkeeping for the next step
    self.reading = (high_level_command == 'examine cookbook')
    self.step_count += 1
    self.cmd_memory.append(high_level_command)

    if not detailed_commands:
        return cmds, learning_info

    hl2ll = {}
    for hl_cmd in possible_commands:
        hl2ll[hl_cmd] = _translate(hl_cmd)
    return cmds, learning_info, hl2ll