def step(self, observation, info: Dict[str, Any], detailed_commands=False):
        """Choose the next high level command for the current game state.

        Refreshes the stored inventory, description and recipe from the
        observation, builds the state description and the candidate commands,
        lets ``self.model`` pick one candidate and translates it with
        ``self.command_to_action``.

        :param observation: observation from the environment
        :param info: info dictionary from the environment (kept on ``self.info``)
        :param detailed_commands: when true, also return the translation of every
            candidate command
        :return: ``(cmds, learning_info)``, or ``(cmds, learning_info, hl2ll)`` when
            ``detailed_commands`` is true. ``cmds`` is an ``flist`` holding the
            translation of the chosen command, ``learning_info`` bundles the model
            outputs together with the candidate list, and ``hl2ll`` maps each
            candidate command to its translation.
        """
        self.info = info
        # Set before the helpers below run; it is overwritten once a command is chosen.
        self.reading = 'and start reading' in observation

        # Inventory and description are obtained together from observation/info.
        self.inventory, self.description = self._get_inventory_and_description(observation, info)

        # One cleaned entry per inventory line; lines containing 'carrying' are skipped.
        carried = []
        for line in self.inventory.strip().split('\n'):
            if 'carrying' in line:
                continue
            carried.append(self.remove_articles(line.strip()))

        self.recipe = self._get_recipe(observation)
        room = Navigator.extract_location(self.description)
        nav_options = self.navigator.get_navigational_commands(self.description)

        needed_items = None
        if self._know_recipe():
            needed_items, utils = self.item_scorer(recipe=self.recipe,
                                                   inventory=self.inventory)
            # update the needed utils
            self._update_util_locations(self.description, utils, room)

        def translate(hl_cmd):
            # Shared translation call for the chosen command and, optionally, all candidates.
            return self.command_to_action(command=hl_cmd,
                                          items=needed_items,
                                          inventory=carried,
                                          description=self.description)

        state_description = self.build_state_description(
            self.description, needed_items, room, observation, carried)
        candidates = self.get_commands(
            self.description, needed_items, room, carried, nav_options)
        score, prob, value, chosen, index = self.model(state_description, candidates)

        cmds = flist()
        cmds.append(translate(chosen))

        learning_info = LearningInfo(score=score,
                                     prob=prob,
                                     value=value,
                                     action=chosen,
                                     index=index,
                                     possible_actions=candidates)

        # Bookkeeping happens before the optional per-candidate translation below.
        self.reading = (chosen == 'examine cookbook')
        self.step_count += 1
        self.cmd_memory.append(chosen)

        if not detailed_commands:
            return cmds, learning_info

        hl2ll = {hl_cmd: translate(hl_cmd) for hl_cmd in candidates}
        return cmds, learning_info, hl2ll
Beispiel #2
0
    def step(self, observation, info: Dict[str, Any], detailed_commands=False):
        """
        Choose the next high level command and translate it to low level commands.

        :param observation: observation from the environment
        :param info: info dictionary from the environment.
        :param detailed_commands: when true, also return the translation of every
            candidate command.
        :return: One or multiple low level cmds that correspond to a single high level action and the model infos needed
        for the A2C learning; when ``detailed_commands`` is true, a dict mapping each candidate high level command to
        its translation is returned as a third element.
        """
        self.info = info
        # Set before the helpers below run; it is overwritten once a command is chosen.
        self.reading = 'and start reading' in observation

        # retrieve the information about the inventory, description, recipe and location
        self.inventory, self.description = self._get_inventory_and_description(
            observation, info)

        # One cleaned entry per inventory line; lines containing 'carrying' are skipped.
        carried = []
        for line in self.inventory.strip().split('\n'):
            if 'carrying' in line:
                continue
            carried.append(self.remove_articles(line.strip()))

        self.recipe = self._get_recipe(observation)
        room = Navigator.extract_location(self.description)

        # Advance the tracked state; the previous action is only passed once one exists.
        step_kwargs = {}
        if len(self.cmd_memory) > 0:
            step_kwargs['prev_action'] = self.cmd_memory[-1]
        step_kwargs['pruned'] = self.params['pruned']
        self.state.step(self.description.strip(), **step_kwargs)

        # NOTE: total_frames is a fixed placeholder, so the scheduler is always queried at 0.
        total_frames = 0  # have to update this somehow later
        epsilon = self.e_scheduler.value(total_frames)
        # The candidate commands come from self.kg.act together with the state embedding.
        state_embedding, candidates = self.kg.act(self.state, epsilon)

        needed_items = None
        if self._know_recipe():
            # Invoke the neural model to determine from the recipe and inventory which items we need to pickup and
            # what actions need to performed on them to satisfy the recipe.
            needed_items, utils = self.item_scorer(recipe=self.recipe,
                                                   inventory=self.inventory)
            # update the needed utils
            self._update_util_locations(self.description, utils, room)

        def translate(hl_cmd):
            # Shared translation call for the chosen command and, optionally, all candidates.
            return self.command_to_action(command=hl_cmd,
                                          items=needed_items,
                                          inventory=carried,
                                          description=self.description)

        # build the representation of the current game state
        state_description = self.build_state_description(
            self.description, needed_items, state_embedding, observation, carried)

        # ask the model for the next command
        score, prob, value, chosen, index = self.model(state_description, candidates)

        # translate the chosen high level command to a (set of) low level commands
        cmds = flist()
        cmds.append(translate(chosen))

        # save the learning necessary for the A2C update of the model
        learning_info = LearningInfo(score=score,
                                     prob=prob,
                                     value=value,
                                     action=chosen,
                                     index=index,
                                     possible_actions=candidates)

        # Bookkeeping happens before the optional per-candidate translation below.
        self.reading = (chosen == 'examine cookbook')
        self.step_count += 1
        self.cmd_memory.append(chosen)

        if not detailed_commands:
            return cmds, learning_info

        hl2ll = {hl_cmd: translate(hl_cmd) for hl_cmd in candidates}
        return cmds, learning_info, hl2ll