Example #1
    def __init__(self, device, model, item_scorer, hcp=4):
        self.device = device
        self.cmd_memory = flist()
        self.item_scorer = item_scorer
        # self.navigator = Navigator(navigation_model)
        self.utils = None
        self.hcp = hcp

        self.step_count = 0
        self.total_score = 0
        self.current_score = 0
        self.recipe = ''
        self.reading = False

        self.model = model
        self.description = 'nothing'
        self.description_updated = True

        self.inventory = 'nothing'
        self.inventory_updated = False
        self.info = None

        # added for KG part
        self.state = StateNAction()
        self.kg = KGDQN(params, self.state.all_actions).cuda()
        self.params = params
        self.num_frames = params['num_frames']
        if params['scheduler_type'] == 'exponential':
            self.e_scheduler = ExponentialSchedule(self.num_frames,
                                                   params['e_decay'],
                                                   params['e_final'])
        elif params['scheduler_type'] == 'linear':
            self.e_scheduler = LinearSchedule(self.num_frames,
                                              params['e_final'])
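
The constructor expects a schedule object exposing value(frame). A minimal sketch of both variants, assuming standard DQN-style annealing from an initial epsilon of 1.0 down to e_final; the e_start default and the exact decay formulas are assumptions, not taken from the source:

import math

class LinearSchedule:
    # Anneals epsilon linearly from e_start to e_final over num_frames.
    def __init__(self, num_frames, e_final, e_start=1.0):
        self.num_frames = num_frames
        self.e_final = e_final
        self.e_start = e_start

    def value(self, frame):
        fraction = min(frame / self.num_frames, 1.0)
        return self.e_start + fraction * (self.e_final - self.e_start)

class ExponentialSchedule:
    # Decays epsilon exponentially toward e_final with time constant e_decay.
    def __init__(self, num_frames, e_decay, e_final, e_start=1.0):
        self.num_frames = num_frames  # kept only for signature parity with the call above
        self.e_decay = e_decay
        self.e_final = e_final
        self.e_start = e_start

    def value(self, frame):
        return self.e_final + (self.e_start - self.e_final) * math.exp(-frame / self.e_decay)
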
    def explore(self, description):
        """
        Determines the low-level action that needs to be performed to execute one more step of a DFS from the
        current location. Additionally, the neural model determines all closed doors in the current location and
        adds the respective open commands.
        """
        location = Navigator.extract_location(description)
        commands = flist()
        self.update_graph(location=location,
                          description=description)

        if len(self.graph[location].closed_doors) > 0:
            commands.append(self.open_doors(location))

        # explore the first unvisited direction
        if len(self.graph[location].unvisited_directions) > 0:
            direction = self.graph[location].unvisited_directions.pop(0)
        else:
            # if no unvisited direction available anymore -> go back to where we came from
            if self.graph[location].came_from == 'start':
                direction = None
            else:
                direction = self.graph[location].came_from

        commands.append(self.do_move(direction))

        if len(commands) == 0:
            # fallback
            commands.append('look')

        return commands
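
explore() assumes each entry of self.graph records the closed doors in a location, the exits not yet explored, and the direction the DFS entered from. A hypothetical node layout consistent with those accesses (the name LocationNode and the field types are assumptions):

from dataclasses import dataclass, field
from typing import List

@dataclass
class LocationNode:
    closed_doors: List[str] = field(default_factory=list)          # doors the model detected as closed
    unvisited_directions: List[str] = field(default_factory=list)  # exits the DFS has not taken yet
    came_from: str = 'start'                                       # direction leading back to the previous location
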
    def step(self, observation, info: Dict[str, Any], detailed_commands=False):
        self.info = info
        self.reading = 'and start reading' in observation

        # retrieve the information about the inventory, description, recipe and location (different approaches for different HCPs)
        self.inventory, self.description = self._get_inventory_and_description(observation, info)
        inventory = [self.remove_articles(inv.strip()) for inv in self.inventory.strip().split('\n') if 'carrying' not in inv]
        self.recipe = self._get_recipe(observation)
        location = Navigator.extract_location(self.description)

        nav_commands = self.navigator.get_navigational_commands(self.description)

        items = None
        if self._know_recipe():
            items, utils = self.item_scorer(recipe=self.recipe,
                                            inventory=self.inventory)
            # update the needed utils
            self._update_util_locations(self.description, utils, location)
        state_description = self.build_state_description(self.description, items, location, observation, inventory)
        possible_commands = self.get_commands(self.description, items, location, inventory, nav_commands)
        score, prob, value, high_level_command, index = self.model(state_description, possible_commands)
        cmds = flist()
        cmds.append(self.command_to_action(command=high_level_command,
                                           items=items,
                                           inventory=inventory,
                                           description=self.description))

        learning_info = LearningInfo(score=score,
                                     prob=prob,
                                     value=value,
                                     action=high_level_command,
                                     index=index,
                                     possible_actions=possible_commands)

        self.reading = (high_level_command == 'examine cookbook')
        self.step_count += 1
        self.cmd_memory.append(high_level_command)

        if detailed_commands:
            hl2ll = {hl_cmd: self.command_to_action(command=hl_cmd,
                                                    items=items,
                                                    inventory=inventory,
                                                    description=self.description)
                     for hl_cmd in possible_commands}

            return cmds, learning_info, hl2ll

        return cmds, learning_info
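
A sketch of how step() might be driven from a game loop, assuming a TextWorld-style gym interface where env.reset() returns (obs, info) and env.step(cmd) returns (obs, score, done, info); the env object and its construction are not part of the source:

def play_episode(env, agent):
    obs, info = env.reset()
    score, done = 0, False
    while not done:
        cmds, learning_info = agent.step(obs, info)
        for cmd in cmds:  # one high-level action may expand into several low-level commands
            obs, score, done, info = env.step(cmd)
            if done:
                break
    return score
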
    def __init__(self, device, model, item_scorer, navigation_model, hcp=4):
        self.device = device
        self.cmd_memory = flist()
        self.item_scorer = item_scorer
        self.navigator = Navigator(navigation_model)
        self.utils = None
        self.hcp = hcp

        self.step_count = 0
        self.total_score = 0
        self.current_score = 0
        self.recipe = ''
        self.reading = False

        self.model = model
        self.description = 'nothing'
        self.description_updated = True

        self.inventory = 'nothing'
        self.inventory_updated = False
        self.info = None
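
Both examples collect commands in an flist. The way single commands, lists of commands, and the possibly-None result of do_move() are all passed to append() suggests a flattening list; one plausible definition (an assumption, not the source implementation):

class flist(list):
    # append() inlines nested lists/tuples and silently drops None,
    # so callers can append one command or a batch uniformly.
    def append(self, item):
        if item is None:
            return
        if isinstance(item, (list, tuple)):
            self.extend(item)
        else:
            super().append(item)
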
Example #5
    def step(self, observation, info: Dict[str, Any], detailed_commands=False):
        """"
        :param observation: observation from the environment
        :param info: info dictionary from the environment.
        :return: One or multiple low level cmds that correspond to a single high level action and the model infos needed
        for the A2C learning.
        """
        self.info = info
        self.reading = 'and start reading' in observation

        # retrieve the information about the inventory, description, recipe and location (different approaches for different HCPs)
        self.inventory, self.description = self._get_inventory_and_description(
            observation, info)
        inventory = [
            self.remove_articles(inv.strip())
            for inv in self.inventory.strip().split('\n')
            if 'carrying' not in inv
        ]
        self.recipe = self._get_recipe(observation)
        location = Navigator.extract_location(self.description)

        if len(self.cmd_memory) == 0:
            self.state.step(self.description.strip(),
                            pruned=self.params['pruned'])
        else:
            self.state.step(self.description.strip(),
                            prev_action=self.cmd_memory[-1],
                            pruned=self.params['pruned'])
        total_frames = 0  # TODO: track the global frame count so the epsilon schedule actually anneals
        epsilon = self.e_scheduler.value(total_frames)
        state_embedding, possible_commands = self.kg.act(self.state, epsilon)

        # nav_commands = self.navigator.get_navigational_commands(self.description)

        items = None
        if self._know_recipe():
            # Invoke the neural model to determine, from the recipe and inventory, which items we need to pick up
            # and which actions need to be performed on them to satisfy the recipe.
            items, utils = self.item_scorer(recipe=self.recipe,
                                            inventory=self.inventory)
            # update the needed utils
            self._update_util_locations(self.description, utils, location)

        # build the representation of the current game state (dictionary of strings)
        state_description = self.build_state_description(
            self.description, items, state_embedding, observation, inventory)

        # generate a list of possible commands for the current game state
        # possible_commands = self.get_commands(self.description, items, location, inventory, nav_commands)

        # ask the model for the next command
        score, prob, value, high_level_command, index = self.model(
            state_description, possible_commands)
        cmds = flist()
        # translate the chosen high level command to a (set of) low level commands
        cmds.append(
            self.command_to_action(command=high_level_command,
                                   items=items,
                                   inventory=inventory,
                                   description=self.description))

        # save the learning necessary for the A2C update of the model
        learning_info = LearningInfo(score=score,
                                     prob=prob,
                                     value=value,
                                     action=high_level_command,
                                     index=index,
                                     possible_actions=possible_commands)

        self.reading = (high_level_command == 'examine cookbook')
        self.step_count += 1
        self.cmd_memory.append(high_level_command)

        if detailed_commands:
            hl2ll = {
                hl_cmd: self.command_to_action(command=hl_cmd,
                                               items=items,
                                               inventory=inventory,
                                               description=self.description)
                for hl_cmd in possible_commands
            }

            return cmds, learning_info, hl2ll

        return cmds, learning_info
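
Both step() variants package the actor-critic outputs into a LearningInfo record for the A2C update. A definition consistent with the keyword arguments used above (hypothetical; the real container may differ):

from collections import namedtuple

LearningInfo = namedtuple(
    'LearningInfo',
    ['score', 'prob', 'value', 'action', 'index', 'possible_actions'])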