Example #1

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence

# Tokenizer is assumed to be a project-local module; CmdTranslator is the model
# defined in Example #2 below.
class ItemScorerModel(nn.Module):
    x_keys = ['recipe_directions', 'inventory']

    def __init__(self, encoder_hidden_dim, device, linear_hidden_dim=32):
        super(ItemScorerModel, self).__init__()

        # translator model that maps desired actions on ingredients to commands the parser understands
        self.translator = CmdTranslator.initialize_trained_model(device)

        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim

        # binary classifier that determines, for every direction in the recipe,
        # whether it still needs to be performed on the ingredient
        self.action_scorer = nn.Sequential(
            # in_features: direction encoding concatenated with inventory
            # encoding, each of size 2 * encoder_hidden_dim (bidirectional)
            nn.Linear(in_features=2 * encoder_hidden_dim * 2,
                      out_features=linear_hidden_dim),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=1),
            nn.Sigmoid())

        self.device = device
        self.to(self.device)

    def forward(self, x, return_actions=False):
        def unpadded_sequence_length(tensor):
            # number of non-padding (non-zero) tokens per row; currently unused
            return (tensor != 0).sum(dim=1)

        def encoder(list_of_str):
            """ Encodes a list of strings with BERT. """
            tokenized = self.tokenizer.encode_commands(list_of_str)
            hidden = self.tokenizer.tokenize(tokenized)
            # flatten the two directions of the bidirectional encoder
            hidden = hidden.permute(1, 0, 2).reshape(hidden.size(1), -1)
            return hidden

        scores = []
        pickups = []
        cmds = []
        for item, directions, inventory in zip(x['item'],
                                               x['recipe_directions'],
                                               x['inventory']):

            # encode the recipe directions

            # replace the specific ingredient name with the generic token 'item' for more robustness and better generalization
            clnd_directions = [
                direction.replace(item, 'item').strip()
                for direction in directions.split(' <SEP> ')
                if item in direction
            ]
            if len(clnd_directions) == 0:
                # no recipe direction to perform on the ingredient
                clnd_directions = ['nothing']
                clnd_directions_to_encode = ['nothing']
            else:
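                # only the first word (the verb) of each direction is encoded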
                clnd_directions_to_encode = [
                    d.split()[0] for d in clnd_directions
                ]

            # encode the recipe directions for the current ingredient
            encoded_directions = encoder(clnd_directions_to_encode)

            # encode the inventory
            # remove the specific ingredient name from the string for more robustness and better generalization
            clnd_inventory = [
                inv.replace(item, '').strip()
                for inv in inventory.split(' <SEP> ') if item in inv
            ]
            if len(clnd_inventory) == 0:
                # ingredient is not in the inventory yet
                clnd_inventory = ['nothing']
            else:
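                # keep only the first inventory entry that mentions the ingredient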
                clnd_inventory = [clnd_inventory[0]]

            # encode the inventory for the current ingredient
            encoded_inventory = encoder(clnd_inventory)[0, :]

            # concatenate the encodings of the inventory to the encoding of every recipe direction
            stckd = torch.cat(
                (encoded_directions,
                 torch.stack(
                     [encoded_inventory] * encoded_directions.shape[0])),
                dim=-1)

            if clnd_directions != ['nothing']:
                # compute the binary score of the recipe directions (determines for every direction if it is needed or not)
                score = self.action_scorer(stckd)
            else:
                # keep the constant score on the model's device so that
                # pad_sequence below does not mix CPU and GPU tensors
                score = torch.zeros(1, 1, device=self.device)

            scores.append(score)

            # pickup is only determined by whether the ingredient is in the inventory or not
            pickups.append(item not in inventory)

            if return_actions:
                # map the output to the actual commands
                cmds.append(
                    self.to_action(pickups[-1], clnd_directions, scores[-1],
                                   item))

        scores = pad_sequence(scores, batch_first=True,
                              padding_value=0).squeeze().float().to(self.device)

        if return_actions:
            return scores, cmds

        return scores

    def to_action(self, pickup, directions, scores, item):
        """
        Applies a threshold (0.5) to the output scores of the action scorer. Directions scoring at or above the
        threshold are mapped to actual commands via the translator model.
        """
        cmds = []
        thr = 0.5
        if pickup:
            cmds.append('take {}'.format(item))
        if directions == ['nothing']:
            return cmds
        _, _, translated_cmds = self.translator(directions)
        for cmd_score, cmd in zip(scores, translated_cmds):
            if cmd_score >= thr:
                cmds.append(cmd.replace('item', item))
        return cmds
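
A minimal usage sketch for the scorer (hypothetical: the batch layout and the ' <SEP> ' separator are inferred from the forward pass above; encoder_hidden_dim must match the Tokenizer's encoder hidden size, and the pre-trained translator weights must exist on disk for __init__ to succeed):

# Hypothetical usage sketch; the values below are illustrative assumptions.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ItemScorerModel(encoder_hidden_dim=16, device=device)
batch = {
    'item': ['red apple'],
    'recipe_directions': ['dice the red apple <SEP> fry the red apple'],
    'inventory': ['a knife <SEP> a red apple'],
}
scores, cmds = model(batch, return_actions=True)
# scores: one sigmoid score per direction that mentions the item
# cmds: the thresholded directions translated to commands, e.g.
#       ['dice the red apple with knife'] once the model is trained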
Example #2

import os

import torch
import torch.nn as nn

# Tokenizer is assumed to be a project-local module. _FILE_PREFIX is assumed to
# be the module's base directory for locating weight files:
_FILE_PREFIX = os.path.dirname(os.path.abspath(__file__))
class CmdTranslator(nn.Module):
    """
    Translates recipe actions to commands that the environment understands.
    E.g. 'fry the yellow omelette' -> 'cook the yellow omelette with stove'
         'dice the juicy red apple' -> 'dice the juicy red apple with knife'
    """
    verbs = ['slice', 'dice', 'chop', 'cook']
    utils = ['knife', 'oven', 'stove', 'BBQ']

    def __init__(self, device, encoder_hidden_dim=16, linear_hidden_dim=16):

        super(CmdTranslator, self).__init__()

        # Word embeddings (initialized from GloVe embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim

        self.device = device

        # determines which of the 4 utils ('knife', 'oven', 'stove', 'BBQ') needs to be used for the command
        self.util_decoder = nn.Sequential(
            nn.Linear(in_features=self.embedding_dim,
                      out_features=linear_hidden_dim),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=4))

        # determines which of the 4 verbs ('slice', 'dice', 'chop', 'cook') needs to be used for the command
        self.verb_decoder = nn.Sequential(
            nn.Linear(in_features=self.embedding_dim,
                      out_features=linear_hidden_dim),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=4))

        self.to(self.device)

    def forward(self, directions):
        """
        Takes a list of recipe directions (e.g. ['fry the item', 'slice the item']) and returns the most likely
        commands (e.g. ['cook the item with stove', 'slice the item with knife']).
        """
        def unpadded_sequence_length(tensor):
            # number of non-padding (non-zero) tokens per row; currently unused
            return (tensor != 0).sum(dim=1)

        # encode the input
        tokenized = self.tokenizer.encode_commands(directions)
        hidden = self.tokenizer.tokenize(tokenized)
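        # flatten the two directions of the bidirectional encoder into one vector per sequence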
        encoded = hidden.permute(1, 0, 2).reshape(hidden.size(1), -1)

        # compute the scores for the verbs and utils
        verb_distribution = self.verb_decoder(encoded)
        util_distribution = self.util_decoder(encoded)

        # use the verb and util with the highest probability for the returned command
        verb_idx = torch.argmax(verb_distribution, dim=-1)
        util_idx = torch.argmax(util_distribution, dim=-1)
        cmds = [
            '{} the item with {}'.format(self.verbs[verb_idx[idx]],
                                         self.utils[util_idx[idx]])
            for idx in range(len(directions))
        ]
        return verb_distribution, util_distribution, cmds

    @classmethod
    def initialize_trained_model(cls, device):
        """ Initializes the model from the pre-trained weights. """
        model = cls(device=device)
        model_path = os.path.join(_FILE_PREFIX,
                                  'weights/translator_weights_16')
        model.load_state_dict(torch.load(model_path, map_location=device),
                              strict=True)
        print('Loaded model from {}'.format(model_path))
        return model
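
A minimal sketch of using the translator on its own (hypothetical; assumes the pre-trained weights file 'weights/translator_weights_16' exists under _FILE_PREFIX):

# Hypothetical usage sketch; assumes the weights file is present on disk.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
translator = CmdTranslator.initialize_trained_model(device)
verb_dist, util_dist, cmds = translator(['fry the item', 'dice the item'])
# cmds: e.g. ['cook the item with stove', 'dice the item with knife']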