Example #1
    def __init__(self, device, encoder_hidden_dim=16, linear_hidden_dim=16):

        super(CmdTranslator, self).__init__()

        # Word embedding (initialized from GloVe embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim

        self.device = device

        # determines which of the 4 utils ('knife', 'oven', 'stove', 'BBQ') needs to be used for the command
        self.util_decoder = nn.Sequential(
            nn.Linear(in_features=self.embedding_dim,
                      out_features=linear_hidden_dim),
            # nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=4))

        # determines which of the 4 actions ('slice', 'dice', 'chop', 'cook') needs to be used for the command
        self.verb_decoder = nn.Sequential(
            nn.Linear(in_features=self.embedding_dim,
                      out_features=linear_hidden_dim),
            # nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=4))

        self.to(self.device)
Example #2
    def __init__(self, device, hidden_size=64, bidirectional=True, hidden_linear_size=128):

        super(Model, self).__init__()

        self.device = device
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        # NOTE: self.STATE_LIST is assumed to be a class attribute of this model (not shown in this snippet)
        self.obs_encoded_hidden_size = self.hidden_size * len(self.STATE_LIST) * (2 if bidirectional else 1)
        self.cmd_encoded_hidden_size = self.hidden_size * (2 if bidirectional else 1)
        self.state_hidden = None
        # TODO: add graph embedding_dim

        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim

        # RNN that keeps track of the encoded state over time
        self.state_gru = nn.GRU(self.obs_encoded_hidden_size, self.obs_encoded_hidden_size, batch_first=True)

        # Critic to determine a value for the current state
        # TODO: add graph embedding_dim
        self.critic = nn.Sequential(nn.Linear(self.obs_encoded_hidden_size, hidden_linear_size),
                                    nn.ReLU(),
                                    nn.Linear(hidden_linear_size, 1))

        # Scorer for the commands
        # TODO: add graph embedding_dim
        self.att_cmd = nn.Sequential(nn.Linear(self.obs_encoded_hidden_size + self.cmd_encoded_hidden_size, hidden_linear_size),
                                     nn.ReLU(),
                                     nn.Linear(hidden_linear_size, 1))

        self.to(self.device)
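
The '(2 if bidirectional else 1)' factor above reflects that a bidirectional GRU concatenates the forward and backward hidden states. A minimal standalone sketch with made-up dimensions confirms the shapes:

import torch
import torch.nn as nn

# A bidirectional GRU doubles the effective hidden size per timestep.
gru = nn.GRU(input_size=8, hidden_size=64, batch_first=True, bidirectional=True)
out, hidden = gru(torch.randn(3, 5, 8))  # batch=3, seq_len=5, features=8
print(out.shape)     # torch.Size([3, 5, 128]) -> hidden_size * 2
print(hidden.shape)  # torch.Size([2, 3, 64])  -> one state per direction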
Example #3
    def __init__(self, encoder_hidden_dim, device, linear_hidden_dim=32):
        super(ItemScorerModel, self).__init__()

        # translator model for mapping from desired actions performed on ingredients to commands that the parser understands
        self.translator = CmdTranslator.initialize_trained_model(device)

        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim

        # binary classifier determining, for every direction in the recipe, whether it still needs to be performed
        self.action_scorer = nn.Sequential(
            nn.Linear(in_features=2 * encoder_hidden_dim * 2,
                      out_features=linear_hidden_dim),
            # nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=1),
            nn.Sigmoid())

        self.device = device
        self.to(self.device)
Example #4
    def __init__(self, device, encoder_hidden_dim=16, linear_hidden_dim=16):
        super(NavigationModel, self).__init__()

        # Word embedding (initialized from GloVe embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len,
                                      self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # encoder
        self.encoder = nn.GRU(self.embedding_dim,
                              encoder_hidden_dim,
                              batch_first=True,
                              bidirectional=True)
        self.device = device

        # 4 individual binary scorers, one per direction (north, south, east, west)
        self.nsew_scorer = nn.ModuleDict({
            k: nn.Sequential(
                nn.Linear(in_features=encoder_hidden_dim * 2,
                          out_features=linear_hidden_dim), nn.ReLU(),
                nn.Linear(in_features=linear_hidden_dim, out_features=1),
                nn.Sigmoid())
            for k in self.nsew
        })

        # Binary scorer that determines, for every word in the input, the probability that it is part of a closed door
        # e.g. 'To[0] the[0] east[0] you[0] see[0] a[0] closed[0] sliding[1] patio[1] door[1]'
        #      'There[0] is[0] an[0] open[0] metal[0] door[0]'
        self.door_finder = nn.Sequential(
            nn.Linear(in_features=encoder_hidden_dim * 2,
                      out_features=linear_hidden_dim), nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=1),
            nn.Sigmoid())

        self.to(self.device)
Example #5
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence

# Tokenizer and CmdTranslator are project-local modules (not shown in this snippet).


class ItemScorerModel(nn.Module):
    x_keys = ['recipe_directions', 'inventory']

    def __init__(self, encoder_hidden_dim, device, linear_hidden_dim=32):
        super(ItemScorerModel, self).__init__()

        # translator model for mapping from desired actions performed on ingredients to commands that the parser understands
        self.translator = CmdTranslator.initialize_trained_model(device)

        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim

        # binary classifier determining, for every direction in the recipe, whether it still needs to be performed
        self.action_scorer = nn.Sequential(
            nn.Linear(in_features=2 * encoder_hidden_dim * 2,
                      out_features=linear_hidden_dim),
            # nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=1),
            nn.Sigmoid())

        self.device = device
        self.to(self.device)

    def forward(self, x, return_actions=False):

        def encoder(list_of_str):
            """ Encodes a list of strings with the BERT encoder. """
            tokenized = self.tokenizer.encode_commands(list_of_str)
            hidden = self.tokenizer.tokenize(tokenized)
            hidden = hidden.permute(1, 0, 2).reshape(
                hidden.size(1), -1)  # correct for bidirectional
            return hidden

        scores = []
        pickups = []
        cmds = []
        for item, directions, inventory in zip(x['item'],
                                               x['recipe_directions'],
                                               x['inventory']):

            # encode the recipe directions

            # replace the specific ingredient name with a generic token for more robustness and better generalization
            clnd_directions = [
                direction.replace(item, 'item').strip()
                for direction in directions.split(' <SEP> ')
                if item in direction
            ]
            if len(clnd_directions) == 0:
                # no recipe direction to perform on the ingredient
                clnd_directions = ['nothing']
                clnd_directions_to_encode = ['nothing']
            else:
                clnd_directions_to_encode = [
                    d.split()[0] for d in clnd_directions
                ]

            # encode the recipe directions for the current ingredient
            encoded_directions = encoder(clnd_directions_to_encode)

            # encode the inventory
            # remove the specific ingredient name from the string for more robustness and better generalization
            clnd_inventory = [
                inv.replace(item, '').strip()
                for inv in inventory.split(' <SEP> ') if item in inv
            ]
            if len(clnd_inventory) == 0:
                # ingredient is not in the inventory yet
                clnd_inventory = ['nothing']
            else:
                clnd_inventory = [clnd_inventory[0]]

            # encode the inventory for the current ingredient
            encoded_inventory = encoder(clnd_inventory)[0, :]

            # concatenate the encodings of the inventory to the encoding of every recipe direction
            stckd = torch.cat(
                (encoded_directions,
                 torch.stack(
                     [encoded_inventory] * encoded_directions.shape[0])),
                dim=-1)

            if clnd_directions != ['nothing']:
                # compute the binary score of the recipe directions (determines for every direction if it is needed or not)
                score = self.action_scorer(stckd)
            else:
                score = torch.zeros((1, 1), dtype=torch.float)

            scores.append(score)

            # pickup is only determined by whether the ingredient is in the inventory or not
            pickups.append(item not in inventory)

            if return_actions:
                # map the output to the actual commands
                cmds.append(
                    self.to_action(pickups[-1], clnd_directions, scores[-1],
                                   item))

        scores = pad_sequence(scores, batch_first=True, padding_value=0)
        scores = scores.squeeze().type(torch.FloatTensor).to(self.device)

        if return_actions:
            return scores, cmds

        return scores

    def to_action(self, pickup, directions, scores, item):
        """
        Applies a threshold (of 0.5) to the output score of the action scorer. Above the threshold the respective recipe
        direction is mapped to an actual command via the translator model.
        """
        cmds = []
        thr = 0.5
        if pickup:
            cmds.append('take {}'.format(item))
        if directions == ['nothing']:
            return cmds
        _, _, translated_cmds = self.translator(directions)
        for cmd_score, cmd in zip(scores, translated_cmds):
            if cmd_score >= thr:
                cmds.append(cmd.replace('item', item))
        return cmds
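
A hypothetical usage sketch for ItemScorerModel (it assumes the project-local Tokenizer and the pre-trained CmdTranslator checkpoint are available; the batch values and encoder_hidden_dim=16 are made up for illustration):

device = torch.device('cpu')
model = ItemScorerModel(encoder_hidden_dim=16, device=device)
batch = {
    'item': ['red apple'],
    'recipe_directions': ['dice the red apple <SEP> fry the red apple'],
    'inventory': ['a red potato <SEP> a red apple'],
}
scores, cmds = model(batch, return_actions=True)
# cmds could look like [['dice the red apple with knife', 'cook the red apple with stove']]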
Example #6
import os

import torch
import torch.nn as nn

# Tokenizer and _FILE_PREFIX are project-local (not shown in this snippet).


class CmdTranslator(nn.Module):
    """
    Translates recipe actions to commands that the environment understands.
    E.g. 'fry the yellow omelette' -> 'cook the yellow omelette with stove'
         'dice the juicy red apple' -> 'dice the juicy red apple with knife'
    """
    verbs = ['slice', 'dice', 'chop', 'cook']
    utils = ['knife', 'oven', 'stove', 'BBQ']

    def __init__(self, device, encoder_hidden_dim=16, linear_hidden_dim=16):

        super(CmdTranslator, self).__init__()

        # Word embedding (initialized from GloVe embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim

        self.device = device

        # determines which of the 4 utils ('knife', 'oven', 'stove', 'BBQ') needs to be used for the command
        self.util_decoder = nn.Sequential(
            nn.Linear(in_features=self.embedding_dim,
                      out_features=linear_hidden_dim),
            # nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=4))

        # determines which of the 4 actions ('slice', 'dice', 'chop', 'cook') needs to be used for the command
        self.verb_decoder = nn.Sequential(
            nn.Linear(in_features=self.embedding_dim,
                      out_features=linear_hidden_dim),
            # nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=4))

        self.to(self.device)

    def forward(self, directions):
        """
        Takes a list of recipe directions (e.g. ['fry the item', 'slice the item']) and returns the most likely commands
        (['cook the item with stove', 'slice the item with knife']).
        """

        # encode the input
        tokenized = self.tokenizer.encode_commands(directions)
        hidden = self.tokenizer.tokenize(tokenized)
        encoded = hidden.permute(1, 0, 2).reshape(hidden.size(1), -1)  # correct for bidirectional

        # compute the scores for the verbs and utils
        verb_distribution = self.verb_decoder(encoded)
        util_distribution = self.util_decoder(encoded)

        # use the verb and util with the highest score for the returned command
        verb_idx = torch.argmax(verb_distribution, dim=-1)
        util_idx = torch.argmax(util_distribution, dim=-1)
        cmds = [
            '{} the item with {}'.format(self.verbs[verb_idx[idx]],
                                         self.utils[util_idx[idx]])
            for idx in range(len(directions))
        ]
        return verb_distribution, util_distribution, cmds

    @classmethod
    def initialize_trained_model(cls, device):
        """ Initializes the model from the pre-trained weights. """
        model = cls(device=device)
        model_path = os.path.join(_FILE_PREFIX,
                                  'weights/translator_weights_16')
        model.load_state_dict(torch.load(model_path, map_location=device),
                              strict=True)
        print('Loaded model from {}'.format(model_path))
        return model
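
A hypothetical usage sketch for CmdTranslator (it assumes the weights file referenced in initialize_trained_model exists):

translator = CmdTranslator.initialize_trained_model(device=torch.device('cpu'))
verb_dist, util_dist, cmds = translator(['fry the item', 'slice the item'])
# cmds could look like ['cook the item with stove', 'slice the item with knife']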
Example #7
import os

import numpy as np
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# Tokenizer, clean_description and _FILE_PREFIX are project-local (not shown in this snippet).


class NavigationModel(nn.Module):
    """
    Model that learns to retrieve the following information from the description string:
    - cardinal directions (north, south, ...) to go from current location
    - closed doors in the current location
    """
    nsew = ['north', 'south', 'east', 'west']

    def __init__(self, device, encoder_hidden_dim=16, linear_hidden_dim=16):
        super(NavigationModel, self).__init__()

        # Word embedding (initialized from GloVe embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len,
                                      self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # encoder
        self.encoder = nn.GRU(self.embedding_dim,
                              encoder_hidden_dim,
                              batch_first=True,
                              bidirectional=True)
        self.device = device

        # 4 individual binary scorers, one per direction (north, south, east, west)
        self.nsew_scorer = nn.ModuleDict({
            k: nn.Sequential(
                nn.Linear(in_features=encoder_hidden_dim * 2,
                          out_features=linear_hidden_dim), nn.ReLU(),
                nn.Linear(in_features=linear_hidden_dim, out_features=1),
                nn.Sigmoid())
            for k in self.nsew
        })

        # Binary scorer that determines, for every word in the input, the probability that it is part of a closed door
        # e.g. 'To[0] the[0] east[0] you[0] see[0] a[0] closed[0] sliding[1] patio[1] door[1]'
        #      'There[0] is[0] an[0] open[0] metal[0] door[0]'
        self.door_finder = nn.Sequential(
            nn.Linear(in_features=encoder_hidden_dim * 2,
                      out_features=linear_hidden_dim), nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=1),
            nn.Sigmoid())

        self.to(self.device)

    def forward(self, x):
        """
        Takes a list of standard descriptions as input and returns, alongside the raw scores:
        - a list of lists of closed doors in the current location, e.g. [['green sliding door'], ...]
        - a list of lists of directions to go from the current location, e.g. [['north', 'west'], ...]
        """
        def unpadded_sequence_length(tensor):
            # number of non-padding (non-zero) tokens per sequence
            return (tensor != 0).sum(dim=1)

        x = clean_description(x)

        # encode the description on sentence level (=encoded) and word level (=out)
        tokenized = self.tokenizer.process_cmds(x, pad=True)
        lengths = unpadded_sequence_length(tokenized)
        embedded = self.embedding(tokenized)
        # pack_padded_sequence requires the lengths on the CPU
        packed_sequence = pack_padded_sequence(input=embedded,
                                               lengths=lengths.cpu(),
                                               batch_first=True,
                                               enforce_sorted=False)
        out, hidden = self.encoder(packed_sequence)
        # concatenate the final hidden states of both directions (correct for bidirectional)
        encoded = hidden.permute(1, 0, 2).reshape(hidden.size(1), -1)
        out = pad_packed_sequence(out)[0].permute(1, 0, 2)

        # determine scores for cardinal directions based on sentence encoding
        nsew_scores = {k: self.nsew_scorer[k](encoded) for k in self.nsew}

        # determine, for every word, the probability that it is part of a closed door (based on the contextual word encoding)
        door_scores = []
        for b in range(len(x)):
            new_score = self.door_finder(out[b, :, :]).squeeze(1)
            door_scores.append(new_score)
        door_scores = torch.stack(door_scores)

        # Translate the scores to commands
        nsew, doors = self.to_commands(nsew_scores, door_scores, x)
        return door_scores, nsew_scores, doors, nsew

    def to_commands(self, nsew_scores, door_scores, x):
        """ Maps the scores of the neural models (cardinal directions & closed doors) to commands. """
        # probability thresholds
        nsew_thr = 0.5
        door_thr = 0.5

        nsew = []
        doors = []
        # pad all descriptions to the same number of words
        max_words = max(len(s.split()) for s in x)
        x_pad = np.full((len(x), max_words), '<PAD>', dtype='<U60')
        for b in range(len(x)):
            for word_idx, word in enumerate(x[b].split()):
                x_pad[b, word_idx] = word

        for b in range(len(x)):
            nsew.append([k for k in self.nsew if nsew_scores[k][b] > nsew_thr])
            cmd = ' '.join([
                word for word, score in zip(
                    x_pad[b], door_scores[b].detach().tolist())
                if score > door_thr and word != '<PAD>'
            ])
            if cmd == '':
                doors.append([])
            else:
                # split on 'door' to separate multiple detected doors
                doors.append([
                    c.strip() + ' door' for c in cmd.split('door')
                    if c.strip() != ''
                ])

        return nsew, doors

    @classmethod
    def initialize_trained_model(cls, device):
        """ Initializes the model from the pre-trained weights. """
        model = cls(device=device)
        model_path = os.path.join(_FILE_PREFIX,
                                  'weights/navigation_weights_16')
        model.load_state_dict(torch.load(model_path, map_location=device),
                              strict=True)
        print('Loaded model from {}'.format(model_path))
        return model
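
A hypothetical usage sketch for NavigationModel (it assumes the trained navigation weights are available):

nav = NavigationModel.initialize_trained_model(device=torch.device('cpu'))
descriptions = ['To the east you see a closed sliding patio door. You can go north.']
door_scores, nsew_scores, doors, nsew = nav(descriptions)
# doors could look like [['sliding patio door']] and nsew like [['north', 'east']]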