def __init__(self, encoder_hidden_dim, device, linear_hidden_dim=32):
        super(ItemScorerModel, self).__init__()

        # translator model for mapping from desired actions performed on ingredients to commands that the parser understands
        # self.translator = CmdTranslator(device)
        self.translator = CmdTranslator.initialize_trained_model(device)

        # Word embedding (initialized from glove embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len,
                                      self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # RNNs
        self.encoder = nn.ModuleDict({
            k: nn.GRU(self.embedding_dim,
                      encoder_hidden_dim,
                      batch_first=True,
                      bidirectional=True)
            for k in ['recipe_directions', 'inventory']
        })

        # binary classifier determining, for every direction in the recipe, whether it still needs to be performed
        self.action_scorer = nn.Sequential(
            nn.Linear(in_features=2 * encoder_hidden_dim * 2,
                      out_features=linear_hidden_dim),
            # nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=1),
            nn.Sigmoid())

        self.device = device
        self.to(self.device)
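
# Hedged sketch (not from the original repo): every snippet in this listing
# initializes nn.Embedding from pretrained GloVe weights by assigning an
# nn.Parameter. A minimal, self-contained illustration; the random matrix
# below is a stand-in for the real GloVe matrix loaded by the Tokenizer.
import torch
import torch.nn as nn

vocab_len, embedding_dim = 10, 50
glove_matrix = torch.randn(vocab_len, embedding_dim)  # placeholder for GloVe

embedding = nn.Embedding(vocab_len, embedding_dim)
embedding.weight = nn.Parameter(glove_matrix)  # weights stay trainable

tokens = torch.tensor([[1, 2, 3]])
print(embedding(tokens).shape)  # torch.Size([1, 3, 50])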
Example #2
    def __init__(self, device, hidden_size=64, bidirectional=True, hidden_linear_size=128):
        super(Model, self).__init__()

        # Parameters
        self.device = device
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        self.obs_encoded_hidden_size = self.hidden_size * len(self._KEYS) * (2 if bidirectional else 1)
        self.cmd_encoded_hidden_size = self.hidden_size * (2 if bidirectional else 1)
        self.state_hidden = None

        # Word embedding (initialized from glove embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len, self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # Model
        # Encoder for the state dictionary
        self.observation_encoder = nn.ModuleDict(
            {k: nn.GRU(self.embedding_dim, self.hidden_size, batch_first=True, bidirectional=bidirectional).to(
                self.device) for k in self._KEYS}
        )

        self.cmd_encoder = nn.GRU(self.embedding_dim, self.hidden_size, batch_first=True, bidirectional=bidirectional)

        self.state_gru = nn.GRU(self.obs_encoded_hidden_size, self.obs_encoded_hidden_size, batch_first=True)

        self.critic = nn.Sequential(nn.Linear(self.obs_encoded_hidden_size, hidden_linear_size),
                                    nn.ReLU(),
                                    nn.Linear(hidden_linear_size, 1))

        self.att_cmd = nn.Sequential(nn.Linear(self.obs_encoded_hidden_size + self.cmd_encoded_hidden_size, hidden_linear_size),
                                    nn.ReLU(),
                                    nn.Linear(hidden_linear_size, 1))

        self.to(self.device)
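
# Hedged sketch (toy dimensions, illustrative only): the forward passes below
# flatten a bidirectional GRU's final hidden state from
# (num_directions, batch, hidden) to (batch, 2 * hidden) via permute + reshape.
import torch
import torch.nn as nn

gru = nn.GRU(input_size=8, hidden_size=16, batch_first=True, bidirectional=True)
x = torch.randn(4, 5, 8)                     # (batch, seq_len, input_size)
_, hidden = gru(x)                           # hidden: (2, 4, 16)
flat = hidden.permute(1, 0, 2).reshape(hidden.size(1), -1)
print(flat.shape)                            # torch.Size([4, 32])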
Example #3
    def __init__(self, device, encoder_hidden_dim=16, linear_hidden_dim=16):
        super(NavigationModel, self).__init__()

        # Word embedding (initialized from glove embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len,
                                      self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # encoder
        self.encoder = nn.GRU(self.embedding_dim,
                              encoder_hidden_dim,
                              batch_first=True,
                              bidirectional=True)
        self.device = device

        # 4 individual binary scorers, one for each direction (north, south, ...)
        self.nsew_scorer = nn.ModuleDict({
            k: nn.Sequential(
                nn.Linear(in_features=encoder_hidden_dim * 2,
                          out_features=linear_hidden_dim), nn.ReLU(),
                nn.Linear(in_features=linear_hidden_dim, out_features=1),
                nn.Sigmoid())
            for k in self.nsew
        })

        # Binary scorer that determines, for every word in the input, the probability that it is part of a closed door
        # e.g. 'To[0] the[0] east[0] you[0] see[0] a[0] closed[0] sliding[1] patio[1] door[1]'
        #      'There[0] is[0] an[0] open[0] metal[0] door[0]'
        self.door_finder = nn.Sequential(
            nn.Linear(in_features=encoder_hidden_dim * 2,
                      out_features=linear_hidden_dim), nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=1),
            nn.Sigmoid())

        self.to(self.device)
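
# Hedged sketch (toy sizes, untrained weights): the door_finder above applies
# one shared MLP with a sigmoid head to every contextual word encoding, giving
# a per-word probability of belonging to a closed-door phrase.
import torch
import torch.nn as nn

door_finder = nn.Sequential(nn.Linear(32, 16), nn.ReLU(),
                            nn.Linear(16, 1), nn.Sigmoid())
word_encodings = torch.randn(10, 32)         # 10 contextual word vectors
probs = door_finder(word_encodings).squeeze(1)
print(probs.shape)                           # torch.Size([10]), values in (0, 1)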
Example #4
    def __init__(self, device, encoder_hidden_dim=16, linear_hidden_dim=16):

        super(CmdTranslator, self).__init__()

        # Word embedding (initialized from glove embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len,
                                      self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # RNN to encode the input sentence
        self.encoder = nn.GRU(self.embedding_dim,
                              encoder_hidden_dim,
                              batch_first=True,
                              bidirectional=True)
        self.device = device

        # determines which of the 4 utils ('knife', 'oven', 'stove', 'BBQ') needs to be used for the command
        self.util_decoder = nn.Sequential(
            nn.Linear(in_features=encoder_hidden_dim * 2,
                      out_features=linear_hidden_dim),
            # nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=4))

        # determines which of the 4 actions ('slice', 'dice', 'chop', 'cook') needs to be used for the command
        self.verb_decoder = nn.Sequential(
            nn.Linear(in_features=encoder_hidden_dim * 2,
                      out_features=linear_hidden_dim),
            # nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=4))

        self.to(self.device)
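
# Hedged sketch (standalone, toy dimensions): the forward passes in this
# listing pack the padded token batch before the GRU so that <PAD> positions
# do not influence the final hidden state.
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence

embedded = torch.randn(3, 6, 8)              # (batch, max_len, emb_dim)
lengths = torch.tensor([6, 4, 2])            # true (unpadded) lengths
packed = pack_padded_sequence(embedded, lengths,
                              batch_first=True, enforce_sorted=False)
gru = nn.GRU(8, 16, batch_first=True, bidirectional=True)
_, hidden = gru(packed)
print(hidden.shape)                          # torch.Size([2, 3, 16])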
Example #5
    def __init__(self, device, encoder_hidden_dim=16, linear_hidden_dim=16):
        super(NavigationModel, self).__init__()

        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len, self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        self.encoder = nn.GRU(self.embedding_dim, encoder_hidden_dim, batch_first=True, bidirectional=True)
        self.device = device

        self.nsew_scorer = nn.ModuleDict({
            k: nn.Sequential(
                nn.Linear(in_features=encoder_hidden_dim * 2,
                          out_features=linear_hidden_dim),
                nn.ReLU(),
                nn.Linear(in_features=linear_hidden_dim, out_features=1),
                nn.Sigmoid())
            for k in self.nsew
        })
Example #6
    def __init__(self,
                 device,
                 state_embedding_dim=100,
                 hidden_size=64,
                 bidirectional=True,
                 hidden_linear_size=128):
        super(Model, self).__init__()

        # Parameters
        self.device = device
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        self.obs_encoded_hidden_size = self.hidden_size * (
            len(self._KEYS) -
            1) * (2 if bidirectional else 1) + state_embedding_dim
        self.cmd_encoded_hidden_size = self.hidden_size * (2 if bidirectional
                                                           else 1)
        self.state_hidden = None

        # Word embedding (initialized from glove embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len,
                                      self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # Model
        # Change state_embedding (graph embedding) dimension to embedding_dim
        # self.graph_encoder = nn.Linear(state_embedding_dim, self.embedding_dim)

        # Encoder for the state dictionary
        self.observation_encoder = nn.ModuleDict({
            k: nn.GRU(self.embedding_dim,
                      self.hidden_size,
                      batch_first=True,
                      bidirectional=bidirectional).to(self.device)
            for k in self._KEYS if k != 'state_embedding'
        })

        # Encoder for the commands
        self.cmd_encoder = nn.GRU(self.embedding_dim,
                                  self.hidden_size,
                                  batch_first=True,
                                  bidirectional=bidirectional)

        # RNN that keeps track of the encoded state over time
        self.state_gru = nn.GRU(self.obs_encoded_hidden_size,
                                self.obs_encoded_hidden_size,
                                batch_first=True)

        # Critic to determine a value for the current state
        self.critic = nn.Sequential(
            nn.Linear(self.obs_encoded_hidden_size, hidden_linear_size),
            nn.ReLU(), nn.Linear(hidden_linear_size, 1))

        # Scorer for the commands
        self.att_cmd = nn.Sequential(
            nn.Linear(
                self.obs_encoded_hidden_size + self.cmd_encoded_hidden_size,
                hidden_linear_size), nn.ReLU(),
            nn.Linear(hidden_linear_size, 1))

        self.to(self.device)
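
# Hedged sanity check (plain arithmetic, assuming the 7 _KEYS shown in
# Example #7, of which 'state_embedding' is encoded separately): with the
# defaults above, obs_encoded_hidden_size works out to 64 * 6 * 2 + 100 = 868.
hidden_size, n_text_keys, state_embedding_dim = 64, 6, 100
obs_encoded_hidden_size = hidden_size * n_text_keys * 2 + state_embedding_dim
print(obs_encoded_hidden_size)               # 868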
Example #7
class Model(nn.Module):
    """
    Model that learns to map from the current game state (dictionary of strings) and a list of high level command to the
    command that has the highest expected return.
    """
    # keys of the dictionary of the current game state
    _KEYS = [
        'observation', 'missing_items', 'unnecessary_items', 'description',
        'previous_cmds', 'state_embedding', 'required_utils'
    ]

    def __init__(self,
                 device,
                 state_embedding_dim=100,
                 hidden_size=64,
                 bidirectional=True,
                 hidden_linear_size=128):
        super(Model, self).__init__()

        # Parameters
        self.device = device
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        self.obs_encoded_hidden_size = self.hidden_size * (
            len(self._KEYS) -
            1) * (2 if bidirectional else 1) + state_embedding_dim
        self.cmd_encoded_hidden_size = self.hidden_size * (2 if bidirectional
                                                           else 1)
        self.state_hidden = None

        # Word embedding (initialized from glove embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len,
                                      self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # Model
        # Change state_embedding (graph embedding) dimension to embedding_dim
        # self.graph_encoder = nn.Linear(state_embedding_dim, self.embedding_dim)

        # Encoder for the state dictionary
        self.observation_encoder = nn.ModuleDict({
            k: nn.GRU(self.embedding_dim,
                      self.hidden_size,
                      batch_first=True,
                      bidirectional=bidirectional).to(self.device)
            for k in self._KEYS if k != 'state_embedding'
        })

        # Encoder for the commands
        self.cmd_encoder = nn.GRU(self.embedding_dim,
                                  self.hidden_size,
                                  batch_first=True,
                                  bidirectional=bidirectional)

        # RNN that keeps track of the encoded state over time
        self.state_gru = nn.GRU(self.obs_encoded_hidden_size,
                                self.obs_encoded_hidden_size,
                                batch_first=True)

        # Critic to determine a value for the current state
        self.critic = nn.Sequential(
            nn.Linear(self.obs_encoded_hidden_size, hidden_linear_size),
            nn.ReLU(), nn.Linear(hidden_linear_size, 1))

        # Scorer for the commands
        self.att_cmd = nn.Sequential(
            nn.Linear(
                self.obs_encoded_hidden_size + self.cmd_encoded_hidden_size,
                hidden_linear_size), nn.ReLU(),
            nn.Linear(hidden_linear_size, 1))

        self.to(self.device)

    def forward(self, state_description, commands):
        """
        :param state_description: Dictionary of strings with keys=_KEYS that represents the current game state
        :param commands: Set of possible commands
        :return: Scores and probabilities over the commands, the critic value, the sampled command and its index
        """
        state_description_without_graph = {}
        for state in state_description:
            if state != 'state_embedding':
                state_description_without_graph[state] = state_description[
                    state]
        input_dict = self.tokenizer.process(state_description_without_graph)
        command_strings = commands
        commands = self.tokenizer.process_cmds(commands, pad=True)

        # Encode the state embedding (graph)
        graph_embedding = torch.mean(state_description['state_embedding'],
                                     0).unsqueeze(0).to(self.device)

        # Encode the state_description
        obs_encoded = self._observation_encoding(input_dict, graph_embedding)

        if self.state_hidden is None:
            self.state_hidden = torch.zeros(
                (1, 1, self.obs_encoded_hidden_size), device=self.device)

        # encodes encoded state over time
        state_output, self.state_hidden = self.state_gru(
            obs_encoded, self.state_hidden)

        # critic value of the current state
        value = self.critic(state_output).squeeze()
        observation_hidden = self.state_hidden.squeeze(0)

        # Embed and encode commands
        cmd_embedding = self.embedding(commands)
        output, hidden = self.cmd_encoder(cmd_embedding)
        cmd_hidden = hidden.permute(1, 0, 2).reshape(
            hidden.shape[1], -1) if hidden.shape[0] == 2 else hidden

        # concatenate the encoding of the state with every encoded command individually
        observation_hidden = torch.stack([observation_hidden.squeeze()] *
                                         cmd_embedding.size(0))
        cmd_selector_input = torch.cat([cmd_hidden, observation_hidden], -1)

        # compute a score for each of the commands
        score = self.att_cmd(cmd_selector_input).squeeze()
        if len(score.shape) == 0:
            # if only one admissible_command
            score = score.unsqueeze(0)
        prob = F.softmax(score, dim=0)

        # sample from the distribution over commands
        index = prob.multinomial(num_samples=1).squeeze()
        action = command_strings[index]

        return score, prob, value, action, index

    def _observation_encoding(self, input_dict, graph_embedding):
        """ Encodes the state_dict. Each string in the state_dict is encoded individually and then concatenated. """
        assert input_dict.keys() == self.observation_encoder.keys()
        hidden_states = []
        for key, _input in sorted(input_dict.items()):
            gru = self.observation_encoder[key]
            x = _input.unsqueeze(0)
            x = self.embedding(x)
            output, hidden = gru(x)
            if len(hidden.size()) == 3:  # == bidirectional
                hidden = hidden.permute(1, 0, 2)
                hidden = hidden.reshape(hidden.size(0), -1)
            hidden_states.append(hidden)
        hidden_states.append(graph_embedding)
        hidden_states = torch.cat(hidden_states,
                                  -1).unsqueeze(1)  # (batch_size x 1 x obs_encoded_hidden_size)
        return hidden_states

    def reset_hidden(self):
        self.state_hidden = None
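
# Hedged sketch (toy dimensions, untrained layers): the actor part of
# Model.forward tiles the state encoding across the command encodings,
# concatenates, scores each pair, and samples a command from the softmax.
import torch
import torch.nn as nn
import torch.nn.functional as F

state_hidden = torch.randn(32)               # encoded game state
cmd_hidden = torch.randn(3, 24)              # 3 encoded commands
scorer = nn.Sequential(nn.Linear(32 + 24, 16), nn.ReLU(), nn.Linear(16, 1))

tiled = torch.stack([state_hidden] * cmd_hidden.size(0))
score = scorer(torch.cat([cmd_hidden, tiled], -1)).squeeze(-1)
prob = F.softmax(score, dim=0)
index = prob.multinomial(num_samples=1).squeeze()
print(index.item())                          # sampled command index (0, 1 or 2)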
Example #8
class ItemScorerModel(nn.Module):
    x_keys = ['recipe_directions', 'inventory']

    def __init__(self, encoder_hidden_dim, device, linear_hidden_dim=32):
        super(ItemScorerModel, self).__init__()

        # translator model for mapping from desired actions performed on ingredients to commands that the parser understands
        # self.translator = CmdTranslator(device)
        self.translator = CmdTranslator.initialize_trained_model(device)

        # Word embedding (initialized from glove embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len,
                                      self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # RNNs
        self.encoder = nn.ModuleDict({
            k: nn.GRU(self.embedding_dim,
                      encoder_hidden_dim,
                      batch_first=True,
                      bidirectional=True)
            for k in ['recipe_directions', 'inventory']
        })

        # binary classifier determining, for every direction in the recipe, whether it still needs to be performed
        self.action_scorer = nn.Sequential(
            nn.Linear(in_features=2 * encoder_hidden_dim * 2,
                      out_features=linear_hidden_dim),
            # nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=1),
            nn.Sigmoid())

        self.device = device
        self.to(self.device)

    def forward(self, x, return_actions=False):
        def unpadded_sequence_length(tensor):
            # number of non-<PAD> (non-zero) tokens in each row
            return ((tensor == 0).type(torch.int) <= 0).sum(dim=1)

        def encoder(list_of_str, key):
            """ Encodes a list of strings with the encoder specified by 'key'. """
            tokenized = self.tokenizer.process_cmds(list_of_str, pad=True)
            lengths = unpadded_sequence_length(tokenized)
            embedded = self.embedding(tokenized)
            packed_sequence = pack_padded_sequence(input=embedded,
                                                   lengths=lengths,
                                                   batch_first=True,
                                                   enforce_sorted=False)
            out, hidden = self.encoder[key](packed_sequence)
            hidden = hidden.permute(1, 0, 2).reshape(
                hidden.size(1), -1)  # correct for bidirectional
            return hidden

        scores = []
        pickups = []
        cmds = []
        for item, directions, inventory in zip(x['item'],
                                               x['recipe_directions'],
                                               x['inventory']):

            # encode the recipe directions

            # replace the specific ingredient name in the string for more robustness and better generalization
            clnd_directions = [
                direction.replace(item, 'item').strip()
                for direction in directions.split(' <SEP> ')
                if item in direction
            ]
            if len(clnd_directions) == 0:
                # no recipe direction to perform on the ingredient
                clnd_directions = ['nothing']
                clnd_directions_to_encode = ['nothing']
            else:
                clnd_directions_to_encode = [
                    d.split()[0] for d in clnd_directions
                ]

            # encode the recipe directions for the current ingredient
            encoded_directions = encoder(clnd_directions_to_encode,
                                         key='recipe_directions')

            # encode the inventory
            # remove the specific ingredient name from the string for more robustness and better generalization
            clnd_inventory = [
                inv.replace(item, '').strip()
                for inv in inventory.split(' <SEP> ') if item in inv
            ]
            if len(clnd_inventory) == 0:
                # ingredient is not in the inventory yet
                clnd_inventory = ['nothing']
            else:
                clnd_inventory = [clnd_inventory[0]]

            # encode the inventory for the current ingredient
            encoded_inventory = encoder(clnd_inventory, key='inventory')[0, :]

            # concatenate the encoding of the inventory with the encoding of every recipe direction
            stckd = torch.cat(
                (encoded_directions,
                 torch.stack(
                     [encoded_inventory] * encoded_directions.shape[0])),
                dim=-1)

            if clnd_directions != ['nothing']:
                # compute the binary score of the recipe directions (determines for every direction if it is needed or not)
                score = self.action_scorer(stckd)
            else:
                score = torch.Tensor([[0]]).type(torch.FloatTensor)

            scores.append(score)

            # pickup is only determined by whether the ingredient is in the inventory or not
            pickups.append(item not in inventory)

            if return_actions:
                # map the output to the actual commands
                cmds.append(
                    self.to_action(pickups[-1], clnd_directions, scores[-1],
                                   item))

        scores = pad_sequence(scores, batch_first=True,
                              padding_value=0).squeeze().type(
                                  torch.FloatTensor).to(self.device)

        if return_actions:
            return scores, cmds

        return scores

    def to_action(self, pickup, directions, scores, item):
        """
        Applies a threshold (of 0.5) to the output score of the action scorer. Above the threshold the respective recipe
        direction is mapped to an actual command via the translator model.
        """
        cmds = []
        thr = 0.5
        if pickup:
            cmds.append('take {}'.format(item))
        if directions == ['nothing']:
            return cmds
        _, _, _direction = self.translator(directions)
        for cmd_score, cmd in zip(scores, _direction):
            if cmd_score >= thr:
                cmds.append(cmd.replace('item', item))
        return cmds
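
# Hedged sketch (hand-made scores): ItemScorerModel.forward pads the
# variable-length per-item score tensors into one batch tensor with
# pad_sequence before returning them.
import torch
from torch.nn.utils.rnn import pad_sequence

scores = [torch.tensor([[0.9], [0.2]]),      # item with 2 recipe directions
          torch.tensor([[0.7]])]             # item with 1 recipe direction
padded = pad_sequence(scores, batch_first=True, padding_value=0).squeeze(-1)
print(padded)                                # tensor([[0.9, 0.2], [0.7, 0.0]])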
Example #9
class CmdTranslator(nn.Module):
    """
    Translates recipe actions to commands that the environment understands.
    E.g. 'fry the yellow omelette' -> 'cook the yellow omelette with stove'
         'dice the juicy red apple' -> 'dice the juicy red apple with knife'
    """
    verbs = ['slice', 'dice', 'chop', 'cook']
    utils = ['knife', 'oven', 'stove', 'BBQ']

    def __init__(self, device, encoder_hidden_dim=16, linear_hidden_dim=16):

        super(CmdTranslator, self).__init__()

        # Word embedding (initialized from glove embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len,
                                      self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # RNN to encode the input sentence
        self.encoder = nn.GRU(self.embedding_dim,
                              encoder_hidden_dim,
                              batch_first=True,
                              bidirectional=True)
        self.device = device

        # determines which of the 4 utils ('knife', 'oven', 'stove', 'BBQ') needs to be used for the command
        self.util_decoder = nn.Sequential(
            nn.Linear(in_features=encoder_hidden_dim * 2,
                      out_features=linear_hidden_dim),
            # nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=4))

        # determines which of the 4 actions ('slice', 'dice', 'chop', 'cook') needs to be used for the command
        self.verb_decoder = nn.Sequential(
            nn.Linear(in_features=encoder_hidden_dim * 2,
                      out_features=linear_hidden_dim),
            # nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=4))

        self.to(self.device)

    def forward(self, directions):
        """
        Takes a list of recipe directions (e.g. ['fry the item', 'slice the item']) and returns the verb and util
        score distributions together with the most likely commands (e.g. ['cook the item with stove', 'slice the item with knife']).
        """
        def unpadded_sequence_length(tensor):
            # number of non-<PAD> (non-zero) tokens in each row
            return ((tensor == 0).type(torch.int) <= 0).sum(dim=1)

        # encode the input
        tokenized = self.tokenizer.process_cmds(directions, pad=True)
        lengths = unpadded_sequence_length(tokenized)
        embedded = self.embedding(tokenized)
        packed_sequence = pack_padded_sequence(input=embedded,
                                               lengths=lengths,
                                               batch_first=True,
                                               enforce_sorted=False)
        out, hidden = self.encoder(packed_sequence)
        encoded = hidden.permute(1, 0,
                                 2).reshape(hidden.size(1),
                                            -1)  # correct for bidirectional

        # compute the scores for the verbs and utils
        verb_distribution = self.verb_decoder(encoded)
        util_distribution = self.util_decoder(encoded)

        # use the verb and util with the highest probability for the returned command
        verb_idx = torch.argmax(verb_distribution, dim=-1)
        util_idx = torch.argmax(util_distribution, dim=-1)
        cmds = [
            '{} the item with {}'.format(self.verbs[verb_idx[idx]],
                                         self.utils[util_idx[idx]])
            for idx in range(len(directions))
        ]
        return verb_distribution, util_distribution, cmds

    @classmethod
    def initialize_trained_model(cls, device):
        """ Initializes the model from the pre-trained weights. """
        model = cls(device=device)
        model_path = os.path.join(_FILE_PREFIX,
                                  'weights/translator_weights_16')
        model.load_state_dict(torch.load(model_path, map_location=device),
                              strict=True)
        print('Loaded model from {}'.format(model_path))
        return model
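
# Hedged sketch (hand-made logits, no trained weights): the final decoding
# step of CmdTranslator.forward takes the argmax over the verb and util
# logits and fills them into the command template.
import torch

verbs = ['slice', 'dice', 'chop', 'cook']
utils = ['knife', 'oven', 'stove', 'BBQ']
verb_logits = torch.tensor([[0.1, 0.2, 0.1, 2.0]])   # toy decoder outputs
util_logits = torch.tensor([[0.3, 0.1, 1.5, 0.2]])

verb_idx = torch.argmax(verb_logits, dim=-1)
util_idx = torch.argmax(util_logits, dim=-1)
print('{} the item with {}'.format(verbs[verb_idx[0].item()],
                                   utils[util_idx[0].item()]))
# cook the item with stove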
Example #10
class Model(nn.Module):

    # keys of the dictionary of the current game state
    _KEYS = ['observation', 'missing_items', 'unnecessary_items', 'location', 'description', 'previous_cmds',
             'required_utils', 'discovered_locations']

    def __init__(self, device, hidden_size=64, bidirectional=True, hidden_linear_size=128):
        super(Model, self).__init__()

        # Parameters
        self.device = device
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        self.obs_encoded_hidden_size = self.hidden_size * len(self._KEYS) * (2 if bidirectional else 1)
        self.cmd_encoded_hidden_size = self.hidden_size * (2 if bidirectional else 1)
        self.state_hidden = None

        # Word embedding (initialized from glove embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len, self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # Model
        # Encoder for the state dictionary
        self.observation_encoder = nn.ModuleDict(
            {k: nn.GRU(self.embedding_dim, self.hidden_size, batch_first=True, bidirectional=bidirectional).to(
                self.device) for k in self._KEYS}
        )

        self.cmd_encoder = nn.GRU(self.embedding_dim, self.hidden_size, batch_first=True, bidirectional=bidirectional)

        self.state_gru = nn.GRU(self.obs_encoded_hidden_size, self.obs_encoded_hidden_size, batch_first=True)

        self.critic = nn.Sequential(nn.Linear(self.obs_encoded_hidden_size, hidden_linear_size),
                                    nn.ReLU(),
                                    nn.Linear(hidden_linear_size, 1))

        self.att_cmd = nn.Sequential(nn.Linear(self.obs_encoded_hidden_size + self.cmd_encoded_hidden_size, hidden_linear_size),
                                    nn.ReLU(),
                                    nn.Linear(hidden_linear_size, 1))

        self.to(self.device)


    def forward(self, state_description, commands):
 
        input_dict = self.tokenizer.process(state_description)
        command_strings = commands
        commands = self.tokenizer.process_cmds(commands, pad=True)

        obs_encoded = self._observation_encoding(input_dict)

        if self.state_hidden is None:
            self.state_hidden = torch.zeros((1, 1, self.obs_encoded_hidden_size), device=self.device)

        state_output, self.state_hidden = self.state_gru(obs_encoded, self.state_hidden)

        value = self.critic(state_output).squeeze()
        observation_hidden = self.state_hidden.squeeze(0)
        cmd_embedding = self.embedding(commands)
        output, hidden = self.cmd_encoder(cmd_embedding)
        cmd_hidden = hidden.permute(1, 0, 2).reshape(hidden.shape[1], -1) if hidden.shape[0] == 2 else hidden

        observation_hidden = torch.stack([observation_hidden.squeeze()] * cmd_embedding.size(0))
        cmd_selector_input = torch.cat([cmd_hidden, observation_hidden], -1)

        score = self.att_cmd(cmd_selector_input).squeeze()
        if len(score.shape) == 0:
            # if only one admissible_command
            score = score.unsqueeze(0)
        prob = F.softmax(score, dim=0)

        index = prob.multinomial(num_samples=1).squeeze()
        action = command_strings[index]

        return score, prob, value, action, index
Example #11
class NavigationModel(nn.Module):
    """
    Model that learns to retrieve the following information from the description string:
    - cardinal directions (north, south, ...) to go from current location
    - closed doors in the current location
    """
    nsew = ['north', 'south', 'east', 'west']

    def __init__(self, device, encoder_hidden_dim=16, linear_hidden_dim=16):
        super(NavigationModel, self).__init__()

        # Word embedding (initialized from glove embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len,
                                      self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # encoder
        self.encoder = nn.GRU(self.embedding_dim,
                              encoder_hidden_dim,
                              batch_first=True,
                              bidirectional=True)
        self.device = device

        # 4 individual binary scorers, one for each direction (north, south, ...)
        self.nsew_scorer = nn.ModuleDict({
            k: nn.Sequential(
                nn.Linear(in_features=encoder_hidden_dim * 2,
                          out_features=linear_hidden_dim), nn.ReLU(),
                nn.Linear(in_features=linear_hidden_dim, out_features=1),
                nn.Sigmoid())
            for k in self.nsew
        })

        # Binary scorer that determines, for every word in the input, the probability that it is part of a closed door
        # e.g. 'To[0] the[0] east[0] you[0] see[0] a[0] closed[0] sliding[1] patio[1] door[1]'
        #      'There[0] is[0] an[0] open[0] metal[0] door[0]'
        self.door_finder = nn.Sequential(
            nn.Linear(in_features=encoder_hidden_dim * 2,
                      out_features=linear_hidden_dim), nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=1),
            nn.Sigmoid())

        self.to(self.device)

    def forward(self, x):
        """
        Takes a list of standard description as input and returns:
         - list of list of closed doors in the current location, e.g. [['green sliding door'], ...]
         - list of list of directions to go from the current location, e.g. [['north', 'west'], ...]
         """
        def unpadded_sequence_length(tensor):
            # number of non-<PAD> (non-zero) tokens in each row
            return ((tensor == 0).type(torch.int) <= 0).sum(dim=1)

        x = clean_description(x)

        # encode the description on sentence level (=encoded) and word level (=out)
        tokenized = self.tokenizer.process_cmds(x, pad=True)
        lengths = unpadded_sequence_length(tokenized)
        embedded = self.embedding(tokenized)
        packed_sequence = pack_padded_sequence(input=embedded,
                                               lengths=lengths,
                                               batch_first=True,
                                               enforce_sorted=False)
        out, hidden = self.encoder(packed_sequence)
        encoded = hidden.permute(1, 0,
                                 2).reshape(hidden.size(1),
                                            -1)  # correct for bidirectional
        out = pad_packed_sequence(out)[0].permute(1, 0, 2)

        # determine scores for cardinal directions based on sentence encoding
        nsew_scores = {k: self.nsew_scorer[k](encoded) for k in self.nsew}

        # determine, for every word, the probability that it is part of a closed door (based on the contextual word encoding)
        door_scores = []
        for b in range(len(x)):
            new_score = self.door_finder(out[b, :, :]).squeeze(1)
            door_scores.append(new_score)
        door_scores = torch.stack(door_scores)

        # Translate the scores to commands
        nsew, doors = self.to_commands(nsew_scores, door_scores, x)
        return door_scores, nsew_scores, doors, nsew

    def to_commands(self, nsew_scores, door_scores, x):
        """ Maps the scores of the neural models (cardinal directions & closed doors) to commands. """
        # probability thresholds
        nsew_thr = 0.5
        door_thr = 0.5

        nsew = []
        doors = []
        x_pad = np.array([['<PAD>'] * max([len(s.split()) for s in x])] *
                         len(x)).astype('<U60')
        for b in range(len(x)):
            for word_idx, word in enumerate(x[b].split()):
                x_pad[b, word_idx] = word

        for b in range(len(x)):
            nsew.append([k for k in self.nsew if nsew_scores[k][b] > nsew_thr])
            cmd = ' '.join([
                word for word, score in
                zip(list(x_pad[b]),
                    [v.item() for v in list(door_scores[b].detach())])
                if score > door_thr and word != '<PAD>'
            ])
            if cmd == '':
                doors.append([])
            else:
                doors.append([
                    c.strip() + ' door' for c in cmd.split('door')
                    if c.strip() != ''
                ])

        return nsew, doors

    @classmethod
    def initialize_trained_model(cls, device):
        """ Initializes the model from the pre-trained weights. """
        model = cls(device=device)
        model_path = os.path.join(_FILE_PREFIX,
                                  'weights/navigation_weights_16')
        model.load_state_dict(torch.load(model_path, map_location=device),
                              strict=True)
        print('Loaded model from {}'.format(model_path))
        return model
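
# Hedged sketch (hand-made scores, no model involved): to_commands keeps the
# words whose door score clears the 0.5 threshold and re-splits the result on
# 'door' to recover the individual door phrases.
words = 'To the east you see a closed sliding patio door'.split()
scores = [0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.4, 0.9, 0.8, 0.95]

kept = ' '.join(w for w, s in zip(words, scores) if s > 0.5)
doors = [c.strip() + ' door' for c in kept.split('door') if c.strip()]
print(doors)                                 # ['sliding patio door']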