import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence, pad_sequence

# `Tokenizer` and `clean_description` are project-internal helpers (GloVe-based
# tokenization and description cleanup) assumed to be importable from the
# surrounding package; they are not defined in this section.
# Assumed: the pre-trained weights live next to this module.
_FILE_PREFIX = os.path.dirname(os.path.abspath(__file__))


class Model(nn.Module):
    """
    Model that learns to map from the current game state (dictionary of strings)
    and a list of high-level commands to the command that has the highest
    expected return.
    """

    # keys of the dictionary of the current game state
    _KEYS = [
        'observation', 'missing_items', 'unnecessary_items', 'description',
        'previous_cmds', 'state_embedding', 'required_utils'
    ]

    def __init__(self, device, state_embedding_dim=100, hidden_size=64,
                 bidirectional=True, hidden_linear_size=128):
        super(Model, self).__init__()

        # Parameters
        self.device = device
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        self.obs_encoded_hidden_size = (
            self.hidden_size * (len(self._KEYS) - 1) * (2 if bidirectional else 1)
            + state_embedding_dim)
        self.cmd_encoded_hidden_size = self.hidden_size * (2 if bidirectional else 1)
        self.state_hidden = None

        # Word embedding (initialized from GloVe embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len, self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # Model
        # Encoder for the state dictionary (the graph embedding is used as-is)
        self.observation_encoder = nn.ModuleDict({
            k: nn.GRU(self.embedding_dim, self.hidden_size, batch_first=True,
                      bidirectional=bidirectional).to(self.device)
            for k in self._KEYS if k != 'state_embedding'
        })

        # Encoder for the commands
        self.cmd_encoder = nn.GRU(self.embedding_dim, self.hidden_size,
                                  batch_first=True, bidirectional=bidirectional)

        # RNN that keeps track of the encoded state over time
        self.state_gru = nn.GRU(self.obs_encoded_hidden_size,
                                self.obs_encoded_hidden_size, batch_first=True)

        # Critic that estimates a value for the current state
        self.critic = nn.Sequential(
            nn.Linear(self.obs_encoded_hidden_size, hidden_linear_size),
            nn.ReLU(),
            nn.Linear(hidden_linear_size, 1))

        # Scorer for the commands
        self.att_cmd = nn.Sequential(
            nn.Linear(self.obs_encoded_hidden_size + self.cmd_encoded_hidden_size,
                      hidden_linear_size),
            nn.ReLU(),
            nn.Linear(hidden_linear_size, 1))

        self.to(self.device)

    def forward(self, state_description, commands):
        """
        :param state_description: Dictionary of strings with keys=_KEYS that
            represents the current game state
        :param commands: Set of possible commands
        :return: Best command from the set of possible commands
        """
        state_description_without_graph = {
            k: v for k, v in state_description.items() if k != 'state_embedding'
        }
        input_dict = self.tokenizer.process(state_description_without_graph)
        command_strings = commands
        commands = self.tokenizer.process_cmds(commands, pad=True)

        # Encode the state embedding (graph)
        graph_embedding = torch.mean(state_description['state_embedding'],
                                     0).unsqueeze(0).to(self.device)

        # Encode the state_description
        obs_encoded = self._observation_encoding(input_dict, graph_embedding)

        if self.state_hidden is None:
            self.state_hidden = torch.zeros((1, 1, self.obs_encoded_hidden_size),
                                            device=self.device)

        # track the encoded state over time
        state_output, self.state_hidden = self.state_gru(obs_encoded, self.state_hidden)

        # critic value of the current state
        value = self.critic(state_output).squeeze()
        observation_hidden = self.state_hidden.squeeze(0)

        # Embed and encode the commands
        cmd_embedding = self.embedding(commands)
        output, hidden = self.cmd_encoder(cmd_embedding)
        cmd_hidden = (hidden.permute(1, 0, 2).reshape(hidden.shape[1], -1)
                      if hidden.shape[0] == 2 else hidden)

        # concatenate the encoding of the state with every encoded command individually
        observation_hidden = torch.stack([observation_hidden.squeeze()] * cmd_embedding.size(0))
        cmd_selector_input = torch.cat([cmd_hidden, observation_hidden], -1)

        # compute a score for each of the commands
        score = self.att_cmd(cmd_selector_input).squeeze()
        if len(score.shape) == 0:  # only one admissible command
            score = score.unsqueeze(0)
        prob = F.softmax(score, dim=0)

        # sample from the distribution over commands
        index = prob.multinomial(num_samples=1).squeeze()
        action = command_strings[index]
        return score, prob, value, action, index

    def _observation_encoding(self, input_dict, graph_embedding):
        """
        Encodes the state dict. Each string in the state dict is encoded
        individually and the resulting hidden states are concatenated.
        """
        assert input_dict.keys() == self.observation_encoder.keys()
        hidden_states = []
        for key, _input in sorted(input_dict.items()):
            gru = self.observation_encoder[key]
            x = _input.unsqueeze(0)
            x = self.embedding(x)
            output, hidden = gru(x)
            # flatten the (num_directions, batch, hidden) state to (batch, num_directions * hidden)
            hidden = hidden.permute(1, 0, 2)
            hidden = hidden.reshape(hidden.size(0), -1)
            hidden_states.append(hidden)
        hidden_states.append(graph_embedding)
        # (batch_size x 1 x obs_encoded_hidden_size)
        hidden_states = torch.cat(hidden_states, -1).unsqueeze(1)
        return hidden_states

    def reset_hidden(self):
        self.state_hidden = None
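
# --- Illustration (not from the original source) ---
# Minimal, self-contained sketch of the command-selection pattern in
# Model.forward above: the recurrent state encoding is paired with every
# encoded command, each pair is scored, and a command index is sampled from
# the softmax over the scores. All sizes and tensors are made-up stand-ins.
def _demo_cmd_selection():
    obs_size, cmd_size, n_cmds = 32, 16, 3
    att_cmd = nn.Sequential(nn.Linear(obs_size + cmd_size, 8), nn.ReLU(), nn.Linear(8, 1))
    state_enc = torch.randn(obs_size)        # stands in for the state GRU hidden state
    cmd_enc = torch.randn(n_cmds, cmd_size)  # stands in for the encoded commands
    pairs = torch.cat([cmd_enc, state_enc.expand(n_cmds, -1)], dim=-1)
    score = att_cmd(pairs).squeeze(-1)
    prob = F.softmax(score, dim=0)
    index = prob.multinomial(num_samples=1).item()
    print('sampled command index:', index)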
class ItemScorerModel(nn.Module):

    x_keys = ['recipe_directions', 'inventory']

    def __init__(self, encoder_hidden_dim, device, linear_hidden_dim=32):
        super(ItemScorerModel, self).__init__()

        # translator model for mapping from desired actions performed on
        # ingredients to commands that the parser understands
        self.translator = CmdTranslator.initialize_trained_model(device)

        # Word embedding (initialized from GloVe embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len, self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # RNNs
        self.encoder = nn.ModuleDict({
            k: nn.GRU(self.embedding_dim, encoder_hidden_dim,
                      batch_first=True, bidirectional=True)
            for k in ['recipe_directions', 'inventory']
        })

        # binary classifier determining, for every direction in the recipe,
        # whether it is still necessary to perform it
        self.action_scorer = nn.Sequential(
            nn.Linear(in_features=2 * encoder_hidden_dim * 2, out_features=linear_hidden_dim),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=1),
            nn.Sigmoid())

        self.device = device
        self.to(self.device)

    def forward(self, x, return_actions=False):

        def unpadded_sequence_length(tensor):
            # number of non-padding (non-zero) tokens per sequence
            return (tensor != 0).sum(dim=1)

        def encoder(list_of_str, key):
            """Encodes a list of strings with the encoder specified by `key`."""
            tokenized = self.tokenizer.process_cmds(list_of_str, pad=True)
            lengths = unpadded_sequence_length(tokenized).cpu()  # pack_padded_sequence expects CPU lengths
            embedded = self.embedding(tokenized)
            packed_sequence = pack_padded_sequence(input=embedded, lengths=lengths,
                                                   batch_first=True, enforce_sorted=False)
            out, hidden = self.encoder[key](packed_sequence)
            # flatten the bidirectional hidden state to (batch, 2 * hidden)
            hidden = hidden.permute(1, 0, 2).reshape(hidden.size(1), -1)
            return hidden

        scores = []
        pickups = []
        cmds = []
        for item, directions, inventory in zip(x['item'], x['recipe_directions'], x['inventory']):
            # encode the recipe directions;
            # replace the specific ingredient name for more robustness and better generalization
            clnd_directions = [
                direction.replace(item, 'item').strip()
                for direction in directions.split(' <SEP> ') if item in direction
            ]
            if len(clnd_directions) == 0:
                # no recipe direction to perform on the ingredient
                clnd_directions = ['nothing']
                clnd_directions_to_encode = ['nothing']
            else:
                clnd_directions_to_encode = [d.split()[0] for d in clnd_directions]

            # encode the recipe directions for the current ingredient
            encoded_directions = encoder(clnd_directions_to_encode, key='recipe_directions')

            # encode the inventory;
            # remove the specific ingredient name for more robustness and better generalization
            clnd_inventory = [
                inv.replace(item, '').strip()
                for inv in inventory.split(' <SEP> ') if item in inv
            ]
            if len(clnd_inventory) == 0:
                # ingredient is not in the inventory yet
                clnd_inventory = ['nothing']
            else:
                clnd_inventory = [clnd_inventory[0]]

            # encode the inventory for the current ingredient
            encoded_inventory = encoder(clnd_inventory, key='inventory')[0, :]

            # concatenate the inventory encoding to the encoding of every recipe direction
            stckd = torch.cat(
                (encoded_directions,
                 torch.stack([encoded_inventory] * encoded_directions.shape[0])),
                dim=-1)

            if clnd_directions != ['nothing']:
                # binary score per recipe direction (does it still need to be performed?)
                score = self.action_scorer(stckd)
            else:
                score = torch.zeros((1, 1), dtype=torch.float)
            scores.append(score)

            # pickup is only determined by whether the ingredient is in the inventory or not
            pickups.append(item not in inventory)
            if return_actions:
                # map the output to the actual commands
                cmds.append(self.to_action(pickups[-1], clnd_directions, scores[-1], item))

        scores = pad_sequence(scores, batch_first=True,
                              padding_value=0).squeeze().type(torch.FloatTensor).to(self.device)
        if return_actions:
            return scores, cmds
        return scores

    def to_action(self, pickup, directions, scores, item):
        """
        Applies a threshold (of 0.5) to the output scores of the action scorer.
        Above the threshold, the respective recipe direction is mapped to an
        actual command via the translator model.
        """
        cmds = []
        thr = 0.5
        if pickup:
            cmds.append('take {}'.format(item))
        if directions == ['nothing']:
            return cmds
        _, _, _direction = self.translator(directions)
        for cmd_score, cmd in zip(scores, _direction):
            if cmd_score >= thr:
                cmds.append(cmd.replace('item', item))
        return cmds
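
# --- Illustration (not from the original source) ---
# Minimal, dependency-free sketch of the string cleaning ItemScorerModel
# applies before encoding: recipe directions are split on ' <SEP> ', filtered
# to the current ingredient, and the ingredient name is replaced by the
# generic token 'item'. The values below are made up for demonstration.
def _demo_direction_cleaning():
    item = 'red apple'
    directions = 'dice the red apple <SEP> fry the yellow potato'
    clnd = [d.replace(item, 'item').strip()
            for d in directions.split(' <SEP> ') if item in d]
    print(clnd)  # ['dice the item']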
class CmdTranslator(nn.Module):
    """
    Translates recipe actions to commands that the environment understands.
    E.g. 'fry the yellow omelette' -> 'cook the yellow omelette with stove'
         'dice the juicy red apple' -> 'dice the juicy red apple with knife'
    """
    verbs = ['slice', 'dice', 'chop', 'cook']
    utils = ['knife', 'oven', 'stove', 'BBQ']

    def __init__(self, device, encoder_hidden_dim=16, linear_hidden_dim=16):
        super(CmdTranslator, self).__init__()

        # Word embedding (initialized from GloVe embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len, self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # RNN to encode the input sentence
        self.encoder = nn.GRU(self.embedding_dim, encoder_hidden_dim,
                              batch_first=True, bidirectional=True)
        self.device = device

        # determines which of the 4 utils ('knife', 'oven', 'stove', 'BBQ') to use in the command
        self.util_decoder = nn.Sequential(
            nn.Linear(in_features=encoder_hidden_dim * 2, out_features=linear_hidden_dim),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=4))

        # determines which of the 4 actions ('slice', 'dice', 'chop', 'cook') to use in the command
        self.verb_decoder = nn.Sequential(
            nn.Linear(in_features=encoder_hidden_dim * 2, out_features=linear_hidden_dim),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=4))

        self.to(self.device)

    def forward(self, directions):
        """
        Takes a list of recipe directions (e.g. ['fry the item', 'slice the item'])
        and returns the most likely commands
        (['cook the item with stove', 'slice the item with knife']).
        """

        def unpadded_sequence_length(tensor):
            # number of non-padding (non-zero) tokens per sequence
            return (tensor != 0).sum(dim=1)

        # encode the input
        tokenized = self.tokenizer.process_cmds(directions, pad=True)
        lengths = unpadded_sequence_length(tokenized).cpu()  # pack_padded_sequence expects CPU lengths
        embedded = self.embedding(tokenized)
        packed_sequence = pack_padded_sequence(input=embedded, lengths=lengths,
                                               batch_first=True, enforce_sorted=False)
        out, hidden = self.encoder(packed_sequence)
        # flatten the bidirectional hidden state to (batch, 2 * hidden)
        encoded = hidden.permute(1, 0, 2).reshape(hidden.size(1), -1)

        # compute the scores for the verbs and utils
        verb_distribution = self.verb_decoder(encoded)
        util_distribution = self.util_decoder(encoded)

        # use the verb and util with the highest score for the returned command
        verb_idx = torch.argmax(verb_distribution, dim=-1)
        util_idx = torch.argmax(util_distribution, dim=-1)
        cmds = [
            '{} the item with {}'.format(self.verbs[verb_idx[idx]], self.utils[util_idx[idx]])
            for idx in range(len(directions))
        ]
        return verb_distribution, util_distribution, cmds

    @classmethod
    def initialize_trained_model(cls, device):
        """Initializes the model from the pre-trained weights."""
        model = cls(device=device)
        model_path = os.path.join(_FILE_PREFIX, 'weights/translator_weights_16')
        model.load_state_dict(torch.load(model_path, map_location=device), strict=True)
        print('Loaded model from {}'.format(model_path))
        return model
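
# --- Illustration (not from the original source) ---
# Minimal sketch of how CmdTranslator turns its two 4-way classifier outputs
# into command strings: argmax over the verb and util logits, then fill a
# fixed template. Random logits stand in for the decoder outputs.
def _demo_translator_decoding():
    verbs = ['slice', 'dice', 'chop', 'cook']
    utils = ['knife', 'oven', 'stove', 'BBQ']
    verb_logits = torch.randn(2, 4)  # one row per input direction
    util_logits = torch.randn(2, 4)
    verb_idx = torch.argmax(verb_logits, dim=-1)
    util_idx = torch.argmax(util_logits, dim=-1)
    cmds = ['{} the item with {}'.format(verbs[v], utils[u])
            for v, u in zip(verb_idx.tolist(), util_idx.tolist())]
    print(cmds)  # e.g. ['cook the item with stove', 'slice the item with knife']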
class Model(nn.Module):

    # keys of the dictionary of the current game state
    _KEYS = [
        'observation', 'missing_items', 'unnecessary_items', 'location',
        'description', 'previous_cmds', 'required_utils', 'discovered_locations'
    ]

    def __init__(self, device, hidden_size=64, bidirectional=True, hidden_linear_size=128):
        super(Model, self).__init__()

        # Parameters
        self.device = device
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        self.obs_encoded_hidden_size = self.hidden_size * len(self._KEYS) * (2 if bidirectional else 1)
        self.cmd_encoded_hidden_size = self.hidden_size * (2 if bidirectional else 1)
        self.state_hidden = None

        # Word embedding (initialized from GloVe embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len, self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # Model
        # Encoder for the state dictionary
        self.observation_encoder = nn.ModuleDict({
            k: nn.GRU(self.embedding_dim, self.hidden_size, batch_first=True,
                      bidirectional=bidirectional).to(self.device)
            for k in self._KEYS
        })

        # Encoder for the commands
        self.cmd_encoder = nn.GRU(self.embedding_dim, self.hidden_size,
                                  batch_first=True, bidirectional=bidirectional)

        # RNN that keeps track of the encoded state over time
        self.state_gru = nn.GRU(self.obs_encoded_hidden_size,
                                self.obs_encoded_hidden_size, batch_first=True)

        # Critic that estimates a value for the current state
        self.critic = nn.Sequential(
            nn.Linear(self.obs_encoded_hidden_size, hidden_linear_size),
            nn.ReLU(),
            nn.Linear(hidden_linear_size, 1))

        # Scorer for the commands
        self.att_cmd = nn.Sequential(
            nn.Linear(self.obs_encoded_hidden_size + self.cmd_encoded_hidden_size,
                      hidden_linear_size),
            nn.ReLU(),
            nn.Linear(hidden_linear_size, 1))

        self.to(self.device)

    def forward(self, state_description, commands):
        input_dict = self.tokenizer.process(state_description)
        command_strings = commands
        commands = self.tokenizer.process_cmds(commands, pad=True)

        # Encode the state_description
        obs_encoded = self._observation_encoding(input_dict)

        if self.state_hidden is None:
            self.state_hidden = torch.zeros((1, 1, self.obs_encoded_hidden_size),
                                            device=self.device)

        # track the encoded state over time
        state_output, self.state_hidden = self.state_gru(obs_encoded, self.state_hidden)

        # critic value of the current state
        value = self.critic(state_output).squeeze()
        observation_hidden = self.state_hidden.squeeze(0)

        # Embed and encode the commands
        cmd_embedding = self.embedding(commands)
        output, hidden = self.cmd_encoder(cmd_embedding)
        cmd_hidden = (hidden.permute(1, 0, 2).reshape(hidden.shape[1], -1)
                      if hidden.shape[0] == 2 else hidden)

        # concatenate the encoding of the state with every encoded command individually
        observation_hidden = torch.stack([observation_hidden.squeeze()] * cmd_embedding.size(0))
        cmd_selector_input = torch.cat([cmd_hidden, observation_hidden], -1)

        # compute a score for each of the commands
        score = self.att_cmd(cmd_selector_input).squeeze()
        if len(score.shape) == 0:  # only one admissible command
            score = score.unsqueeze(0)
        prob = F.softmax(score, dim=0)

        # sample from the distribution over commands
        index = prob.multinomial(num_samples=1).squeeze()
        action = command_strings[index]
        return score, prob, value, action, index

    def _observation_encoding(self, input_dict):
        """
        Encodes the state dict. Each string in the state dict is encoded
        individually and the resulting hidden states are concatenated.
        """
        assert input_dict.keys() == self.observation_encoder.keys()
        hidden_states = []
        for key, _input in sorted(input_dict.items()):
            gru = self.observation_encoder[key]
            x = self.embedding(_input.unsqueeze(0))
            output, hidden = gru(x)
            # flatten the (num_directions, batch, hidden) state to (batch, num_directions * hidden)
            hidden = hidden.permute(1, 0, 2)
            hidden = hidden.reshape(hidden.size(0), -1)
            hidden_states.append(hidden)
        # (batch_size x 1 x obs_encoded_hidden_size)
        return torch.cat(hidden_states, -1).unsqueeze(1)

    def reset_hidden(self):
        self.state_hidden = None
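
# --- Illustration (not from the original source) ---
# Minimal sketch of the recurrent state tracking used by Model: a GRU whose
# hidden state is carried across forward calls, so each step's observation
# encoding is conditioned on the episode history. Sizes are made up.
def _demo_state_tracking():
    size = 16
    state_gru = nn.GRU(size, size, batch_first=True)
    state_hidden = torch.zeros(1, 1, size)     # reset at the start of an episode
    for _step in range(3):
        obs_encoded = torch.randn(1, 1, size)  # stands in for the encoded state dict
        state_output, state_hidden = state_gru(obs_encoded, state_hidden)
    print(state_output.shape)  # torch.Size([1, 1, 16])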
class NavigationModel(nn.Module):
    """
    Model that learns to retrieve the following information from the description string:
    - cardinal directions (north, south, ...) to go from the current location
    - closed doors in the current location
    """
    nsew = ['north', 'south', 'east', 'west']

    def __init__(self, device, encoder_hidden_dim=16, linear_hidden_dim=16):
        super(NavigationModel, self).__init__()

        # Word embedding (initialized from GloVe embeddings)
        self.tokenizer = Tokenizer(device=device)
        self.embedding_dim = self.tokenizer.embedding_dim
        self.embedding = nn.Embedding(self.tokenizer.vocab_len, self.embedding_dim)
        if self.tokenizer.embedding_init is not None:
            self.embedding.weight = nn.Parameter(self.tokenizer.embedding_init)

        # encoder
        self.encoder = nn.GRU(self.embedding_dim, encoder_hidden_dim,
                              batch_first=True, bidirectional=True)
        self.device = device

        # 4 individual binary scorers, one per cardinal direction (north, south, ...)
        self.nsew_scorer = nn.ModuleDict({
            k: nn.Sequential(
                nn.Linear(in_features=encoder_hidden_dim * 2, out_features=linear_hidden_dim),
                nn.ReLU(),
                nn.Linear(in_features=linear_hidden_dim, out_features=1),
                nn.Sigmoid())
            for k in self.nsew
        })

        # Binary scorer that determines, for every word in the input, the probability
        # that it is part of a closed-door mention, e.g.
        # 'To[0] the[0] east[0] you[0] see[0] a[0] closed[0] sliding[1] patio[1] door[1]'
        # 'There[0] is[0] an[0] open[0] metal[0] door[0]'
        self.door_finder = nn.Sequential(
            nn.Linear(in_features=encoder_hidden_dim * 2, out_features=linear_hidden_dim),
            nn.ReLU(),
            nn.Linear(in_features=linear_hidden_dim, out_features=1),
            nn.Sigmoid())

        self.to(self.device)

    def forward(self, x):
        """
        Takes a list of standard descriptions as input and returns:
        - list of lists of closed doors in the current location, e.g. [['green sliding door'], ...]
        - list of lists of directions to go from the current location, e.g. [['north', 'west'], ...]
        """

        def unpadded_sequence_length(tensor):
            # number of non-padding (non-zero) tokens per sequence
            return (tensor != 0).sum(dim=1)

        x = clean_description(x)

        # encode the description on sentence level (=encoded) and word level (=out)
        tokenized = self.tokenizer.process_cmds(x, pad=True)
        lengths = unpadded_sequence_length(tokenized).cpu()  # pack_padded_sequence expects CPU lengths
        embedded = self.embedding(tokenized)
        packed_sequence = pack_padded_sequence(input=embedded, lengths=lengths,
                                               batch_first=True, enforce_sorted=False)
        out, hidden = self.encoder(packed_sequence)
        # flatten the bidirectional hidden state to (batch, 2 * hidden)
        encoded = hidden.permute(1, 0, 2).reshape(hidden.size(1), -1)
        out = pad_packed_sequence(out)[0].permute(1, 0, 2)

        # determine the scores for the cardinal directions based on the sentence encoding
        nsew_scores = {k: self.nsew_scorer[k](encoded) for k in self.nsew}

        # determine, for every word, the probability that it is part of a closed door
        # (based on the contextual word encoding)
        door_scores = []
        for b in range(len(x)):
            new_score = self.door_finder(out[b, :, :]).squeeze(1)
            door_scores.append(new_score)
        door_scores = torch.stack(door_scores)

        # Translate the scores to commands
        nsew, doors = self.to_commands(nsew_scores, door_scores, x)
        return door_scores, nsew_scores, doors, nsew

    def to_commands(self, nsew_scores, door_scores, x):
        """
        Maps the scores of the neural models (cardinal directions & closed doors) to commands.
        """
        # probability thresholds
        nsew_thr = 0.5
        door_thr = 0.5

        nsew = []
        doors = []

        # pad all descriptions to the same number of words
        x_pad = np.array([['<PAD>'] * max([len(s.split()) for s in x])] * len(x)).astype('<U60')
        for b in range(len(x)):
            for word_idx, word in enumerate(x[b].split()):
                x_pad[b, word_idx] = word

        for b in range(len(x)):
            nsew.append([k for k in self.nsew if nsew_scores[k][b] > nsew_thr])
            # keep the words whose door score exceeds the threshold
            cmd = ' '.join([
                word for word, score in zip(
                    list(x_pad[b]),
                    [v.item() for v in list(door_scores[b].detach())])
                if score > door_thr and word != '<PAD>'
            ])
            if cmd == '':
                doors.append([])
            else:
                # split on 'door' to separate multiple door mentions
                doors.append([
                    c.strip() + ' door' for c in cmd.split('door') if c.strip() != ''
                ])
        return nsew, doors

    @classmethod
    def initialize_trained_model(cls, device):
        """Initializes the model from the pre-trained weights."""
        model = cls(device=device)
        model_path = os.path.join(_FILE_PREFIX, 'weights/navigation_weights_16')
        model.load_state_dict(torch.load(model_path, map_location=device), strict=True)
        print('Loaded model from {}'.format(model_path))
        return model