def _load_pre_trained(weights_matrix, gpu, non_trainable=False):
    # Move the pre-trained weights to the GPU only when requested.
    weights_matrix = torch.Tensor(weights_matrix).cuda() if gpu else torch.Tensor(weights_matrix)
    num_embeddings, embedding_dim = weights_matrix.size()
    emb_layer = Embedding(num_embeddings, embedding_dim)
    emb_layer.load_state_dict({'weight': weights_matrix})
    if non_trainable:
        emb_layer.weight.requires_grad = False
    return emb_layer
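# A minimal usage sketch for _load_pre_trained; the random 100 x 50 weight
# matrix is an illustrative assumption, not a value from the source.
import numpy as np
import torch

weights_matrix = np.random.rand(100, 50)      # vocab of 100 words, 50-dim vectors
emb_layer = _load_pre_trained(weights_matrix, gpu=False, non_trainable=True)
vectors = emb_layer(torch.tensor([1, 2, 3]))  # look up rows 1-3; shape (3, 50)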
def get_keras_embedding(self):
    """Build a frozen embedding layer from the stored vectors.

    Note: despite the method name, this returns a torch.nn.Embedding,
    not a Keras layer.

    Returns:
        embedding_layer: torch.nn.Embedding with the pre-trained weights loaded
    """
    embedding_layer = Embedding(num_embeddings=self.vocab_size,
                                embedding_dim=self.num_dim)
    embedding_layer.load_state_dict({'weight': self.embeddings})
    embedding_layer.weight.requires_grad = False  # Non-trainable
    return embedding_layer
class BaweNeuralExtractor(BaseFeatureExtractor):
    def __init__(self):
        bawe_train_stats = pickle.load(
            resource_stream('notebooks', 'bawe_train_stats.p'))
        self._pos_vocab = bawe_train_stats['pos_vocab']
        self._embedding = Embedding(len(self._pos_vocab), 10,
                                    padding_idx=self._pos_vocab['<pad>'])
        self._embedding.load_state_dict(
            torch.load(resource_stream('notebooks', 'bawe_embedding_sd.pt')))
        sentence_encoder = SentenceEncoder(10, 10)
        par_encoder = ParEncoder(10, 5)
        self._style_encoder = StyleEncoder(sentence_encoder, par_encoder)
        self._style_encoder.load_state_dict(
            torch.load(
                resource_stream('notebooks.resources',
                                'bawe_style_encoder_sd.pt')))

    def extract(self, text: str) -> ndarray:
        doc = nlp(text)
        tokens = [[token.text for token in sent] for sent in doc.sents]
        pos_tokens = [[self._pos_vocab[token] for token in sent]
                      for sent in tokens]
        max_sent_len = max(len(sent) for sent in pos_tokens)
        sent_tensors = []
        sent_lens = []
        for sent in pos_tokens:
            # Pad every sentence to the length of the longest sentence.
            sent_tensor = torch.full([max_sent_len], self._pos_vocab['<pad>'])
            sent_len = torch.tensor(len(sent))
            sent_tensor[:sent_len] = torch.tensor(sent)
            sent_tensors.append(sent_tensor.unsqueeze(0))
            sent_lens.append(sent_len.unsqueeze(0))
        # Truncate to a multiple of four sentences.
        sent_count = len(sent_tensors)
        new_sent_count = sent_count - (sent_count % 4)
        tensor = torch.cat(sent_tensors, dim=0)[:new_sent_count]
        sent_lens = torch.cat(sent_lens, dim=0)[:new_sent_count]
        with torch.no_grad():
            embed_tensor = self._embedding(tensor)
            embed_tensor = torch.transpose(embed_tensor, 0, 1)
            packed_tensor = pack_padded_sequence(embed_tensor, sent_lens,
                                                 enforce_sorted=False)
            features = self._style_encoder(packed_tensor)
        return features.numpy()
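# A hedged usage sketch for BaweNeuralExtractor. It assumes the pickled vocab,
# the saved state dicts, and the spaCy `nlp` pipeline used above are available,
# and that every token of the input text appears in the saved POS vocabulary.
extractor = BaweNeuralExtractor()
features = extractor.extract("First sentence here. Second sentence here. "
                             "Third sentence here. Fourth sentence here.")
print(features.shape)  # numpy array of style features for the document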
def __init__(self, model_file, use_device='cpu'):
    self._vocabulary = Vocabulary(CORPUS_NAME)
    # A checkpoint saved on GPU must be remapped to the CPU when no GPU is used.
    if use_device == 'gpu':
        checkpoint = torch.load(model_file)
    else:
        checkpoint = torch.load(model_file, map_location=torch.device('cpu'))
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    embedding_sd = checkpoint['embedding']
    self._vocabulary.__dict__ = checkpoint['voc_dict']
    self._vocabulary.replace_word('s', 'is')
    print('Building _encoder and _decoder ...')
    embedding = Embedding(self._vocabulary.num_words, HIDDEN_SIZE)
    embedding.load_state_dict(embedding_sd)
    encoder = EncoderRNN(HIDDEN_SIZE, embedding, ENCODER_N_LAYERS, DROPOUT)
    decoder = LuongAttnDecoderRNN(ATTN_MODEL, embedding, HIDDEN_SIZE,
                                  self._vocabulary.num_words,
                                  DECODER_N_LAYERS, DROPOUT)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
    self._encoder = encoder.to(DEVICE)
    self._decoder = decoder.to(DEVICE)
    print('Models built and ready to go!')
    encoder.eval()
    decoder.eval()
    self._search_decoder = GreedySearchDecoder(encoder, decoder)
    pygame.mixer.init()
    if SPEECH:
        self._recognizer = sr.Recognizer()
        self._mic = sr.Microphone()
    corpus = os.path.join('data', CORPUS_NAME)
    self._conversation_logfile = os.path.join(corpus, 'conversations.txt')
def load_pre_trained_glove(embedding_layer: nn.Embedding,
                           word_to_idx: Dict[str, int],
                           word_to_glove_idx: Dict[str, int],
                           glove_path: str) -> nn.Embedding:
    embedding_matrix = embedding_layer.weight.detach().numpy()
    glove_words_sorted_indices = sorted(word_to_glove_idx.items(),
                                        key=lambda x: x[1])
    total_glove_words = len(glove_words_sorted_indices)
    current_glove_index = 0
    with open(glove_path, "r", encoding="utf-8") as f:
        for index, line in enumerate(f):
            # stop once every vocabulary word that also appears in GloVe has been read
            if current_glove_index == total_glove_words:
                break
            # skip GloVe lines whose words are not in our vocabulary
            glove_word, glove_index = glove_words_sorted_indices[current_glove_index]
            if index != glove_index:
                continue
            # parse the pre-trained vector from the line
            line = line.rstrip('\n')
            line_tokens = line.split()
            word = line_tokens[0]
            vector = np.array(line_tokens[1:], dtype=np.float64)  # np.float is removed in recent NumPy
            # write the vector into the row for this word in our embedding matrix
            word_data_vocab_index = word_to_idx[word]
            embedding_matrix[word_data_vocab_index] = vector
            # advance to the next vocabulary word appearing in GloVe
            current_glove_index += 1
    # load the updated matrix back into the embedding layer
    embedding_layer.load_state_dict(
        {'weight': torch.tensor(embedding_matrix)})
    return embedding_layer
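# A hedged usage sketch for load_pre_trained_glove. The file name and both index
# maps are illustrative assumptions; word_to_glove_idx must map each vocabulary
# word to the 0-based line number of that word inside the GloVe file.
import torch.nn as nn

word_to_idx = {'the': 0, 'cat': 1}            # our corpus vocabulary
word_to_glove_idx = {'the': 0, 'cat': 5306}   # hypothetical line numbers in glove.6B.50d.txt
layer = nn.Embedding(len(word_to_idx), 50)
layer = load_pre_trained_glove(layer, word_to_idx, word_to_glove_idx,
                               'glove.6B.50d.txt')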
def prep_embedding_layer(vectors, trainable=False):
    """A helper function to return a pytorch nn embedding layer.

    Args:
        vectors: weight matrix of pre-trained or randomized vectors
        trainable: bool, default False. If False, keep the embeddings static.

    Returns:
        embedding_layer: torch.nn.Embedding layer initialized with `vectors`

    source: https://medium.com/@martinpella/how-to-use-pre-trained-word-embeddings-in-pytorch-71ca59249f76
    """
    num_embeddings, embedding_dim = vectors.size()
    embedding_layer = Embedding(num_embeddings, embedding_dim)
    embedding_layer.load_state_dict({'weight': vectors})
    embedding_layer.weight.requires_grad = trainable
    return embedding_layer
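# A minimal usage sketch for prep_embedding_layer; the random 100 x 50 weight
# matrix is an illustrative stand-in for real pre-trained vectors.
import torch

pretrained = torch.randn(100, 50)
frozen_emb = prep_embedding_layer(pretrained)                   # static embeddings
tunable_emb = prep_embedding_layer(pretrained, trainable=True)  # fine-tunable copy
out = frozen_emb(torch.tensor([[0, 1, 2]]))                     # shape (1, 3, 50)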
with open('%s/embedding_model.vec' % DATA_PATH) as f:
    embs = []
    words = {}
    i = 0
    # skip the header line of the .vec file and build the word -> index map
    for l in f.readlines()[1:]:
        if i <= NUM_ENTITIES:
            word, emb = l.strip().split(' ', 1)
            words[word] = i
            i += 1

with open('lstm_lstm.pickle', 'rb') as f:
    lstm = pickle.load(f).cpu()

emb = Embedding(NUM_ENTITIES, DIM, padding_idx=0)
emb.load_state_dict(torch.load('emb_post_lstm.txt'))

SENTENCES = [
    "Ubuntu",
    "F**k Linux",
    "I am a n00b",
    "n00b I am",
    "This is a great idea for the community.",
    "I wish new users would respect the rules.",
    "Thank you so much!",
    "Thank you so much! From all my noob heart!",
    "Are you sure you want to do that?",
    "Should I choose Ubuntu or Fedora?",
    "I am having problems with the NVIDIA drivers",
    "Please watch your language, this is a PG-13 environment",
    "This is not the appropriate place to ask this question",
]
class BaseModel(nn.Module):
    @classmethod
    def load_weights(cls, config: Dict, weights_path: Path):
        logger.info(f"Loading weights from {weights_path}")
        state_dict = torch.load(weights_path)
        return cls.from_state_dict(config, state_dict)

    @classmethod
    def from_checkpoint(cls, config: Dict, checkpoint_path: Path):
        logger.info(f"Loading checkpoint from {checkpoint_path}")
        checkpoint = torch.load(checkpoint_path)
        return cls.from_state_dict(config, checkpoint["model_state_dict"])

    @classmethod
    def from_state_dict(cls, config: Dict, state_dict: Dict):
        model = cls(**config)
        model.load_state_dict(state_dict)
        return model

    def __init__(
        self,
        num_embeddings: int = 1024,
        embedding_dim: int = 128,
        embedding_initial_weights: Optional[Tensor] = None,
        freeze_embedding: bool = False,
        rnn_style: str = "LSTM",
        rnn_num_layers: int = 1,
        hidden_dim: int = 128,
        bidirectional: bool = False,
    ):
        super().__init__()
        self.embedding = Embedding(num_embeddings, embedding_dim)
        if embedding_initial_weights is not None:
            self.embedding.load_state_dict({"weight": embedding_initial_weights})
        if freeze_embedding:
            for param in self.embedding.parameters():
                param.requires_grad = False
        self.rnn = RNN_CLASS_MAPPING[rnn_style](
            embedding_dim,
            hidden_dim,
            rnn_num_layers,
            bidirectional=bidirectional,
            batch_first=True,
        )
        num_directions = 2 if bidirectional else 1
        self.rnn_output_dim = num_directions * hidden_dim
        self.hidden_state_dim = rnn_num_layers * num_directions * hidden_dim

    def forward(self, input_ids: Tensor):
        """
        Arguments
            input_ids: torch.LongTensor of shape (BS, L)

        Returns
            output: torch.FloatTensor of shape (BS, L, num_directions*hidden_dim)
            hidden_state: torch.FloatTensor of shape (BS, rnn_num_layers*num_directions, hidden_dim)
        """
        embedded = self.embedding(input_ids)
        output, hidden_state = self.rnn(embedded)
        if isinstance(hidden_state, tuple):
            # LSTMs return (h_n, c_n); keep only the hidden state h_n
            hidden_state = hidden_state[0]
        hidden_state = hidden_state.permute(1, 0, 2)
        return output, hidden_state

    def save_weights(self, weights_path: Path):
        logger.info(f"Saving model weights to {weights_path}")
        torch.save(self.state_dict(), weights_path)
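# A hedged usage sketch for BaseModel. It assumes RNN_CLASS_MAPPING contains a
# "GRU" entry (e.g. {"LSTM": nn.LSTM, "GRU": nn.GRU}); the config values and the
# weights path are illustrative.
from pathlib import Path
import torch

config = {"num_embeddings": 100, "embedding_dim": 16,
          "hidden_dim": 32, "rnn_style": "GRU"}
model = BaseModel(**config)
input_ids = torch.randint(0, 100, (4, 12))   # batch of 4 sequences of length 12
output, hidden = model(input_ids)            # shapes (4, 12, 32) and (4, 1, 32)
model.save_weights(Path("base_model.pt"))
restored = BaseModel.load_weights(config, Path("base_model.pt"))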
class Encoder(Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers,
                 bidirectional, max_length, dropout_rate,
                 embedding_weights=None):
        super(Encoder, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.max_length = max_length
        self.num_directions = 2 if bidirectional else 1
        self.embedding_layer = Embedding(self.vocab_size,
                                         self.embedding_dim,
                                         padding_idx=0)
        # `is not None` avoids the ambiguous element-wise comparison that
        # `== None` triggers on tensors
        if embedding_weights is not None:
            self.embedding_layer.load_state_dict({'weight': embedding_weights})
        self.dropout = Dropout(p=dropout_rate)
        self.lstm_layer = LSTM(self.embedding_dim,
                               self.hidden_dim,
                               num_layers=self.num_layers,
                               bidirectional=bidirectional,
                               batch_first=True,
                               dropout=dropout_rate)

    def forward(self, x, x_lengths, initial_state, inference=False,
                device=torch.device('cuda')):
        if initial_state is None:
            initial_state = self.init_hidden_state(x.size(0), device=device)
        if inference:
            with torch.no_grad():
                x = self.dropout(self.embedding_layer(x))
                x = pack_padded_sequence(x, x_lengths, batch_first=True,
                                         enforce_sorted=False)
                x, last_state = self.lstm_layer(x, initial_state)
                # We don't need the cell output in the encoder, only the state;
                # `last_state` is a tuple (hidden_state, cell_state).
                x, _ = pad_packed_sequence(x, batch_first=True,
                                           total_length=self.max_length)
                # x.shape = [batch_size, seq_len, hidden_dim * num_directions]
                # last_state[0].shape = [num_layers * num_directions, batch_size, hidden_dim]
                # last_state[1].shape is equal to last_state[0].shape
                return x, last_state
        else:
            x = self.dropout(self.embedding_layer(x))
            x = pack_padded_sequence(x, x_lengths, batch_first=True,
                                     enforce_sorted=False)
            x, last_state = self.lstm_layer(x, initial_state)
            # We don't need the cell output in the encoder, only the state;
            # `last_state` is a tuple (hidden_state, cell_state).
            x, _ = pad_packed_sequence(x, batch_first=True,
                                       total_length=self.max_length)
            # x.shape = [batch_size, seq_len, hidden_dim * num_directions]
            # last_state[0].shape = [num_layers * num_directions, batch_size, hidden_dim]
            # last_state[1].shape is equal to last_state[0].shape
            return x, last_state

    def init_hidden_state(self, batch_size, device=torch.device('cuda')):
        return (torch.zeros([self.num_layers * self.num_directions,
                             batch_size, self.hidden_dim], device=device),
                torch.zeros([self.num_layers * self.num_directions,
                             batch_size, self.hidden_dim], device=device))
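# A hedged usage sketch for Encoder on the CPU; all sizes and token ids below
# are illustrative assumptions.
import torch

encoder = Encoder(vocab_size=50, embedding_dim=16, hidden_dim=32, num_layers=1,
                  bidirectional=False, max_length=10, dropout_rate=0.0)
x = torch.tensor([[4, 8, 15, 16, 0, 0, 0, 0, 0, 0],   # two padded sequences
                  [23, 42, 0, 0, 0, 0, 0, 0, 0, 0]])
x_lengths = torch.tensor([4, 2])
outputs, (h, c) = encoder(x, x_lengths, initial_state=None,
                          inference=True, device=torch.device('cpu'))
# outputs.shape == (2, 10, 32); h.shape == c.shape == (1, 2, 32)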