def setup_training(self, pretrained_path):
    """Initialize `self.emb_word` from a pretrained embedding matrix.

    Rows present in the pretrained matrix are copied in; words in
    `self.words` without a pretrained vector keep a small random
    initialization, uniform in [-0.01, 0.01). Model definitions are
    written to `tagger_defs.txt` under `self.model_path`.

    Args:
        pretrained_path (str): path to pretrained embedding file.
    """
    emb_w = read_pretrained_embeddings(pretrained_path)
    # Random fallback init for words missing from the pretrained matrix.
    new_emb_w = 0.02 * np.random.random_sample(
        (len(self.words), emb_w.shape[1])).astype('f') - 0.01
    # Vectorized copy of the pretrained rows; bounded so a pretrained
    # vocabulary larger than self.words no longer raises IndexError
    # (the original per-row loop would).
    n_copy = min(len(emb_w), len(new_emb_w))
    new_emb_w[:n_copy] = emb_w[:n_copy]
    self.emb_word.W.data = new_emb_w
    with open(os.path.join(self.model_path, "tagger_defs.txt"), "w") as out:
        json.dump({"model": self.__class__.__name__,
                   "word_dim": self.word_dim,
                   "suffix_dim": self.suffix_dim,
                   "caps_dim": self.caps_dim}, out)
def load_pretrained_embeddings(self, path):
    """Overwrite the word-embedding weights with pretrained vectors.

    Args:
        path (str): path to pretrained embedding file.
    """
    pretrained = read_pretrained_embeddings(path)
    self.emb_word.W.data = pretrained