Ejemplo n.º 1
0
 def setup_training(self, pretrained_path):
     """
     Initialize self.emb_word for training and write out model definitions.

     Loads the pretrained word-embedding matrix from `pretrained_path`,
     augments it with randomly initialized rows (uniform in [-0.01, 0.01))
     for the words in self.words that the pretrained matrix does not cover,
     assigns the result to self.emb_word.W.data, and dumps the model
     hyperparameters to `tagger_defs.txt` under self.model_path.

     Inputs:
         pretrained_path (str): path to pretrained embedding file

     NOTE(review): assumes the first len(emb_w) entries of self.words
     correspond row-for-row to the pretrained matrix — confirm against
     how self.words is built.
     """
     emb_w = read_pretrained_embeddings(pretrained_path)
     # Random init in [-0.01, 0.01), float32 ('f') to match typical
     # embedding-weight dtype.
     new_emb_w = 0.02 * np.random.random_sample(
             (len(self.words), emb_w.shape[1])).astype('f') - 0.01
     # Copy the pretrained rows with one vectorized slice assignment
     # instead of a Python-level row-by-row loop.
     new_emb_w[:len(emb_w)] = emb_w
     self.emb_word.W.data = new_emb_w
     # Persist model definitions so the tagger can be reconstructed later.
     with open(os.path.join(self.model_path, "tagger_defs.txt"), "w") as out:
         json.dump({"model": self.__class__.__name__,
                    "word_dim": self.word_dim,
                    "suffix_dim": self.suffix_dim,
                    "caps_dim": self.caps_dim}, out)
Ejemplo n.º 2
0
 def load_pretrained_embeddings(self, path):
     """Load the pretrained embedding matrix at `path` into self.emb_word."""
     pretrained = read_pretrained_embeddings(path)
     self.emb_word.W.data = pretrained