Example 1
def init_vocab(self, data):
    # Vocabularies are only built during training; for eval an existing vocab must be loaded.
    assert self.eval == False  # for eval vocab must exist
    # Build one vocabulary per annotation layer of the preprocessed data.
    charvocab = CharVocab(data, self.args['shorthand'])
    wordvocab = WordVocab(data,
                          self.args['shorthand'],
                          cutoff=7,
                          lower=True)
    uposvocab = WordVocab(data, self.args['shorthand'], idx=1)
    xposvocab = xpos_vocab_factory(data, self.args['shorthand'])
    featsvocab = FeatureVocab(data, self.args['shorthand'], idx=3)
    lemmavocab = WordVocab(data,
                           self.args['shorthand'],
                           cutoff=7,
                           idx=4,
                           lower=True)
    deprelvocab = WordVocab(data, self.args['shorthand'], idx=6)
    # Bundle the individual vocabularies into a single MultiVocab, keyed by annotation type.
    vocab = MultiVocab({
        'char': charvocab,
        'word': wordvocab,
        'upos': uposvocab,
        'xpos': xposvocab,
        'feats': featsvocab,
        'lemma': lemmavocab,
        'deprel': deprelvocab
    })
    return vocab
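
A minimal usage sketch (not part of the snippet above): it assumes a Stanza-style trainer instance named `trainer` still in training mode (eval == False), preprocessed training data `train_data`, and that MultiVocab supports key access and a state_dict() method; the keys match the ones used in init_vocab.

# Hypothetical usage; `trainer` and `train_data` are assumed names.
vocab = trainer.init_vocab(train_data)
word_vocab = vocab['word']          # WordVocab built with cutoff=7, lowercased
deprel_vocab = vocab['deprel']      # vocabulary over the dependency relation column
vocab_state = vocab.state_dict()    # serializable form stored under 'vocab' in a checkpoint (assumed API)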
Example 2
def load(self, filename, pretrain):
    """
    Load a model from file, with preloaded pretrain embeddings.

    The pretrain argument may be None or a dummy input; whether the pretrain
    embeddings are actually used depends on the boolean config "pretrain"
    in the loaded args.
    """
    # Relies on module-level imports: torch, logger, MultiVocab, and Parser.
    try:
        checkpoint = torch.load(filename, lambda storage, loc: storage)
    except BaseException:
        logger.error("Cannot load model from {}".format(filename))
        raise
    self.args = checkpoint['config']
    self.vocab = MultiVocab.load_state_dict(checkpoint['vocab'])
    # Rebuild the model; pretrain embeddings are used only if args['pretrain'] is True
    # and a pretrain object was actually passed in.
    emb_matrix = None
    if self.args['pretrain'] and pretrain is not None:
        emb_matrix = pretrain.emb
    self.model = Parser(self.args, self.vocab, emb_matrix=emb_matrix)
    # strict=False tolerates keys missing from the checkpoint (e.g. embedding modules that were not saved).
    self.model.load_state_dict(checkpoint['model'], strict=False)
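
For context, a hypothetical save counterpart (not shown in the snippet) would write a checkpoint with the same 'config', 'vocab', and 'model' keys that load() reads back; it assumes the trainer holds self.args, self.vocab (a MultiVocab exposing state_dict()), and self.model.

def save(self, filename):
    # Hypothetical sketch: the key names mirror what load() above expects.
    params = {
        'config': self.args,
        'vocab': self.vocab.state_dict(),   # assumed MultiVocab serialization API
        'model': self.model.state_dict(),
    }
    try:
        torch.save(params, filename)
        logger.info("Model saved to {}".format(filename))
    except BaseException:
        logger.error("Cannot save model to {}".format(filename))
        raise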