def init_vocab(self, data):
    assert self.eval == False # for eval vocab must exist
    charvocab = CharVocab(data, self.args['shorthand'])
    wordvocab = WordVocab(data, self.args['shorthand'], cutoff=7, lower=True)
    uposvocab = WordVocab(data, self.args['shorthand'], idx=1)
    xposvocab = xpos_vocab_factory(data, self.args['shorthand'])
    featsvocab = FeatureVocab(data, self.args['shorthand'], idx=3)
    lemmavocab = WordVocab(data, self.args['shorthand'], cutoff=7, idx=4, lower=True)
    deprelvocab = WordVocab(data, self.args['shorthand'], idx=6)
    vocab = MultiVocab({'char': charvocab,
                        'word': wordvocab,
                        'upos': uposvocab,
                        'xpos': xposvocab,
                        'feats': featsvocab,
                        'lemma': lemmavocab,
                        'deprel': deprelvocab})
    return vocab
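# Sketch of how the MultiVocab built above is typically consumed (hypothetical call
# site; assumes training data has already been loaded into `train_data` elsewhere):
#
#   self.vocab = self.init_vocab(train_data)
#   # each sub-vocab is then available by key, e.g. self.vocab['upos'] or
#   # self.vocab['deprel'], and the whole MultiVocab can be serialized for
#   # checkpointing and restored later via MultiVocab.load_state_dict (see load()).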
def load(self, filename, pretrain):
    """
    Load a model from file, with preloaded pretrain embeddings. Here we allow the pretrain
    to be None or a dummy input, and the actual use of pretrain embeddings will depend on
    the boolean config "pretrain" in the loaded args.
    """
    try:
        checkpoint = torch.load(filename, lambda storage, loc: storage)
    except BaseException:
        logger.error("Cannot load model from {}".format(filename))
        raise
    self.args = checkpoint['config']
    self.vocab = MultiVocab.load_state_dict(checkpoint['vocab'])
    # load model
    emb_matrix = None
    if self.args['pretrain'] and pretrain is not None:
        # we use pretrain only if args['pretrain'] == True and pretrain is not None
        emb_matrix = pretrain.emb
    self.model = Parser(self.args, self.vocab, emb_matrix=emb_matrix)
    self.model.load_state_dict(checkpoint['model'], strict=False)
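# Minimal usage sketch for load() (hypothetical file path; assumes `trainer` is an
# instance of this class and `pretrain` exposes an `emb` matrix as used above):
#
#   trainer.load('saved_models/en_ewt_parser.pt', pretrain)  # embeddings are used only
#                                                            # if the saved args['pretrain'] is True
#   trainer.load('saved_models/en_ewt_parser.pt', None)      # model loads without pretrained embeddings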