class Trainset(Dataset):
    """Dataset subclass whose ``PREFIX`` class attribute is ``'train'``."""

    PREFIX = 'train'


class Parseset(Dataset):
    """Dataset subclass whose ``PREFIX`` class attribute is ``'parse'``."""

    PREFIX = 'parse'


#***************************************************************
if __name__ == '__main__':
    # Smoke test: build every vocabulary from a single default Configurable
    # and assemble a Trainset out of them.
    from nparser.vocabs import *
    # Rebinds ``Trainset`` to the class as imported from the package module.
    from nparser.dataset import Trainset

    configurable = Configurable()

    # Vocabularies are created in the same order as before; construction
    # order is preserved in case the factories have side effects.
    dep_vocab = DepVocab.from_configurable(configurable)
    word_vocab = WordVocab.from_configurable(configurable)
    lemma_vocab = LemmaVocab.from_configurable(configurable)

    # Two vocabularies derived from the word vocabulary.
    pretrained_vocab = PretrainedVocab.from_vocab(word_vocab)
    char_vocab = NgramMultivocab.from_vocab(word_vocab)

    # Bundle the word vocabulary and its derivatives under the name 'words'.
    word_subvocabs = [word_vocab, pretrained_vocab, char_vocab]
    word_multivocab = Multivocab.from_configurable(
        configurable, word_subvocabs, name='words')

    tag_vocab = TagVocab.from_configurable(configurable)
    xtag_vocab = XTagVocab.from_configurable(configurable)
    head_vocab = HeadVocab.from_configurable(configurable)
    rel_vocab = RelVocab.from_configurable(configurable)

    # The order of this list is passed through to Trainset unchanged.
    trainset_vocabs = [
        dep_vocab,
        word_multivocab,
        lemma_vocab,
        tag_vocab,
        xtag_vocab,
        head_vocab,
        rel_vocab,
    ]
    trainset = Trainset.from_configurable(configurable, trainset_vocabs)
# NOTE(review): this physical line holds several definitions collapsed
# together, and it opens in the middle of a method whose `def` line (and the
# enclosing class header) is not visible here. The code is therefore left
# untouched and only described.
#
# 1. Tail of an enclosing method:
#    - evaluates `self.embeddings` into `matrix`;
#    - opens `self.name + '.txt'` for writing via `codecs.open`;
#    - for every index from `self.START_IDX` up to `len(self)`, writes one
#      line of the form "<self[idx]> <matrix[idx] joined by spaces>";
#    - returns None.
#    NOTE(review): `' '.join(...)` accepts only strings; if the rows of
#    `matrix` hold numeric values this raises TypeError -- confirm the
#    element type of `matrix[idx]`.
#    NOTE(review): `codecs.open` is called without an `encoding` argument --
#    confirm that non-ASCII entries are written as intended.
#
# 2. `pretrained_vocab` property: returns `self._pretrained_vocab`. No setter
#    is visible in this chunk.
#
# 3. `__setattr__`: when the attribute being set is `_pretrained_vocab`, it
#    first binds the value's `_str2idx`, `_idx2str` and `_counts` onto this
#    object (the same objects, not copies), then stores the attribute itself
#    through the parent class's `__setattr__`. Every other attribute name goes
#    straight to the parent implementation.
#
# 4. `__main__` smoke test: builds a `Configurable` with `retrained_vocab`
#    overrides (`embed_loss='cross_entropy'`, `retrained_embed_size=50`),
#    creates a `PretrainedVocab` from it, derives a `RetrainedVocab` from that
#    vocabulary, calls the retrained vocabulary on the pretrained one, and
#    prints 'RetrainedVocab passes' to stderr.
#    NOTE(review): relies on `sys` (and print-as-function) being available at
#    file level -- the import is not visible in this chunk; confirm.
matrix = self.embeddings.eval() with codecs.open(self.name+'.txt', 'w') as f: for idx in range(self.START_IDX, len(self)): f.write('%s %s\n' % (self[idx], ' '.join(matrix[idx]))) return #============================================================= @property def pretrained_vocab(self): return self._pretrained_vocab #============================================================= def __setattr__(self, name, value): if name == '_pretrained_vocab': self._str2idx = value._str2idx self._idx2str = value._idx2str self._counts = value._counts super(RetrainedVocab, self).__setattr__(name, value) #*************************************************************** if __name__ == '__main__': """ """ from nparser import Configurable from nparser.vocabs import PretrainedVocab configurable = Configurable(retrained_vocab={'embed_loss':'cross_entropy', 'retrained_embed_size':50}) pretrained_vocab = PretrainedVocab.from_configurable(configurable) retrained_vocab = RetrainedVocab.from_vocab(pretrained_vocab) retrain_loss = retrained_vocab(pretrained_vocab) print('RetrainedVocab passes',file=sys.stderr)