def __setattr__(self, key, value):
    """Set attribute `key` to `value`, deriving `_conll_idx` from `_vocabs`.

    When `key` is '_vocabs', the `conll_idx` of every item in `value` that
    has such an attribute is collected.  Exactly one distinct value must be
    found (this is asserted); it is stored as `self._conll_idx`.  In every
    case the assignment itself is delegated to the parent class.
    """
    if key == '_vocabs':
        # Items without a `conll_idx` attribute are skipped.
        distinct_idxs = {
            vocab.conll_idx
            for vocab in value
            if hasattr(vocab, 'conll_idx')
        }
        assert len(distinct_idxs) == 1
        self._conll_idx = list(distinct_idxs)[0]
    super(Multivocab, self).__setattr__(key, value)

#***************************************************************
if __name__ == '__main__':
    # Manual check, run only when this module is executed as a script:
    # build a Multivocab from three member vocabs, index the validation
    # files and print the indices looked up for a few tokens.
    from parser.vocabs import PretrainedVocab, WordVocab, CharVocab, Multivocab

    configurable = Configurable()
    token_vocab = WordVocab.from_configurable(configurable)
    pretrained_vocab = PretrainedVocab.from_vocab(token_vocab)
    subtoken_vocab = CharVocab.from_vocab(token_vocab)

    member_vocabs = [pretrained_vocab, token_vocab, subtoken_vocab]
    multivocab = Multivocab.from_configurable(configurable, member_vocabs)
    multivocab.add_files(configurable.valid_files)
    multivocab.index_tokens()

    pad_indices = multivocab.index('<PAD>')
    print("Indices for '<PAD>': %s" % str(pad_indices))
    lower_indices = multivocab.index('the')
    print("Indices for 'the': %s" % str(lower_indices))
    title_indices = multivocab.index('The')
    print("Indices for 'The': %s" % str(title_indices))
    print('Multivocab passes')
matrix = self.embeddings.eval() with codecs.open(self.name+'.txt', 'w') as f: for idx in xrange(self.START_IDX, len(self)): f.write('%s %s\n' % (self[idx], ' '.join(matrix[idx]))) return #============================================================= @property def pretrained_vocab(self): return self._pretrained_vocab #============================================================= def __setattr__(self, name, value): if name == '_pretrained_vocab': self._str2idx = value._str2idx self._idx2str = value._idx2str self._counts = value._counts super(RetrainedVocab, self).__setattr__(name, value) #*************************************************************** if __name__ == '__main__': """""" from parser import Configurable from parser.vocabs import PretrainedVocab configurable = Configurable(retrained_vocab={'embed_loss':'cross_entropy', 'retrained_embed_size':50}) pretrained_vocab = PretrainedVocab.from_configurable(configurable) retrained_vocab = RetrainedVocab.from_vocab(pretrained_vocab) retrain_loss = retrained_vocab(pretrained_vocab) print('RetrainedVocab passes')