Exemple #1
0
class Trainset(Dataset):
    """Dataset subclass whose only customisation is PREFIX = 'train'."""
    # NOTE(review): how the Dataset base class consumes PREFIX is not visible
    # here -- presumably it selects the training-split settings; confirm there.
    PREFIX = 'train'


class Parseset(Dataset):
    """Dataset subclass whose only customisation is PREFIX = 'parse'."""
    # NOTE(review): how the Dataset base class consumes PREFIX is not visible
    # here -- presumably it selects the parse-time settings; confirm there.
    PREFIX = 'parse'


#***************************************************************
if __name__ == '__main__':
    # Manual check, run only when this file is executed as a script: build
    # each vocabulary from one default Configurable, then construct a
    # Trainset from them. Construction order is kept exactly as written.

    # NOTE(review): Configurable and the *Vocab classes are presumably
    # supplied by this star import -- confirm against nparser.vocabs.
    from nparser.vocabs import *
    from nparser.dataset import Trainset

    configurable = Configurable()

    dep_vocab = DepVocab.from_configurable(configurable)
    word_vocab = WordVocab.from_configurable(configurable)
    lemma_vocab = LemmaVocab.from_configurable(configurable)

    # These two are derived from the word vocabulary rather than from the
    # configuration object.
    pretrained_vocab = PretrainedVocab.from_vocab(word_vocab)
    char_vocab = NgramMultivocab.from_vocab(word_vocab)

    # Bundle the three word-level vocabularies under the name 'words'.
    word_subvocabs = [word_vocab, pretrained_vocab, char_vocab]
    word_multivocab = Multivocab.from_configurable(
        configurable, word_subvocabs, name='words')

    tag_vocab = TagVocab.from_configurable(configurable)
    xtag_vocab = XTagVocab.from_configurable(configurable)
    head_vocab = HeadVocab.from_configurable(configurable)
    rel_vocab = RelVocab.from_configurable(configurable)

    trainset_vocabs = [
        dep_vocab,
        word_multivocab,
        lemma_vocab,
        tag_vocab,
        xtag_vocab,
        head_vocab,
        rel_vocab,
    ]
    trainset = Trainset.from_configurable(configurable, trainset_vocabs)
Exemple #2
0
    matrix = self.embeddings.eval()
    with codecs.open(self.name+'.txt', 'w') as f:
      for idx in range(self.START_IDX, len(self)):
        f.write('%s %s\n' % (self[idx], ' '.join(matrix[idx])))
    return
  
  #=============================================================
  @property
  def pretrained_vocab(self):
    """Return the object currently stored in `self._pretrained_vocab`."""
    return self._pretrained_vocab
  
  #=============================================================
  def __setattr__(self, name, value):
    """Store an attribute, mirroring lookup tables on '_pretrained_vocab'.

    When `name` is '_pretrained_vocab', the `_str2idx`, `_idx2str` and
    `_counts` attributes of `value` are first bound onto this object (the
    same objects, not copies). In every case the attribute itself is then
    stored through the parent class's `__setattr__`.
    """
    if name == '_pretrained_vocab':
      # Each setattr re-enters this method with a different name, so the
      # mirrored attributes go straight to the parent implementation.
      for shared_attr in ('_str2idx', '_idx2str', '_counts'):
        setattr(self, shared_attr, getattr(value, shared_attr))
    super(RetrainedVocab, self).__setattr__(name, value)

#***************************************************************
if __name__ == '__main__':
  # Manual check, run only when this file is executed as a script: build a
  # PretrainedVocab from a Configurable, wrap it in a RetrainedVocab, call
  # the result once, and report on stderr if nothing raised.

  from nparser import Configurable
  from nparser.vocabs import PretrainedVocab

  # Options passed to Configurable under the 'retrained_vocab' keyword.
  retrained_options = {'embed_loss':'cross_entropy', 'retrained_embed_size':50}
  configurable = Configurable(retrained_vocab=retrained_options)

  pretrained_vocab = PretrainedVocab.from_configurable(configurable)
  retrained_vocab = RetrainedVocab.from_vocab(pretrained_vocab)

  # The returned value is not inspected; the call only has to complete.
  retrain_loss = retrained_vocab(pretrained_vocab)

  print('RetrainedVocab passes', file=sys.stderr)