def load_or_create_vocabs(self):
    """Populate ``self.vocabs`` with an ``ELMOPos`` holding one vocabulary.

    The vocabulary is loaded frozen from ``<experiment_dir>/vocab_pos`` when
    that path exists. Otherwise a new ``Vocab`` is created with the single
    constant ``'UNK'`` and ``'<pad>'`` is looked up on it once. The first
    two ``ELMOPos`` fields are left as ``None``.
    """
    pos_vocab_path = os.path.join(self.config.experiment_dir, 'vocab_pos')
    if not os.path.exists(pos_vocab_path):
        pos_vocab = Vocab(constants=['UNK'])
        # Bare lookup kept for its effect on the fresh, unfrozen vocabulary
        # (presumably registers '<pad>' -- confirm against Vocab.__getitem__).
        pos_vocab['<pad>']
    else:
        pos_vocab = Vocab(file=pos_vocab_path, frozen=True)
    self.vocabs = ELMOPos(None, None, pos_vocab)
def load_or_create_vocabs(self):
    """Create fresh source/target vocabularies and store them in ``self.vocabs``.

    Every vocabulary is built from ``self.constants``; nothing is read from
    disk. When ``self.config.share_vocab`` is truthy, the same ``Vocab``
    instance serves as both ``src`` and ``tgt`` of the resulting
    ``CoNLLInflectionFields``; otherwise each side gets its own instance.
    """
    share_vocab = self.config.share_vocab
    src_vocab = Vocab(constants=self.constants)
    # Sharing means the very same object on both sides, not an equal copy.
    tgt_vocab = src_vocab if share_vocab else Vocab(constants=self.constants)
    self.vocabs = CoNLLInflectionFields(src=src_vocab, tgt=tgt_vocab)
def load_or_create_vocabs(self):
    """Load one frozen vocabulary per ``ReinflectionFields`` field.

    For every key of ``ReinflectionFields._asdict()``, a ``Vocab`` is built
    from the file ``<experiment_dir>/vocab_<field>`` with ``frozen=True``.
    The vocabularies are then passed positionally, in key order, to
    ``ReinflectionFields`` and the result is stored in ``self.vocabs``.
    """
    # NOTE(review): `_asdict` is called on the class itself, not on an
    # instance -- confirm ReinflectionFields supports that.
    field_names = ReinflectionFields._asdict().keys()
    loaded = [
        Vocab(
            file=os.path.join(
                self.config.experiment_dir, 'vocab_{}'.format(name)
            ),
            frozen=True,
        )
        for name in field_names
    ]
    self.vocabs = ReinflectionFields(*loaded)
def relabel_target(self):
    """Rebuild target labels for the train and dev data with a new vocabulary.

    A new, unfrozen ``Vocab`` with no constants is indexed with the ``tgt``
    of every raw train sample; the results become ``train_data.mtx.tgt``.
    The vocabulary is then marked frozen *before* the dev samples are looked
    up the same way, so dev lookups go through the frozen vocabulary. Both
    datasets get the vocabulary as ``vocabs.tgt`` and finally have
    ``to_idx()`` called on them (train first, then dev).
    """
    tgt_vocab = Vocab(frozen=False, constants=[])

    train = self.train_data
    train.mtx.tgt = [tgt_vocab[sample.tgt] for sample in train.raw]
    train.vocabs.tgt = tgt_vocab

    # Freeze only after the train targets have been seen.
    tgt_vocab.frozen = True

    dev = self.dev_data
    dev.mtx.tgt = [tgt_vocab[sample.tgt] for sample in dev.raw]
    dev.vocabs.tgt = tgt_vocab

    train.to_idx()
    dev.to_idx()
def load_or_create_vocabs(self):
    """Load or create the ``src``/``tgt`` vocabularies into ``self.vocabs``.

    For each side, a frozen ``Vocab`` is loaded from
    ``<experiment_dir>/vocab_src`` (or ``vocab_tgt``) when that path exists;
    otherwise a new ``Vocab`` is created from ``self.constants``. When
    ``self.config.share_vocab`` is truthy, the target side is not looked up
    at all and simply reuses the source vocabulary object.
    """
    prefix = os.path.join(self.config.experiment_dir, 'vocab_')
    fields = Seq2seqFields()

    src_path = prefix + 'src'
    fields.src = (
        Vocab(file=src_path, frozen=True)
        if os.path.exists(src_path)
        else Vocab(constants=self.constants)
    )

    if not self.config.share_vocab:
        tgt_path = prefix + 'tgt'
        fields.tgt = (
            Vocab(file=tgt_path, frozen=True)
            if os.path.exists(tgt_path)
            else Vocab(constants=self.constants)
        )
    else:
        # Shared vocabulary: both sides point at the same object.
        fields.tgt = fields.src

    self.vocabs = fields
def load_or_create_vocabs(self):
    """Load or create the ``src``/``tgt`` vocabularies into ``self.vocabs``.

    Source side, in priority order:

    1. if the path ``self.config.vocab_src`` (default
       ``<experiment_dir>/vocab_src``) exists, load it frozen;
    2. else if ``self.config.pretrained_embedding`` is set and truthy, create
       a ``Vocab`` with the constants ``UNK``/``SOS``/``EOS``/``PAD`` and
       call ``load_word2vec_format`` on it with that setting;
    3. else create a ``Vocab`` from ``self.constants``.

    Target side: load frozen from ``self.config.vocab_tgt`` (default
    ``<experiment_dir>/vocab_tgt``) when the path exists, otherwise create
    an empty-argument ``Vocab()``.
    """
    fields = ClassificationFields(None, None, None)

    default_src = os.path.join(self.config.experiment_dir, 'vocab_src')
    src_path = getattr(self.config, 'vocab_src', default_src)
    if os.path.exists(src_path):
        fields.src = Vocab(file=src_path, frozen=True)
    elif not getattr(self.config, 'pretrained_embedding', False):
        fields.src = Vocab(constants=self.constants)
    else:
        fields.src = Vocab(file=None, constants=['UNK', 'SOS', 'EOS', 'PAD'])
        fields.src.load_word2vec_format(self.config.pretrained_embedding)

    default_tgt = os.path.join(self.config.experiment_dir, 'vocab_tgt')
    tgt_path = getattr(self.config, 'vocab_tgt', default_tgt)
    fields.tgt = (
        Vocab(file=tgt_path, frozen=True)
        if os.path.exists(tgt_path)
        else Vocab()
    )

    self.vocabs = fields
def load_or_create_vocabs(self):
    """Load frozen ``src`` and ``tgt`` vocabularies into ``self.vocabs``.

    The files ``<experiment_dir>/vocab_src`` and ``<experiment_dir>/vocab_tgt``
    are handed to ``Vocab`` with ``frozen=True`` (source first); no existence
    check is made and no vocabulary is created from scratch here.
    """
    prefix = os.path.join(self.config.experiment_dir, 'vocab_')
    src_vocab = Vocab(file=prefix + 'src', frozen=True)
    tgt_vocab = Vocab(file=prefix + 'tgt', frozen=True)
    self.vocabs = CoNLLInflectionFields(src=src_vocab, tgt=tgt_vocab)