Example #1
0
 def load_data(self, dataset, train, dev, test):
     """Select a dataloader for ``dataset`` and store it on ``self.data``.

     The loader class depends on ``self.params.arch`` (``Arch.hardmono``
     uses the ``Align*`` loaders), on ``dataset`` and, for datasets that
     have a ``Tag*`` variant, on ``self.params.indtag``. The chosen class
     is called as ``loader(train, dev, test, self.params.shuffle)``.
     Afterwards the vocabulary sizes and the first 500 source/target
     vocabulary entries are logged.

     Args:
         dataset: Value compared against the ``Data`` members to pick the
             loader.
         train: Training-split argument, forwarded unchanged to the
             loader. For ``Data.sigmorphon19task1`` it must be a list of
             exactly two items.
         dev: Development-split argument, forwarded unchanged.
         test: Test-split argument, forwarded unchanged.

     Raises:
         AssertionError: If ``self.data`` is already set, or if a
             SIGMORPHON 2019 dataset is requested without its
             preconditions (see the asserts below).
         ValueError: If ``dataset`` has no loader for the configured
             architecture.
     """
     assert self.data is None, "data has already been loaded"
     logger = self.logger
     params = self.params
     arch = params.arch

     # Only the class is chosen in the chain; it is instantiated once
     # below so the constructor arguments live in a single place.
     if arch == Arch.hardmono:
         # NOTE: params.indtag is not consulted for this architecture.
         if dataset == Data.sigmorphon17task1:
             loader_cls = dataloader.AlignSIGMORPHON2017Task1
         elif dataset == Data.g2p:
             loader_cls = dataloader.AlignStandardG2P
         elif dataset == Data.news15:
             loader_cls = dataloader.AlignTransliteration
         else:
             raise ValueError(
                 "unsupported dataset {!r} for arch {!r}".format(dataset, arch)
             )
     elif dataset == Data.sigmorphon17task1:
         if params.indtag:
             loader_cls = dataloader.TagSIGMORPHON2017Task1
         else:
             loader_cls = dataloader.SIGMORPHON2017Task1
     elif dataset == Data.unimorph:
         if params.indtag:
             loader_cls = dataloader.TagUnimorph
         else:
             loader_cls = dataloader.Unimorph
     elif dataset == Data.sigmorphon19task1:
         # Only a tagged loader exists for this dataset.
         assert isinstance(train, list) and len(train) == 2 and params.indtag, (
             "sigmorphon19task1 requires `train` to be a list of two items "
             "and params.indtag to be set"
         )
         loader_cls = dataloader.TagSIGMORPHON2019Task1
     elif dataset == Data.sigmorphon19task2:
         # Only a tagged loader exists for this dataset.
         assert params.indtag, "sigmorphon19task2 requires params.indtag to be set"
         loader_cls = dataloader.TagSIGMORPHON2019Task2
     elif dataset == Data.g2p:
         loader_cls = dataloader.StandardG2P
     elif dataset == Data.p2g:
         loader_cls = dataloader.StandardP2G
     elif dataset == Data.news15:
         loader_cls = dataloader.Transliteration
     elif dataset == Data.histnorm:
         loader_cls = dataloader.Histnorm
     elif dataset == Data.sigmorphon16task1:
         if params.indtag:
             loader_cls = dataloader.TagSIGMORPHON2016Task1
         else:
             loader_cls = dataloader.SIGMORPHON2016Task1
     elif dataset == Data.lemma:
         if params.indtag:
             loader_cls = dataloader.TagLemmatization
         else:
             loader_cls = dataloader.Lemmatization
     elif dataset == Data.lemmanotag:
         loader_cls = dataloader.LemmatizationNotag
     else:
         raise ValueError(
             "unsupported dataset {!r} for arch {!r}".format(dataset, arch)
         )

     self.data = loader_cls(train, dev, test, params.shuffle)

     logger.info("src vocab size %d", self.data.source_vocab_size)
     logger.info("trg vocab size %d", self.data.target_vocab_size)
     # Cap the dump at 500 entries to keep the log readable.
     logger.info("src vocab %r", self.data.source[:500])
     logger.info("trg vocab %r", self.data.target[:500])
 def load_data(self, dataset, train, dev, test=None, shuffle=False):
     """Build a SIGMORPHON 2019 dataloader and store it on ``self.data``.

     Only ``Data.sigmorphon19task1`` and ``Data.sigmorphon19task2`` are
     handled. The chosen loader is called as
     ``loader(train, dev, test, shuffle)``; afterwards the vocabulary
     sizes and the first 500 source/target vocabulary entries are logged.

     Args:
         dataset: Value compared against the ``Data`` members to pick the
             loader.
         train: List forwarded to the loader. It must hold 1 or 2 items
             for task 1 and exactly 1 item for task 2.
         dev: Development-split argument, forwarded unchanged.
         test: Test-split argument, forwarded unchanged (default ``None``).
         shuffle: Forwarded unchanged to the loader (default ``False``).

     Raises:
         AssertionError: If ``self.data`` is already set, or if ``train``
             is not a list of the size required by the dataset.
         ValueError: If ``dataset`` is neither of the two supported tasks.
     """
     assert self.data is None, 'data has already been loaded'
     logger = self.logger

     # Only the class is chosen in the chain; it is instantiated once
     # below so the constructor arguments live in a single place.
     if dataset == Data.sigmorphon19task1:
         assert isinstance(train, list) and len(train) in [1, 2], (
             'sigmorphon19task1 requires `train` to be a list of 1 or 2 items'
         )
         loader_cls = dataloader.TagSIGMORPHON2019Task1
     elif dataset == Data.sigmorphon19task2:
         assert isinstance(train, list) and len(train) == 1, (
             'sigmorphon19task2 requires `train` to be a list of 1 item'
         )
         loader_cls = dataloader.TagSIGMORPHON2019Task2
     else:
         raise ValueError('unsupported dataset {!r}'.format(dataset))

     self.data = loader_cls(train, dev, test, shuffle)

     logger.info('src vocab size %d', self.data.source_vocab_size)
     logger.info('trg vocab size %d', self.data.target_vocab_size)
     # Cap the dump at 500 entries to keep the log readable.
     logger.info('src vocab %r', self.data.source[:500])
     logger.info('trg vocab %r', self.data.target[:500])