def load_data(self, dataset, train, dev, test):
    """Build the dataloader for ``dataset`` and store it on ``self.data``.

    The loader class is chosen from ``dataset`` together with two settings
    read from ``self.params``:

    * when ``params.arch`` equals ``Arch.hardmono`` one of the ``Align*``
      loaders is used (only three datasets are handled in that mode);
    * otherwise ``params.indtag`` switches to the ``Tag*`` variant for the
      datasets that have one.

    The chosen class is called as
    ``loader(train, dev, test, params.shuffle)``.  Afterwards the source and
    target vocabulary sizes and the first 500 items of ``self.data.source``
    and ``self.data.target`` are logged at INFO level.

    Args:
        dataset: Value compared against the ``Data`` constants to pick the
            loader.
        train: Forwarded unchanged to the loader.  For
            ``Data.sigmorphon19task1`` it must be a list of length 2.
        dev: Forwarded unchanged to the loader.
        test: Forwarded unchanged to the loader.

    Raises:
        AssertionError: If ``self.data`` is already set, or if the extra
            requirements of ``Data.sigmorphon19task1`` /
            ``Data.sigmorphon19task2`` are not met.
        ValueError: If no loader is defined for ``dataset`` under the
            current ``params.arch``.
    """
    assert self.data is None, "self.data is already set"
    logger = self.logger
    params = self.params

    # Select the loader class first; it is instantiated once, further down.
    # Branch order and lazy attribute access mirror the dataset checks
    # one-to-one, so only the matching branch touches ``dataloader``.
    if params.arch == Arch.hardmono:
        if dataset == Data.sigmorphon17task1:
            loader_cls = dataloader.AlignSIGMORPHON2017Task1
        elif dataset == Data.g2p:
            loader_cls = dataloader.AlignStandardG2P
        elif dataset == Data.news15:
            loader_cls = dataloader.AlignTransliteration
        else:
            raise ValueError(
                "unsupported dataset {!r} for arch {!r}".format(
                    dataset, params.arch
                )
            )
    elif dataset == Data.sigmorphon17task1:
        if params.indtag:
            loader_cls = dataloader.TagSIGMORPHON2017Task1
        else:
            loader_cls = dataloader.SIGMORPHON2017Task1
    elif dataset == Data.unimorph:
        if params.indtag:
            loader_cls = dataloader.TagUnimorph
        else:
            loader_cls = dataloader.Unimorph
    elif dataset == Data.sigmorphon19task1:
        assert isinstance(train, list) and len(train) == 2 and params.indtag, (
            "Data.sigmorphon19task1 requires `train` to be a list of length 2 "
            "and params.indtag to be set"
        )
        loader_cls = dataloader.TagSIGMORPHON2019Task1
    elif dataset == Data.sigmorphon19task2:
        assert params.indtag, (
            "Data.sigmorphon19task2 requires params.indtag to be set"
        )
        loader_cls = dataloader.TagSIGMORPHON2019Task2
    elif dataset == Data.g2p:
        loader_cls = dataloader.StandardG2P
    elif dataset == Data.p2g:
        loader_cls = dataloader.StandardP2G
    elif dataset == Data.news15:
        loader_cls = dataloader.Transliteration
    elif dataset == Data.histnorm:
        loader_cls = dataloader.Histnorm
    elif dataset == Data.sigmorphon16task1:
        if params.indtag:
            loader_cls = dataloader.TagSIGMORPHON2016Task1
        else:
            loader_cls = dataloader.SIGMORPHON2016Task1
    elif dataset == Data.lemma:
        if params.indtag:
            loader_cls = dataloader.TagLemmatization
        else:
            loader_cls = dataloader.Lemmatization
    elif dataset == Data.lemmanotag:
        loader_cls = dataloader.LemmatizationNotag
    else:
        raise ValueError("unsupported dataset {!r}".format(dataset))

    self.data = loader_cls(train, dev, test, params.shuffle)

    logger.info("src vocab size %d", self.data.source_vocab_size)
    logger.info("trg vocab size %d", self.data.target_vocab_size)
    logger.info("src vocab %r", self.data.source[:500])
    logger.info("trg vocab %r", self.data.target[:500])
def load_data(self, dataset, train, dev, test=None, shuffle=False):
    """Build the dataloader for ``dataset`` and store it on ``self.data``.

    Only ``Data.sigmorphon19task1`` and ``Data.sigmorphon19task2`` are
    handled.  The chosen loader class is called as
    ``loader(train, dev, test, shuffle)``.  Afterwards the source and target
    vocabulary sizes and the first 500 items of ``self.data.source`` and
    ``self.data.target`` are logged at INFO level.

    Args:
        dataset: Value compared against the ``Data`` constants to pick the
            loader.
        train: Must be a list; length 1 or 2 for
            ``Data.sigmorphon19task1``, length 1 for
            ``Data.sigmorphon19task2``.  Forwarded unchanged to the loader.
        dev: Forwarded unchanged to the loader.
        test: Forwarded unchanged to the loader (defaults to ``None``).
        shuffle: Forwarded unchanged to the loader (defaults to ``False``).

    Raises:
        AssertionError: If ``self.data`` is already set, or if ``train``
            does not satisfy the requirement of the selected dataset.
        ValueError: If ``dataset`` is neither of the two handled values.
    """
    assert self.data is None, 'self.data is already set'
    logger = self.logger

    # Validate `train` and select the loader class; it is instantiated once
    # below so the constructor call is written in a single place.
    if dataset == Data.sigmorphon19task1:
        assert isinstance(train, list) and len(train) in [1, 2], (
            'Data.sigmorphon19task1 requires `train` to be a list of '
            'length 1 or 2'
        )
        loader_cls = dataloader.TagSIGMORPHON2019Task1
    elif dataset == Data.sigmorphon19task2:
        assert isinstance(train, list) and len(train) == 1, (
            'Data.sigmorphon19task2 requires `train` to be a list of length 1'
        )
        loader_cls = dataloader.TagSIGMORPHON2019Task2
    else:
        raise ValueError('unsupported dataset {!r}'.format(dataset))

    self.data = loader_cls(train, dev, test, shuffle)

    logger.info('src vocab size %d', self.data.source_vocab_size)
    logger.info('trg vocab size %d', self.data.target_vocab_size)
    logger.info('src vocab %r', self.data.source[:500])
    logger.info('trg vocab %r', self.data.target[:500])