def _load_restored(self, dataset_path):
    """Load dataset from restored binary files (train, dev, test).

    Restores both the corpus text (with its vocabulary) and the
    attribute data (with its own index mappings), then records the
    vocabulary size.

    Args:
        dataset_path (str): path of dataset dir.
    """
    # Corpus: raw text plus the token <-> index mappings.
    corpus_restored = load_restored(dataset_path, 'corpus.')
    self.text_data, self.idx2token, self.token2idx = corpus_restored

    # Attributes: parallel structure with their own index mappings.
    attribute_restored = load_restored(dataset_path, 'attribute.')
    self.attribute_data, self.idx2attribute, self.attribute2idx = attribute_restored

    self.max_vocab_size = len(self.idx2token)
    self.logger.info("Restore finished!")
def _load_restored(self, dataset_path):
    """Load dataset from restored binary files (train, dev, test).

    Restores source-side and target-side text along with their
    separate vocabularies, and records each vocabulary's size.

    Args:
        dataset_path (str): path of dataset dir.
    """
    # Source and target are restored symmetrically; iterate over the
    # two sides instead of duplicating the load logic.
    for side in ('source', 'target'):
        suffix = getattr(self, side + '_suffix')
        text_data, idx2token, token2idx = load_restored(dataset_path, suffix + '.')
        setattr(self, side + '_text_data', text_data)
        setattr(self, side + '_idx2token', idx2token)
        setattr(self, side + '_token2idx', token2idx)
        setattr(self, 'max_' + side + '_vocab_size', len(idx2token))
    self.logger.info("Restore finished!")
def _load_restored(self, dataset_path):
    """Load dataset from restored binary files (train, dev, test).

    Restores the text data for every configured group (knowledge,
    source, target) and a single shared vocabulary, then records the
    vocabulary size.

    Args:
        dataset_path (str): path of dataset dir.
    """
    # Only restore text for groups whose format is actually configured.
    for group in ('knowledge', 'source', 'target'):
        if getattr(self, group + '_format') != 'none':
            restored = load_restored(dataset_path, group + '.', ignore_file='vocab')
            setattr(self, group + '_text_data', restored[0])

    # One vocabulary is shared across all groups; it does not depend on
    # the loop variable, so it is restored once.
    self.idx2token, self.token2idx = load_restored(dataset_path, ignore_file='data')
    self.max_vocab_size = len(self.idx2token)
    self.logger.info("Restore finished!")