예제 #1
0
 def _load_restored(self, dataset_path):
     """Load the corpus and attribute data from restored binary files (train, dev, test).

     Two ``load_restored`` calls are made, one with the ``'corpus.'`` prefix
     and one with the ``'attribute.'`` prefix. Each returned triple is
     unpacked onto the instance, and ``max_vocab_size`` is then set to the
     length of the restored ``idx2token``.

     Args:
         dataset_path (str): path of dataset dir.
     """
     # Text side: data plus the two token lookup tables.
     corpus_bundle = load_restored(dataset_path, 'corpus.')
     self.text_data, self.idx2token, self.token2idx = corpus_bundle

     # Attribute side: same three-element layout as the corpus bundle.
     attribute_bundle = load_restored(dataset_path, 'attribute.')
     self.attribute_data, self.idx2attribute, self.attribute2idx = attribute_bundle

     self.max_vocab_size = len(self.idx2token)
     self.logger.info("Restore finished!")
예제 #2
0
    def _load_restored(self, dataset_path):
        """Load the source and target data from restored binary files (train, dev, test).

        The file prefix passed to ``load_restored`` for each side is the
        corresponding suffix attribute (``self.source_suffix`` /
        ``self.target_suffix``) followed by a dot. Each returned triple is
        unpacked onto the instance, after which the two vocabulary sizes are
        taken from the lengths of the restored ``idx2token`` tables.

        Args:
            dataset_path (str): path of dataset dir.
        """
        # Source side.
        source_prefix = self.source_suffix + '.'
        source_bundle = load_restored(dataset_path, source_prefix)
        self.source_text_data, self.source_idx2token, self.source_token2idx = source_bundle

        # Target side; its prefix is built only after the source side is stored.
        target_prefix = self.target_suffix + '.'
        target_bundle = load_restored(dataset_path, target_prefix)
        self.target_text_data, self.target_idx2token, self.target_token2idx = target_bundle

        self.max_source_vocab_size = len(self.source_idx2token)
        self.max_target_vocab_size = len(self.target_idx2token)
        self.logger.info("Restore finished!")
예제 #3
0
 def _load_restored(self, dataset_path):
     """Load dataset from restored binary files (train, dev, test).

     For each of the ``knowledge``, ``source`` and ``target`` groups whose
     ``<group>_format`` attribute is not ``'none'``, the first element of
     the ``load_restored`` result is stored on ``self.<group>_text_data``.
     The vocabulary mappings are then loaded once and stored on
     ``self.idx2token`` and ``self.token2idx``, and ``max_vocab_size`` is
     set to the length of ``idx2token``.

     Args:
         dataset_path (str): path of dataset dir.
     """
     for group in ('knowledge', 'source', 'target'):
         if getattr(self, group + '_format') == 'none':
             # This group is disabled; leave its text data untouched.
             continue
         # The attribute name depends on the group, so setattr is needed here.
         # NOTE(review): ignore_file='vocab' presumably skips the vocabulary
         # files so only the data is read -- confirm against load_restored.
         restored = load_restored(dataset_path, group + '.', ignore_file='vocab')
         setattr(self, group + '_text_data', restored[0])

     # Fixed attribute names: assign directly instead of setattr with a
     # constant string.
     self.idx2token, self.token2idx = load_restored(dataset_path, ignore_file='data')
     self.max_vocab_size = len(self.idx2token)
     self.logger.info("Restore finished!")