def convert_examples_to_features(self, data_sign):
    """Return the features for one data split, using an on-disk cache if enabled.

    When ``self.data_cache`` is truthy and a cache file for this split and
    ``self.max_seq_length`` already exists under ``self.data_dir``, the
    features are loaded from it. Otherwise the raw examples are read,
    converted, and (if caching is enabled) written to the cache file.

    :param data_sign: 'train', 'val' or 'test'
    :return: features (List[InputFeatures]):
    :raises ValueError: if ``data_sign`` is not one of the three splits.
    """
    print("=*=" * 10)
    print("Loading {} data...".format(data_sign))

    # Validate first so a bad split name fails before any file access.
    if data_sign not in ("train", "val", "test"):
        raise ValueError(
            "please notice that the data can only be train/val/test !!")

    # Cache file name encodes the split and the maximum sequence length.
    cache_path = os.path.join(
        self.data_dir,
        "{}.cache.{}".format(data_sign, str(self.max_seq_length)))

    if self.data_cache and os.path.exists(cache_path):
        # NOTE(review): torch.load unpickles arbitrary objects; only load
        # cache files this project wrote itself. Newer PyTorch releases may
        # also need an explicit ``weights_only`` setting here - confirm.
        return torch.load(cache_path)

    # Cache miss (or caching disabled): only now pay for reading the raw
    # examples, instead of reading them on every call.
    examples = read_examples(self.data_dir, data_sign=data_sign)
    # Presumably resolves to the module-level helper of the same name, not
    # to this method - verify against the enclosing module.
    features = convert_examples_to_features(self.params, examples,
                                            self.tokenizer)
    if self.data_cache:
        torch.save(features, cache_path)
    return features
def get_features(self, data_sign):
    """Read the examples of one data split and convert them to features.

    The examples are read from ``<self.data_dir>/<data_sign>.data`` and
    converted with ``greed_split=False``. Nothing is cached.

    :param data_sign: 'train', 'val', 'test' or 'pseudo'
    :return: features (List[InputFeatures]):
    :raises ValueError: if ``data_sign`` is not one of the accepted splits.
    """
    print("=*=" * 10)
    print("Loading {} data...".format(data_sign))

    accepted_splits = ("train", "val", "test", "pseudo")
    if data_sign not in accepted_splits:
        # The message lists every accepted split (including 'pseudo') and
        # echoes the rejected value so the caller can see what went wrong.
        raise ValueError(
            "please notice that the data can only be "
            "train/val/test/pseudo, got {!r} !!".format(data_sign))

    examples = read_examples(
        os.path.join(self.data_dir, f'{data_sign}.data'))
    return convert_examples_to_features(self.params, examples,
                                        self.tokenizer, greed_split=False)