Example #1
0
    def convert_examples_to_features(self, data_sign):
        """Return the features for one data split, using an on-disk cache.

        The cache file lives at
        ``<self.data_dir>/<data_sign>.cache.<self.max_seq_length>``. When
        ``self.data_cache`` is truthy and that file exists, the features are
        loaded from it and the raw examples are not read at all. Otherwise
        the examples are read with ``read_examples``, converted, and (if
        ``self.data_cache`` is truthy) saved to the cache file.

        :param data_sign: 'train', 'val' or 'test'
        :return: features (List[InputFeatures], per the original author)
        :raises ValueError: if ``data_sign`` is not one of the three splits
        """
        print("=*=" * 10)
        print("Loading {} data...".format(data_sign))

        # Reject unknown splits before touching the filesystem.
        if data_sign not in ("train", "val", "test"):
            raise ValueError(
                "please notice that the data can only be train/val/test !!")

        # Where the cached features for this split / sequence length live.
        cache_path = os.path.join(
            self.data_dir, "{}.cache.{}".format(data_sign,
                                                str(self.max_seq_length)))

        # Cache hit: skip reading and converting the raw examples entirely.
        if self.data_cache and os.path.exists(cache_path):
            # NOTE(review): torch.load unpickles the file, so the cache must
            # come from a trusted location.
            return torch.load(cache_path)

        # Cache miss (or caching disabled): build the features from scratch.
        examples = read_examples(self.data_dir, data_sign=data_sign)
        # Inside a method body this bare name resolves to the module-level
        # function of the same name, not to this method.
        features = convert_examples_to_features(self.params, examples,
                                                self.tokenizer)
        if self.data_cache:
            torch.save(features, cache_path)
        return features
Example #2
0
    def get_features(self, data_sign):
        """Read the examples of one data split and convert them to features.

        Examples are read from ``<self.data_dir>/<data_sign>.data`` and
        converted with ``convert_examples_to_features`` using
        ``greed_split=False``.

        :param data_sign: 'train', 'val', 'test' or 'pseudo'
        :return: features (List[InputFeatures], per the original author)
        :raises ValueError: if ``data_sign`` is not one of the four splits
        """
        print("=*=" * 10)
        print("Loading {} data...".format(data_sign))

        # 'pseudo' is an accepted split, so the message must list it too.
        if data_sign not in ("train", "val", "test", "pseudo"):
            raise ValueError(
                "please notice that the data can only be "
                "train/val/test/pseudo !!")

        examples = read_examples(
            os.path.join(self.data_dir, f'{data_sign}.data'))
        return convert_examples_to_features(self.params,
                                            examples,
                                            self.tokenizer,
                                            greed_split=False)