Example #1
0
 def get_features(self, data_sign):
     """Return the features for one data split, using the on-disk cache if enabled.

     The split name is validated first. Examples are only read and
     converted when no usable cache file is found.

     :param data_sign: 'train', 'val', 'test' or 'pseudo'
     :return: features (List[InputFeatures]):
     :raises ValueError: if data_sign is not one of the accepted splits
     """
     print("=*=" * 10)
     print("Loading {} data...".format(data_sign))
     # Reject unknown splits before any file is touched.
     if data_sign not in ("train", "val", "test", "pseudo"):
         raise ValueError(
             "please notice that the data can only be "
             "train/val/test/pseudo!!")
     # Cache file: <data_dir>/<data_sign>.cache.<max_seq_length>
     cache_path = os.path.join(
         self.data_dir, "{}.cache.{}".format(data_sign,
                                             str(self.max_seq_length)))
     # Cache hit: skip reading and converting the examples entirely.
     # NOTE(review): torch.load unpickles the file, so data_dir should
     # only ever contain cache files produced by this code.
     if self.data_cache and os.path.exists(cache_path):
         return torch.load(cache_path)
     # Cache miss (or caching disabled): build the features from scratch.
     examples = read_examples(self.data_dir, data_sign=data_sign)
     features = convert_examples_to_features(self.params,
                                             examples,
                                             self.tokenizer,
                                             greed_split=False)
     # Persist the result so the next call can take the fast path.
     if self.data_cache:
         torch.save(features, cache_path)
     return features
Example #2
0
    def convert_examples_to_features(self, data_sign):
        """Return the MRC-NER features of a split, loading them from cache when possible.

        :param data_sign: 'train', 'val' or 'test'
        :return: features (List[InputFeatures]):
        :raises ValueError: on a cache miss with any other data_sign
        """
        print("=*=" * 10)
        print("Loading {} data...".format(data_sign))

        # Cache file: <data_dir>/mrc-ner.<data_sign>.cache.<max_seq_length>
        cache_path = os.path.join(
            self.data_dir,
            "mrc-ner.{}.cache.{}".format(data_sign, str(self.max_seq_length)),
        )
        # Fast path: an existing cache file is used only when caching is on.
        if os.path.exists(cache_path) and self.data_cache:
            return torch.load(cache_path)

        # Slow path: the split name selects the "<data_sign>.data" file.
        if data_sign not in ("train", "val", "test"):
            raise ValueError("please notice that the data can only be train/val/test !!")
        data_file = os.path.join(self.data_dir, "{}.data".format(data_sign))
        examples = read_mrc_ner_examples(data_file)
        # Turn the raw examples into model-ready features.
        features = convert_examples_to_features(self.params, examples, self.tokenizer)
        # Store the result for later calls when caching is enabled.
        if self.data_cache:
            torch.save(features, cache_path)
        return features
Example #3
0
    def convert_examples_to_features(self, data_sign):
        """Return the features of a split, loading them from cache when possible.

        On a cache miss the examples are read, an n-gram dictionary is
        built from ``self.params.bert_model_dir``, and both are passed to
        the feature converter.

        :param data_sign: 'train', 'val' or 'test'
        :return: features (List[InputFeatures]):
        :raises ValueError: on a cache miss with any other data_sign
        """
        print("=*=" * 10)
        print("Loading {} data...".format(data_sign))

        # Cache file: <data_dir>/<data_sign>.cache.<max_seq_length>
        cache_path = os.path.join(
            self.data_dir,
            "{}.cache.{}".format(data_sign, str(self.max_seq_length)),
        )
        # Fast path: an existing cache file is used only when caching is on.
        if os.path.exists(cache_path) and self.data_cache:
            return torch.load(cache_path)

        # Slow path: only the three known splits can be read.
        if data_sign not in ("train", "val", "test"):
            raise ValueError("please notice that the data can only be train/val/test !!")
        examples = read_examples(self.data_dir, data_sign=data_sign)
        # The n-gram dictionary is built with the same tokenizer as the features.
        ngram_dict = ZenNgramDict(self.params.bert_model_dir, tokenizer=self.tokenizer)
        features = convert_examples_to_features(
            self.params, examples, self.tokenizer, ngram_dict=ngram_dict
        )
        # Store the result for later calls when caching is enabled.
        if self.data_cache:
            torch.save(features, cache_path)
        return features
Example #4
0
    def get_features(self, data_sign):
        """Read the examples of one data split and convert them to features.

        The examples come from ``<data_dir>/<data_sign>.data``. Nothing is
        cached by this method.

        :param data_sign: 'train', 'val', 'test' or 'pseudo'
        :return: features (List[InputFeatures]):
        :raises ValueError: if data_sign is not one of the accepted splits
        """
        print("=*=" * 10)
        print("Loading {} data...".format(data_sign))
        # Reject unknown splits before any file is touched.
        if data_sign not in ("train", "val", "test", "pseudo"):
            raise ValueError(
                "please notice that the data can only be "
                "train/val/test/pseudo !!")
        # Read the raw examples of the requested split.
        examples = read_examples(
            os.path.join(self.data_dir, f'{data_sign}.data'))
        # Convert them into model-ready features.
        return convert_examples_to_features(self.params,
                                            examples,
                                            self.tokenizer,
                                            greed_split=False)