Ejemplo n.º 1
0
    def __init__(self, save_path="."):
        """Make the CMRC2018 dataset available locally and run its loaders.

        Args:
            save_path: Base directory. The dataset is looked up at
                ``<save_path>/datasets/cmrc2018``; it is also handed to
                ``tf.keras.utils.get_file`` as ``cache_dir`` when the
                dataset directory does not exist yet.
        """
        self.dataset_dir = os.path.join(save_path, "datasets/cmrc2018")

        if os.path.exists(self.dataset_dir):
            logger.info("Dataset {} already cached.".format(self.dataset_dir))
        else:
            # Dataset directory is missing: request the archive from
            # _DATA_URL with extraction enabled. The path returned by
            # get_file is not needed here.
            tf.keras.utils.get_file(
                origin=_DATA_URL,
                fname="cmrc2018.tar.gz",
                cache_dir=save_path,
                extract=True,
            )

        # Loaders run in this fixed order: train, dev, test.
        self._load_train_examples()
        self._load_dev_examples()
        self._load_test_examples()
Ejemplo n.º 2
0
    def __init__(self, save_path=".", load_df=False):
        """Prepare the MSRA NER dataset, load its data and build label maps.

        Args:
            save_path: Base directory. The dataset is expected at
                ``<save_path>/datasets/msra_ner``; when that path is missing,
                the archive is requested through ``tf.keras.utils.get_file``
                with ``cache_dir=save_path`` and extraction enabled.
            load_df: When true, call ``_load_dataset_df()`` instead of the
                three per-split example loaders.

        Raises:
            OSError: If ``label_map.json`` cannot be opened in the dataset
                directory.
            json.JSONDecodeError: If ``label_map.json`` is not valid JSON.
        """
        self.dataset_dir = os.path.join(save_path, "datasets/msra_ner")
        if not os.path.exists(self.dataset_dir):
            # The path returned by get_file is not used; only the
            # download/extract side effect matters here.
            tf.keras.utils.get_file(
                fname="msra_ner.tar.gz",
                origin=_DATA_URL,
                extract=True,
                cache_dir=save_path,
            )
        else:
            logger.info("Dataset {} already cached.".format(self.dataset_dir))

        if load_df:
            self._load_dataset_df()
        else:
            self._load_train_examples()
            self._load_test_examples()
            self._load_dev_examples()

        # Read the label map inside a context manager so the file handle is
        # closed deterministically instead of being left to the garbage
        # collector.
        label_map_path = os.path.join(self.dataset_dir, "label_map.json")
        with open(label_map_path, "r") as label_map_file:
            self.label_map = json.load(label_map_file)
        # Inverse lookup (value -> key) of label_map.
        # NOTE(review): presumably label name -> id and back; confirm against
        # the contents of label_map.json.
        self.map_label = {v: k for k, v in self.label_map.items()}