Beispiel #1
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Returns a mapping from split name to its example generator.

        Builds one download URL per split under the dataset's base URL,
        hands the whole mapping to `dl_manager.download_and_extract`, and
        wraps every resulting local path with `self._generate_examples`.

        Args:
          dl_manager: Download manager used to fetch the `.jsonl` files.

        Returns:
          A dict keyed by split name (`train`, `ae_dev`, `ae_test`,
          `dev_xlnet`, `dev_luke`, `dev_bidaf`) whose values are the
          results of `self._generate_examples` for the downloaded file.
        """
        base_url = 'https://raw.githubusercontent.com/google-research-datasets/answer-equivalence-dataset/main/v1/'
        # File of each split, relative to `base_url`.
        relative_files = {
            'train': 'train.jsonl',
            'ae_dev': 'ae_dev.jsonl',
            'ae_test': 'ae_test.jsonl',
            'dev_xlnet': 'dev_by_system/dev_xlnet.jsonl',
            'dev_luke': 'dev_by_system/dev_luke.jsonl',
            'dev_bidaf': 'dev_by_system/dev_bidaf.jsonl',
        }
        # URLs always use '/', so concatenate instead of calling
        # os.path.join: that helper joins with the OS path separator and
        # would produce backslashes (invalid URLs) on Windows.
        urls = {
            split: base_url + relative_file
            for split, relative_file in relative_files.items()
        }

        paths = dl_manager.download_and_extract(urls)

        return {
            split: self._generate_examples(path)
            for split, path in paths.items()
        }
Beispiel #2
0
  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Builds the TRAIN and TEST split generators.

    Downloads and extracts `_LAMBADA_DATASET_URL`, then points each split
    at one plain-text file inside the extracted directory. Note that the
    TRAIN split reads the *development* text file.

    Args:
      dl_manager: Download manager used to fetch and extract the archive.

    Returns:
      A list with two `tfds.core.SplitGenerator`s (TRAIN, then TEST), each
      carrying a `filepath` entry in its `gen_kwargs`.
    """
    extracted_dir = dl_manager.download_and_extract(_LAMBADA_DATASET_URL)

    # (split, text file inside the extracted archive), in output order.
    split_files = (
        (tfds.Split.TRAIN, 'lambada_development_plain_text.txt'),
        (tfds.Split.TEST, 'lambada_test_plain_text.txt'),
    )

    return [
        tfds.core.SplitGenerator(
            name=split,
            gen_kwargs={'filepath': os.path.join(extracted_dir, filename)},
        )
        for split, filename in split_files
    ]
  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Builds the VALIDATION, TEST and TRAIN split generators.

    Downloads and extracts `URL`, then extracts the nested
    `<language>.tar.gz` archive selected by `self.builder_config.language`.
    Each split reads the `dev`, `test` or `train` entry of that archive.

    Args:
      dl_manager: Download manager used for the download and both
        extraction steps.

    Returns:
      A list of three `tfds.core.SplitGenerator`s in the order VALIDATION,
      TEST, TRAIN, each carrying a `filepath` entry in its `gen_kwargs`.
    """
    outer_dir = dl_manager.download_and_extract(URL)
    language_archive = os.path.join(
        outer_dir, self.builder_config.language + ".tar.gz")
    language_dir = dl_manager.extract(language_archive)

    # (split, entry name inside the language archive), in output order.
    split_entries = (
        (tfds.Split.VALIDATION, "dev"),
        (tfds.Split.TEST, "test"),
        (tfds.Split.TRAIN, "train"),
    )

    return [
        tfds.core.SplitGenerator(
            name=split,
            gen_kwargs={"filepath": os.path.join(language_dir, entry)},
        )
        for split, entry in split_entries
    ]
Beispiel #4
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Builds the TRAIN, VALIDATION and TEST example generators.

     Downloads and extracts everything in `_URLS`. Every split is given
     the same list of all three image directories, plus its own
     annotation JSON file.

     Args:
       dl_manager: Download manager used to fetch and extract `_URLS`.

     Returns:
       A dict mapping each `tfds.Split` to the result of
       `self._generate_examples(image_dirs, annotation_file)`.
     """
     extracted = dl_manager.download_and_extract(_URLS)

     # All splits share the full set of image folders.
     all_image_dirs = [
         extracted['train_images'] / 'train2017',
         extracted['validation_images'] / 'val2017',
         extracted['test_images'] / 'test2017',
     ]

     # (split, key of the annotation download, annotation file name).
     annotation_sources = (
         (tfds.Split.TRAIN, 'train_annotation', 'lvis_v1_train.json'),
         (tfds.Split.VALIDATION, 'validation_annotation',
          'lvis_v1_val.json'),
         (tfds.Split.TEST, 'test_annotation',
          'lvis_v1_image_info_test_dev.json'),
     )

     return {
         split: self._generate_examples(
             all_image_dirs, extracted[url_key] / json_name)
         for split, url_key, json_name in annotation_sources
     }
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Builds the example generators for every split of the dataset.

     Downloads and extracts `_DOWNLOAD_URL` and reads the TSV files under
     `WikiTableQuestions-master/data`. Produces six `split-<n>-<kind>`
     entries (n in 1..3, kind in train/dev) from the
     `random-split-<n>-<kind>.tsv` files, plus `train` (`training.tsv`)
     and `test` (`pristine-unseen-tables.tsv`).

     Args:
       dl_manager: Download manager used to fetch and extract the archive.

     Returns:
       A dict mapping split name to the result of
       `self._generate_examples(examples_path=..., tables_path=...)`.
     """
     repo_root = os.path.join(
         dl_manager.download_and_extract(_DOWNLOAD_URL),
         'WikiTableQuestions-master')
     tsv_dir = os.path.join(repo_root, 'data')

     def examples_from(tsv_name):
         # Every split uses the repository root as its tables path.
         return self._generate_examples(
             examples_path=os.path.join(tsv_dir, tsv_name),
             tables_path=repo_root)

     # Outer loop over the kind, inner over the number, so keys are
     # inserted as split-1-train, split-2-train, ..., split-3-dev.
     splits = {
         f'split-{number}-{kind}':
             examples_from(f'random-split-{number}-{kind}.tsv')
         for kind in ('train', 'dev')
         for number in range(1, 4)
     }
     splits['train'] = examples_from('training.tsv')
     splits['test'] = examples_from('pristine-unseen-tables.tsv')
     return splits
Beispiel #6
0
  def _split_generators(self, dl_manager: tfds.download.DownloadManager
                       ) -> List[tfds.core.SplitGenerator]:
    """Builds the TRAIN, VALIDATION and TEST split generators.

    Downloads and extracts `_DOWNLOAD_URL`, descends into
    `_DOWNLOAD_ARCHIVE_SUBDIR`, and passes the `train`, `dev` and `test`
    sub-directories through `_get_day_to_paths`.

    Args:
      dl_manager: Download manager used to fetch and extract the archive.

    Returns:
      A list of three `tfds.core.SplitGenerator`s (TRAIN, VALIDATION,
      TEST), each carrying a `day_to_paths` entry in its `gen_kwargs`.
    """
    extracted_root = dl_manager.download_and_extract(_DOWNLOAD_URL)
    archive_dir = os.path.join(extracted_root, _DOWNLOAD_ARCHIVE_SUBDIR)

    # (split, sub-directory of the archive), in output order.
    split_subdirs = (
        (tfds.Split.TRAIN, "train"),
        (tfds.Split.VALIDATION, "dev"),
        (tfds.Split.TEST, "test"),
    )

    return [
        tfds.core.SplitGenerator(
            name=split,
            # NOTE(review): presumably a day -> file paths mapping; confirm
            # against _get_day_to_paths.
            gen_kwargs={
                "day_to_paths":
                    _get_day_to_paths(os.path.join(archive_dir, subdir)),
            },
        )
        for split, subdir in split_subdirs
    ]
Beispiel #7
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
   """Builds the single TRAIN example generator.

   Args:
     dl_manager: Download manager used to fetch and extract `URL`.

   Returns:
     A dict with one entry, `tfds.Split.TRAIN`, holding the result of
     `self._generate_examples` on the extracted path.
   """
   extracted_path = dl_manager.download_and_extract(URL)
   train_examples = self._generate_examples(extracted_path)
   return {tfds.Split.TRAIN: train_examples}
Beispiel #8
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
   """Builds one example generator per entry of `_URLS`.

   Args:
     dl_manager: Download manager used to fetch and extract `_URLS`.

   Returns:
     A dict with the same keys as the extracted mapping, each holding the
     result of `self._generate_examples` on the corresponding path.
   """
   paths_by_split = dl_manager.download_and_extract(_URLS)
   generators = {}
   for split_name, split_path in paths_by_split.items():
     generators[split_name] = self._generate_examples(split_path)
   return generators