def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Fetch the RoadObstacle archive and declare one generator per split.

        The server address is read from the ``ROAD_OBSTACLE_URL`` environment
        variable and the archive name is derived from the builder config
        version.

        Returns:
            A list of ``SplitGenerator`` objects: first a ``tfds.Split.TEST``
            entry bound to the ``'full'`` key, then one entry for every key of
            ``splits.json`` in sorted order.

        Raises:
            RuntimeError: if ``ROAD_OBSTACLE_URL`` is not set.
        """
        server_url = environ.get('ROAD_OBSTACLE_URL')
        if server_url is None:
            raise RuntimeError('Please specify server URL as ROAD_OBSTACLE_URL env variable.')

        version = self.builder_config.version
        archive_url = server_url + f"/dataset_RoadObstacle_{version}.zip"
        extracted_root = dl_manager.download_and_extract(archive_url)
        data_dir = Path(extracted_root) / 'dataset_RoadObstacle'

        # splits.json ships inside the archive; its keys name the extra splits.
        split_table = json.loads((data_dir / 'splits.json').read_text())

        def build_entry(name, key):
            return SplitGenerator(
                name=name,
                gen_kwargs=dict(data_dir=str(data_dir), split=key),
            )

        generators = [build_entry(tfds.Split.TEST, 'full')]
        for split_key in sorted(split_table.keys()):
            generators.append(build_entry(split_key, split_key))
        return generators
Example #2
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
   """Download the COSMOS 25.2 training sample and expose it as the train split.

   Returns:
     A dict with the single key ``tfds.Split.TRAIN`` whose value is the
     result of ``self._generate_examples`` called on the loaded catalog.
   """
   archive_url = 'https://zenodo.org/record/3242143/files/COSMOS_25.2_training_sample.tar.gz'
   extracted = dl_manager.download_and_extract(archive_url)
   # The catalog files sit in a sub-directory named after the sample.
   catalog_dir = extracted / 'COSMOS_25.2_training_sample'
   catalog = galsim.COSMOSCatalog(dir=catalog_dir)
   return {tfds.Split.TRAIN: self._generate_examples(catalog)}
Example #3
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download KorQuAD files and build the train/dev example iterators.

        When ``self.builder_config.v == 1`` the two v1.0 JSON files are
        downloaded as-is. Otherwise the KorQuAD 2.1 zip shards (13 for train,
        2 for dev) are downloaded and extracted, and the examples of every
        shard are chained into a single iterable per split.

        Returns:
            A dict with the keys ``"train"`` and ``"dev"``.
        """
        dataset_root = (
            "https://raw.githubusercontent.com/korquad/korquad.github.io/"
            "918f5229639203d741045fdcdbb7462c602887da/dataset"
        )
        split_names = ("train", "dev")

        if self.builder_config.v != 1:
            shard_counts = {"train": 13, "dev": 2}
            archives = dl_manager.download_and_extract({
                part: [
                    f"{dataset_root}/KorQuAD_2.1/{part}/KorQuAD_2.1_{part}_{idx:02}.zip"
                    for idx in range(count)
                ]
                for part, count in shard_counts.items()
            })
            return {
                part: itertools.chain.from_iterable(
                    [self._generate_examples(shard) for shard in archives[part]])
                for part in split_names
            }

        files = dl_manager.download({
            part: f"{dataset_root}/KorQuAD_v1.0_{part}.json"
            for part in split_names
        })
        return {part: self._generate_examples(files[part]) for part in split_names}
Example #4
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the index (plus optional videos and poses) and build splits.

        The JSON index at ``_INDEX_URL`` is always downloaded. If the builder
        config enables ``include_video``, every instance in the index gets a
        ``"video"`` entry holding its downloaded path, or ``None`` when no path
        was returned for its ``"video_id"``. Pose data is fetched only when
        ``include_pose`` equals ``"openpose"``.

        Returns:
            A dict with the keys ``"train"``, ``"validation"`` and ``"test"``;
            each value is the result of ``self._generate_examples`` for the
            matching split tag (``"train"``, ``"val"``, ``"test"``).
        """
        config = self._builder_config
        index_file = dl_manager.download(_INDEX_URL)

        with GFile(index_file, "r") as handle:
            data = json.load(handle)

        if config.include_video:
            video_paths = self._download_videos(data, dl_manager)
            for datum in data:
                for instance in datum["instances"]:
                    video_id = instance["video_id"]
                    instance["video"] = (
                        video_paths[video_id] if video_id in video_paths else None)

        pose_path = None
        if config.include_pose == "openpose":
            pose_path = dl_manager.download_and_extract(
                _POSE_URLS[config.include_pose])

        split_tags = (("train", "train"), ("validation", "val"), ("test", "test"))
        return {
            split: self._generate_examples(data, pose_path, tag)
            for split, tag in split_tags
        }
Example #5
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download both training archives and declare train/validation/test.

        All three splits read from the same two extracted directories; they
        differ only in the listfile (resolved under ``RESOURCES``) handed to
        the example generator.

        Returns:
            A list of three ``tfds.core.SplitGenerator`` objects, in the order
            TRAIN, VALIDATION, TEST.
        """
        archives = dl_manager.download_and_extract({
            'train-1': 'https://archive.physionet.org/users/shared/challenge-2019/training_setA.zip',  # noqa: E501
            'train-2': 'https://archive.physionet.org/users/shared/challenge-2019/training_setB.zip'  # noqa: E501
        })
        first_dir = os.path.join(archives['train-1'], 'training')
        second_dir = os.path.join(archives['train-2'], 'training_setB')

        listfile_by_split = (
            (tfds.Split.TRAIN, 'train_listfile.csv'),
            (tfds.Split.VALIDATION, 'val_listfile.csv'),
            (tfds.Split.TEST, 'test_listfile.csv'),
        )
        return [
            tfds.core.SplitGenerator(
                name=split,
                gen_kwargs={
                    'data_paths': [first_dir, second_dir],
                    'listfile': os.path.join(RESOURCES, listfile),
                },
            )
            for split, listfile in listfile_by_split
        ]
Example #6
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download and extract ``reddit.zip`` from ``DGL_URL``.

        Returns:
            A dict with the single key ``"full"`` mapped to the result of
            ``self._generate_examples`` on the extracted path.
        """
        archive_url = f"{DGL_URL}reddit.zip"
        extracted = dl_manager.download_and_extract(archive_url)
        return {"full": self._generate_examples(extracted)}
Example #7
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Download the Brazilian coffee archive and expose its five folds.

     Returns:
         A dict with the keys ``'fold1'`` .. ``'fold5'``; each value is the
         result of ``self._generate_examples`` for that fold's directory
         under ``brazilian_coffee_scenes``.
     """
     archive_url = (
         "http://www.patreo.dcc.ufmg.br/wp-content/uploads/2017/11/brazilian_coffee_dataset.zip"
     )
     # dl_manager returns a pathlib-like object, so `/` joins path segments.
     root = dl_manager.download_and_extract(archive_url)
     return {
         f'fold{fold}': self._generate_examples(
             path=root / f'brazilian_coffee_scenes/fold{fold}')
         for fold in range(1, 6)
     }
Example #8
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the ``.mat`` file named after the builder config.

        Returns:
            A dict with the single key ``"train"`` mapped to the result of
            ``self._generate_examples`` on the downloaded path.
        """
        mat_url = (
            "https://github.com/kaize0409/GCN_AnomalyDetection/raw/master/gae/data/"
            f"{self.builder_config.name}.mat"
        )
        downloaded = dl_manager.download_and_extract(mat_url)
        return {"train": self._generate_examples(downloaded)}
Example #9
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the GraphSAGE zip that matches the builder config name.

        Returns:
            A dict with the single key ``"full"`` mapped to the result of
            ``self._generate_examples`` on the extracted path.
        """
        dataset_name = self.builder_config.name
        extracted = dl_manager.download_and_extract(
            f"http://snap.stanford.edu/graphsage/{dataset_name}.zip")
        return {"full": self._generate_examples(extracted)}
Example #10
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download and extract ``<DGL_URL><self._URL_NAME>.zip``.

        Returns:
            A dict with the single key ``"full"`` mapped to the result of
            ``self._generate_examples`` on the extracted directory.
        """
        archive_url = f"{DGL_URL}{self._URL_NAME}.zip"
        extracted_dir = dl_manager.download_and_extract(archive_url)
        return {"full": self._generate_examples(extracted_dir)}
Example #11
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Download the ``.tgz`` archive named after the builder config.

     Returns:
         A dict with the single key ``"full"`` mapped to the result of
         ``self._generate_examples`` on the extracted path.
     """
     dataset_name = self.builder_config.name
     archive_url = f"https://data.dgl.ai/dataset/{dataset_name}.tgz"
     extracted = dl_manager.download_and_extract(archive_url)
     return {"full": self._generate_examples(extracted)}
Example #12
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download and decompress the soc-LiveJournal1 edge list.

        Returns:
            A dict with the single key ``"full"`` mapped to the result of
            ``self._generate_examples`` on the extracted path.
        """
        edge_list_url = "https://snap.stanford.edu/data/soc-LiveJournal1.txt.gz"
        extracted = dl_manager.download_and_extract(edge_list_url)
        return {"full": self._generate_examples(extracted)}
Example #13
0
  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Download ``FormatedImages.zip`` and expose it as the train split.

    Returns:
      A dict with the single key ``'train'`` mapped to the result of
      ``self._generate_examples`` on the ``FormatedImages`` sub-directory.
    """
    archive_url = 'https://github.com/drawwithai/Dataset/raw/main/FormatedImages.zip'
    extracted = dl_manager.download_and_extract(archive_url)
    images_dir = extracted / "FormatedImages"
    return {'train': self._generate_examples(images_dir)}
Example #14
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the dataset archive and expose it as the train split.

        Returns:
            A dict with the single key ``'train'`` mapped to the result of
            ``self._generate_examples`` on the ``train_imgs`` sub-directory.
        """
        # TODO(SVBRDF): the URL below is still the template placeholder;
        # replace it with the real data location.
        archive_url = 'https://todo-data-url'
        extracted = dl_manager.download_and_extract(archive_url)
        return {'train': self._generate_examples(extracted / 'train_imgs')}
Example #15
0
        def _split_generators(self, dl_manager: tfds.download.DownloadManager):
            """Download the ``.npz`` file named by ``builder_config.url_name``.

            Returns:
                A dict with the single key ``"full"`` mapped to the result of
                ``self._generate_examples`` on the downloaded path.
            """
            npz_url = (
                "https://github.com/shchur/gnn-benchmark/raw/master/data/npz/"
                f"{self.builder_config.url_name}.npz"
            )
            downloaded = dl_manager.download_and_extract(npz_url)
            return {"full": self._generate_examples(downloaded)}
Example #16
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the train and validation archives and build both splits.

        Each split pairs a Hindi source file (``.hi``) with an English target
        file (``.en``) taken from that split's own extracted archive.

        Returns:
            A dict with the keys ``'train'`` and ``'validation'``; each value
            is the result of ``self._generate_examples`` for that split's
            source/target file pair.
        """
        train = dl_manager.download_and_extract(
            'https://storage.googleapis.com/ai4b-anuvaad-nmt/test-runs/train.zip'
        )
        val = dl_manager.download_and_extract(
            'https://storage.googleapis.com/ai4b-anuvaad-nmt/test-runs/validation.zip'
        )

        return {
            'train':
            self._generate_examples(source=train / 'train.hi',
                                    target=train / 'train.en'),
            # Fix: read the validation files from the validation archive. The
            # previous code looked them up under the train archive and left
            # `val` unused.
            # NOTE(review): assumes validation.zip extracts validation.hi /
            # validation.en at its top level, mirroring how train.zip is
            # consumed above - confirm against the archive contents.
            'validation':
            self._generate_examples(source=val / 'validation.hi',
                                    target=val / 'validation.en')
        }
Example #17
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download ``datasets.tgz`` and select the configured dataset folder.

        Returns:
            A dict with the single key ``"full"`` mapped to the result of
            ``self._generate_examples`` on ``datasets/<builder_config.name>``
            inside the extracted archive.
        """
        extracted = dl_manager.download_and_extract(
            {"data": "http://sami.haija.org/graph/datasets.tgz"})
        dataset_dir = extracted["data"] / "datasets" / self.builder_config.name
        return {"full": self._generate_examples(dataset_dir)}
Example #18
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download ``OneLine_45.zip`` and expose it as the train split.

        Returns:
            A dict with the single key ``'train'`` mapped to the result of
            ``self._generate_examples`` on the ``OneLine_45-train``
            sub-directory.
        """
        archive_url = (
            'https://github.com/drawwithai/Dataset/raw/main/OneLine_45/OneLine_45.zip'
        )
        extracted = dl_manager.download_and_extract(archive_url)
        train_dir = extracted / 'OneLine_45-train'
        return {'train': self._generate_examples(train_dir)}
Example #19
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Download ``_URLS`` and declare one split per patch size.

     Every split reads from the same ``test`` directory of the extracted
     download; only the ``size`` argument differs.

     Returns:
         A list of ``tfds.core.SplitGenerator`` objects named ``"large"``,
         ``"medium"`` and ``"small"``, in that order.
     """
     extracted = dl_manager.download_and_extract(_URLS)
     test_dir = os.path.join(extracted, "test")

     generators = []
     for size_name in ("large", "medium", "small"):
         generators.append(
             tfds.core.SplitGenerator(
                 name=size_name,
                 gen_kwargs={"path": test_dir, "size": size_name},
             ))
     return generators
Example #20
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the KLUE-DP v1 train and dev TSV files.

        Returns:
            A dict with the keys ``"train"`` and ``"dev"``; each value is the
            result of ``self._generate_examples`` on the matching file.
        """
        task_root = (
            "https://raw.githubusercontent.com/KLUE-benchmark/KLUE/"
            "ab22cd5cfdd6b527a9a4e2d177f9dacb85ddde2c/klue_benchmark/klue-dp-v1"
        )
        split_names = ("train", "dev")
        files = dl_manager.download_and_extract(
            {part: f"{task_root}/klue-dp-v1_{part}.tsv" for part in split_names}
        )
        return {part: self._generate_examples(files[part]) for part in split_names}
Example #21
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the sae4k text file selected by ``builder_config.augmented``.

        Returns:
            A dict with the single key ``"train"`` mapped to the result of
            ``self._generate_examples`` on the downloaded path.
        """
        # v2 is the augmented variant of the corpus.
        file_by_augmented = {False: "sae4k_v1.txt", True: "sae4k_v2.txt"}
        file_name = file_by_augmented[self.builder_config.augmented]

        data_root = (
            "https://raw.githubusercontent.com/warnikchow/sae4k/"
            "a3a7a4510ea010d210956ad50e38a61c7c838b0f/data"
        )
        downloaded = dl_manager.download_and_extract(f"{data_root}/{file_name}")
        return {"train": self._generate_examples(downloaded)}
Example #22
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download ``builder_config.data_url`` and build one iterator per split.

        When the config has no ``manual_split``, every downloaded entry
        becomes a split as-is. Otherwise the downloads are first remapped by
        ``_update_split`` and each split's generator also receives the
        matching entry of ``manual_split['split']``.

        Returns:
            A dict keyed by split name.
        """
        config = self.builder_config
        downloads = dl_manager.download_and_extract(config.data_url)

        manual_split = config.manual_split
        if manual_split is None:
            return {
                name: self._generate_examples(location)
                for name, location in downloads.items()
            }

        downloads = _update_split(downloads, manual_split)
        split_fn = manual_split['split']
        return {
            name: self._generate_examples(location, split_fn[name])
            for name, location in downloads.items()
        }
Example #23
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Download the botnet archive for the configured name and build splits.

     All three splits share the same split-index pickle and raw HDF5 file;
     only the split tag passed to ``self._generate_examples`` differs.

     Returns:
         A dict keyed by ``tfds.core.Split.TRAIN``, ``VALIDATION`` and
         ``TEST``, generated with the tags ``"train"``, ``"val"`` and
         ``"test"`` respectively.
     """
     name = self.builder_config.name
     root = dl_manager.download_and_extract(
         f"https://zenodo.org/record/3689089/files/botnet_{name}.tar.gz")
     split_path = root / f"{name}_split_idx.pkl"
     data_path = root / f"{name}_raw.hdf5"

     tag_by_split = (
         (tfds.core.Split.TRAIN, "train"),
         (tfds.core.Split.VALIDATION, "val"),
         (tfds.core.Split.TEST, "test"),
     )
     return {
         split: self._generate_examples(
             tag, split_path=split_path, data_path=data_path)
         for split, tag in tag_by_split
     }
Example #24
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the KLUE-RE v1 files and build the train/dev iterators.

        The relation list is loaded once here and handed to both generators.

        Returns:
            A dict with the keys ``"train"`` and ``"dev"``.
        """
        task_root = (
            "https://raw.githubusercontent.com/KLUE-benchmark/KLUE/"
            "ab22cd5cfdd6b527a9a4e2d177f9dacb85ddde2c/klue_benchmark/klue-re-v1"
        )
        downloads = dl_manager.download_and_extract(
            {
                "train": f"{task_root}/klue-re-v1_train.json",
                "dev": f"{task_root}/klue-re-v1_dev.json",
                "relation_list": f"{task_root}/relation_list.json",
            }
        )

        with downloads["relation_list"].open() as handle:
            relation_payload = json.load(handle)
        relations = relation_payload["relations"]

        return {
            part: self._generate_examples(downloads[part], relations=relations)
            for part in ("train", "dev")
        }
Example #25
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Download ``_DL_URL`` and declare the train/validation/test splits.

     Each split reads from its own sub-directory of ``data_64x64`` inside
     the extracted download.

     Returns:
         A list of ``tfds.core.SplitGenerator`` objects in the order TRAIN,
         VALIDATION, TEST.
     """
     root = dl_manager.download_and_extract(_DL_URL)
     subdir_by_split = (
         (tfds.Split.TRAIN, "train"),
         (tfds.Split.VALIDATION, "val"),
         (tfds.Split.TEST, "test"),
     )
     return [
         tfds.core.SplitGenerator(
             name=split,
             gen_kwargs={"path": os.path.join(root, "data_64x64", subdir)},
         )
         for split, subdir in subdir_by_split
     ]
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download annotations (and optional poses) and declare the splits.

        The annotation URL depends on ``include_video``. A pose archive is
        added to the download list whenever ``include_pose`` is set, but a
        pose path is only passed on when ``include_pose`` is ``"holistic"``.

        Returns:
            A list of ``tfds.core.SplitGenerator`` objects in the order
            VALIDATION (``"dev"``), TEST (``"test"``), TRAIN (``"train"``).
        """
        config = self._builder_config

        if config.include_video:
            annotations_url = _VIDEO_ANNOTATIONS_URL
        else:
            annotations_url = _ANNOTATIONS_URL
        urls = [annotations_url]
        if config.include_pose is not None:
            urls.append(_POSE_URLS[config.include_pose])

        # Results come back in request order: annotations first, pose second.
        downloads = dl_manager.download_and_extract(urls)
        annotations_path = path.join(downloads[0], "PHOENIX-2014-T-release-v3",
                                     "PHOENIX-2014-T")

        pose_path = None
        if config.include_pose == "holistic":
            pose_path = path.join(downloads[1], "holistic")

        def make_generator(split, split_key):
            return tfds.core.SplitGenerator(
                name=split,
                gen_kwargs={
                    "annotations_path": annotations_path,
                    "pose_path": pose_path,
                    "split": split_key,
                },
            )

        return [
            make_generator(tfds.Split.VALIDATION, "dev"),
            make_generator(tfds.Split.TEST, "test"),
            make_generator(tfds.Split.TRAIN, "train"),
        ]
Example #27
0
  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Download every entry of ``builder_config.data_url`` and build splits.

    For configs whose name does not start with ``"v1.0"``, each split's
    downloaded directories are replaced by the ``*.json`` files found
    directly inside them. If the config has a ``manual_split``, the paths
    are remapped by ``_update_split`` and each generator also receives the
    matching entry of ``manual_split['split']``.

    Returns:
      A dict keyed by split name.
    """
    config = self.builder_config
    path_kv = {
        split: dl_manager.download_and_extract(url)
        for split, url in config.data_url.items()
    }

    if not config.name.startswith("v1.0"):
      path_kv = {
          split: [
              json_file
              for folder in folders
              for json_file in tf.io.gfile.glob(os.path.join(folder, "*.json"))
          ]
          for split, folders in path_kv.items()
      }

    manual_split = config.manual_split
    if manual_split is None:
      return {k: self._generate_examples(v) for k, v in path_kv.items()}

    path_kv = _update_split(path_kv, manual_split)
    split_fn = manual_split['split']
    return {k: self._generate_examples(v, split_fn[k]) for k, v in path_kv.items()}
Example #28
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download set-a/b/c of the 2012 challenge and declare the splits.

        All three splits read from the same three extracted directories;
        they differ only in the listfile (resolved under ``RESOURCES``)
        passed as ``outcome_file``. The ``Outcomes-*.txt`` files of the same
        release are not downloaded here.

        Returns:
            A list of ``tfds.core.SplitGenerator`` objects in the order
            TRAIN, VALIDATION, TEST.
        """
        release_root = 'http://physionet.org/files/challenge-2012/1.0.0'
        set_names = ('set-a', 'set-b', 'set-c')

        paths = dl_manager.download_and_extract(
            {name: f'{release_root}/{name}.tar.gz' for name in set_names})
        # Each archive unpacks into a folder carrying the set's own name.
        data_dirs = [os.path.join(paths[name], name) for name in set_names]

        listfile_by_split = (
            (tfds.Split.TRAIN, 'train_listfile.csv'),
            (tfds.Split.VALIDATION, 'val_listfile.csv'),
            (tfds.Split.TEST, 'test_listfile.csv'),
        )
        return [
            tfds.core.SplitGenerator(
                name=split,
                gen_kwargs={
                    # Fresh list per split so the generators share no state.
                    'data_dirs': list(data_dirs),
                    'outcome_file': os.path.join(RESOURCES, listfile),
                },
            )
            for split, listfile in listfile_by_split
        ]
Example #29
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the namuwikitext v0.3 archives and build all three splits.

        Returns:
            A dict with the keys ``"train"``, ``"dev"`` and ``"test"``; each
            value is the result of ``self._generate_examples`` called with
            the extracted text file and the split name.
        """
        release_root = "https://github.com/lovit/namuwikitext/releases/download/v0.3"
        split_names = ("train", "dev", "test")

        archives = dl_manager.download_and_extract({
            part: f"{release_root}/namuwikitext_20200302.{part}.zip"
            for part in split_names
        })

        # Each zip holds one file named like the archive minus ".zip".
        return {
            part: self._generate_examples(
                archives[part] / f"namuwikitext_20200302.{part}", part)
            for part in split_names
        }
Example #30
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the monolingual archive and declare two splits per language.

        For every language in ``builder_config.languages`` a split named
        ``<lang>`` and a split named ``<lang>-validation`` are declared, each
        pointing at the matching ``.txt`` file under ``monoling``.

        Returns:
            A list of ``tfds.core.SplitGenerator`` objects, ordered language
            by language with the main split before its validation split.
        """
        archive_url = (
            "https://storage.googleapis.com/ai4bharat-public-indic-nlp-corpora/indiccorp/all_langs/monoling.zip"
        )
        root = dl_manager.download_and_extract(archive_url)

        generators = []
        for lang in self.builder_config.languages:
            main_split = tfds.core.SplitGenerator(
                name=lang,
                gen_kwargs=dict(path=root / f"monoling/{lang}.txt"),
            )
            validation_split = tfds.core.SplitGenerator(
                name=f"{lang}-validation",
                gen_kwargs=dict(path=root / f"monoling/{lang}-validation.txt"),
            )
            generators += [main_split, validation_split]
        return generators