def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Download the data and define splits.

    Returns:
      A list of SplitGenerators: the 'full' split exposed as TEST, plus one
      split per key found in the dataset's splits.json.

    Raises:
      RuntimeError: if the ROAD_OBSTACLE_URL environment variable is unset.
    """
    download_server = environ.get('ROAD_OBSTACLE_URL')
    if download_server is None:
        raise RuntimeError('Please specify server URL as ROAD_OBSTACLE_URL env variable.')
    v = self.builder_config.version
    download_url = download_server + "/dataset_RoadObstacle_{v}.zip".format(v=v)
    download_dir = dl_manager.download_and_extract(download_url)
    data_dir = Path(download_dir) / 'dataset_RoadObstacle'
    # splits.json lists the named sub-splits shipped with the dataset.
    splits = json.loads((data_dir / 'splits.json').read_text())

    # FIX: lambda assigned to a name (PEP8 E731) replaced with a local def.
    def make_split_entry(name, key):
        return SplitGenerator(
            name=name, gen_kwargs=dict(data_dir=str(data_dir), split=key))

    # 'full' is the canonical TEST split, in addition to each named split.
    return ([make_split_entry(tfds.Split.TEST, 'full')]
            + [make_split_entry(k, k) for k in sorted(splits.keys())])
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    # TODO(galsim_cosmos): Downloads the data and defines the splits
    archive_root = dl_manager.download_and_extract(
        'https://zenodo.org/record/3242143/files/COSMOS_25.2_training_sample.tar.gz')
    catalog = galsim.COSMOSCatalog(dir=archive_root / 'COSMOS_25.2_training_sample')
    # TODO(galsim_cosmos): Returns the Dict[split names, Iterator[Key, Example]]
    # The whole catalog is exposed as a single training split.
    return {tfds.Split.TRAIN: self._generate_examples(catalog)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Builds train/dev splits for KorQuAD v1.0 or v2.1."""
    base = ("https://raw.githubusercontent.com/korquad/korquad.github.io/"
            "918f5229639203d741045fdcdbb7462c602887da/dataset")
    if self.builder_config.v == 1:
        # v1.0 ships as two plain JSON files (no extraction needed).
        files = dl_manager.download({
            "train": f"{base}/KorQuAD_v1.0_train.json",
            "dev": f"{base}/KorQuAD_v1.0_dev.json",
        })
        return {
            "train": self._generate_examples(files["train"]),
            "dev": self._generate_examples(files["dev"]),
        }
    # v2.1 is sharded into numbered zip archives: 13 train shards, 2 dev shards.
    archives = dl_manager.download_and_extract({
        "train": [f"{base}/KorQuAD_2.1/train/KorQuAD_2.1_train_{i:02}.zip"
                  for i in range(13)],
        "dev": [f"{base}/KorQuAD_2.1/dev/KorQuAD_2.1_dev_{i:02}.zip"
                for i in range(2)],
    })
    # Chain per-shard example iterators into one iterator per split.
    return {
        split: itertools.chain.from_iterable(
            self._generate_examples(shard) for shard in archives[split])
        for split in ("train", "dev")
    }
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    index_path = dl_manager.download(_INDEX_URL)

    with GFile(index_path, "r") as f:
        data = json.load(f)

    # Optionally fetch the raw videos and rewrite each instance's path.
    if self._builder_config.include_video:
        video_paths = self._download_videos(data, dl_manager)
        for datum in data:
            for instance in datum["instances"]:
                # Instances whose video was not downloaded map to None.
                instance["video"] = video_paths.get(instance["video_id"])

    # Only the openpose variant has a downloadable pose archive.
    pose_path = None
    if self._builder_config.include_pose == "openpose":
        pose_path = dl_manager.download_and_extract(
            _POSE_URLS[self._builder_config.include_pose])

    return {
        "train": self._generate_examples(data, pose_path, "train"),
        "validation": self._generate_examples(data, pose_path, "val"),
        "test": self._generate_examples(data, pose_path, "test"),
    }
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Return SplitGenerators."""
    paths = dl_manager.download_and_extract({
        'train-1': 'https://archive.physionet.org/users/shared/challenge-2019/training_setA.zip',  # noqa: E501
        'train-2': 'https://archive.physionet.org/users/shared/challenge-2019/training_setB.zip'  # noqa: E501
    })
    data_paths = [
        os.path.join(paths['train-1'], 'training'),
        os.path.join(paths['train-2'], 'training_setB'),
    ]
    # Every split reads both training sets; the listfile selects the records.
    split_listfiles = (
        (tfds.Split.TRAIN, 'train_listfile.csv'),
        (tfds.Split.VALIDATION, 'val_listfile.csv'),
        (tfds.Split.TEST, 'test_listfile.csv'),
    )
    return [
        tfds.core.SplitGenerator(
            name=split,
            gen_kwargs={
                'data_paths': data_paths,
                'listfile': os.path.join(RESOURCES, listfile),
            },
        )
        for split, listfile in split_listfiles
    ]
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    # The whole graph ships as one archive; expose it as a single split.
    extracted = dl_manager.download_and_extract(f"{DGL_URL}reddit.zip")
    return {"full": self._generate_examples(extracted)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Download the data and define splits."""
    # Download and extract the single archive holding all five folds.
    root = dl_manager.download_and_extract(
        "http://www.patreo.dcc.ufmg.br/wp-content/uploads/2017/11/brazilian_coffee_dataset.zip"
    )
    # dl_manager returns pathlib-like objects, so `/` composes paths.
    # One split per cross-validation fold shipped with the dataset.
    return {
        f'fold{i}': self._generate_examples(
            path=root / f'brazilian_coffee_scenes/fold{i}')
        for i in range(1, 6)
    }
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    config_name = self.builder_config.name
    # Each config corresponds to one .mat file in the upstream repository.
    mat_path = dl_manager.download_and_extract(
        "https://github.com/kaize0409/GCN_AnomalyDetection/raw/master/gae/data/"
        f"{config_name}.mat")
    return {"train": self._generate_examples(mat_path)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    archive = dl_manager.download_and_extract(
        f"http://snap.stanford.edu/graphsage/{self.builder_config.name}.zip")
    # No predefined train/test partition; everything goes into "full".
    return {"full": self._generate_examples(archive)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    extracted_dir = dl_manager.download_and_extract(
        f"{DGL_URL}{self._URL_NAME}.zip")
    # Single split covering the entire dataset.
    return {"full": self._generate_examples(extracted_dir)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    archive_dir = dl_manager.download_and_extract(
        f"https://data.dgl.ai/dataset/{self.builder_config.name}.tgz")
    # The dataset has no predefined splits; expose everything as "full".
    return {"full": self._generate_examples(archive_dir)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    edge_list_path = dl_manager.download_and_extract(
        "https://snap.stanford.edu/data/soc-LiveJournal1.txt.gz")
    # The graph is one edge-list file; expose it as a single split.
    return {"full": self._generate_examples(edge_list_path)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    # TODO(tensorflowdb): Downloads the data and defines the splits
    archive = dl_manager.download_and_extract(
        'https://github.com/drawwithai/Dataset/raw/main/FormatedImages.zip')
    # TODO(tensorflowdb): Returns the Dict[split names, Iterator[Key, Example]]
    return {'train': self._generate_examples(archive / "FormatedImages")}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    # TODO(SVBRDF): Downloads the data and defines the splits
    extracted = dl_manager.download_and_extract('https://todo-data-url')
    # TODO(SVBRDF): Returns the Dict[split names, Iterator[Key, Example]]
    return {'train': self._generate_examples(extracted / 'train_imgs')}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads this config's .npz archive and yields one "full" split."""
    npz_path = dl_manager.download_and_extract(
        "https://github.com/shchur/gnn-benchmark/raw/master/data/npz/"
        f"{self.builder_config.url_name}.npz"
    )
    return {"full": self._generate_examples(npz_path)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators.

    Downloads the train and validation archives and wires each split to the
    parallel Hindi (.hi) / English (.en) files inside its own archive.
    """
    train = dl_manager.download_and_extract(
        'https://storage.googleapis.com/ai4b-anuvaad-nmt/test-runs/train.zip'
    )
    val = dl_manager.download_and_extract(
        'https://storage.googleapis.com/ai4b-anuvaad-nmt/test-runs/validation.zip'
    )
    return {
        'train': self._generate_examples(source=train / 'train.hi',
                                         target=train / 'train.en'),
        # BUG FIX: the validation files live in the validation archive; the
        # original read `train / 'validation.hi'` and never used `val`.
        'validation': self._generate_examples(source=val / 'validation.hi',
                                              target=val / 'validation.en'),
    }
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    extracted = dl_manager.download_and_extract(
        {"data": "http://sami.haija.org/graph/datasets.tgz"})["data"]
    # The archive bundles several datasets; select this config's subdirectory.
    dataset_dir = extracted / "datasets" / self.builder_config.name
    return {"full": self._generate_examples(dataset_dir)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    # TODO(OneLine45): Downloads the data and defines the splits
    archive = dl_manager.download_and_extract(
        'https://github.com/drawwithai/Dataset/raw/main/OneLine_45/OneLine_45.zip'
    )
    # TODO(OneLine45): Returns the Dict[split names, Iterator[Key, Example]]
    return {'train': self._generate_examples(archive / 'OneLine_45-train')}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    extracted = dl_manager.download_and_extract(_URLS)
    # All three splits read the same "test" directory; only the size differs.
    test_dir = os.path.join(extracted, "test")
    generators = []
    for patch_size in ("large", "medium", "small"):
        generators.append(
            tfds.core.SplitGenerator(
                name=patch_size,
                gen_kwargs={"path": test_dir, "size": patch_size},
            ))
    return generators
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the KLUE-DP tsv files and returns train/dev splits."""
    base = ("https://raw.githubusercontent.com/KLUE-benchmark/KLUE/"
            "ab22cd5cfdd6b527a9a4e2d177f9dacb85ddde2c/klue_benchmark/klue-dp-v1")
    files = dl_manager.download_and_extract({
        "train": f"{base}/klue-dp-v1_train.tsv",
        "dev": f"{base}/klue-dp-v1_dev.tsv",
    })
    return {
        split: self._generate_examples(files[split])
        for split in ("train", "dev")
    }
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    # v2 is the augmented variant of the corpus; v1 the original.
    filename = "sae4k_v2.txt" if self.builder_config.augmented else "sae4k_v1.txt"
    data_path = dl_manager.download_and_extract(
        f"https://raw.githubusercontent.com/warnikchow/sae4k/a3a7a4510ea010d210956ad50e38a61c7c838b0f/data/{filename}"
    )
    return {"train": self._generate_examples(data_path)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    paths = dl_manager.download_and_extract(self.builder_config.data_url)
    manual_split = self.builder_config.manual_split
    if manual_split is not None:
        # Re-partition the downloaded files according to the manual split spec.
        paths = _update_split(paths, manual_split)
        split_fn = manual_split['split']
        return {
            name: self._generate_examples(files, split_fn[name])
            for name, files in paths.items()
        }
    # TODO(kor_corpora): Returns the Dict[split names, Iterator[Key, Example]]
    return {name: self._generate_examples(files) for name, files in paths.items()}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    name = self.builder_config.name
    root = dl_manager.download_and_extract(
        f"https://zenodo.org/record/3689089/files/botnet_{name}.tar.gz")
    # The archive bundles the raw graph data and precomputed split indices.
    common_kwargs = {
        "split_path": root / f"{name}_split_idx.pkl",
        "data_path": root / f"{name}_raw.hdf5",
    }
    return {
        tfds.core.Split.TRAIN: self._generate_examples("train", **common_kwargs),
        tfds.core.Split.VALIDATION: self._generate_examples("val", **common_kwargs),
        tfds.core.Split.TEST: self._generate_examples("test", **common_kwargs),
    }
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads KLUE-RE data plus the relation list; returns train/dev splits."""
    base = ("https://raw.githubusercontent.com/KLUE-benchmark/KLUE/"
            "ab22cd5cfdd6b527a9a4e2d177f9dacb85ddde2c/klue_benchmark/klue-re-v1")
    files = dl_manager.download_and_extract({
        "train": f"{base}/klue-re-v1_train.json",
        "dev": f"{base}/klue-re-v1_dev.json",
        "relation_list": f"{base}/relation_list.json",
    })
    # The relation inventory is shared by both splits.
    with files["relation_list"].open() as f:
        relations = json.load(f)["relations"]
    return {
        split: self._generate_examples(files[split], relations=relations)
        for split in ("train", "dev")
    }
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    root = dl_manager.download_and_extract(_DL_URL)
    # Subdirectories under data_64x64 map onto the canonical TFDS splits.
    split_dirs = (
        (tfds.Split.TRAIN, "train"),
        (tfds.Split.VALIDATION, "val"),
        (tfds.Split.TEST, "test"),
    )
    return [
        tfds.core.SplitGenerator(
            name=split,
            gen_kwargs={"path": os.path.join(root, "data_64x64", subdir)},
        )
        for split, subdir in split_dirs
    ]
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    # Video builds use the larger archive that bundles the videos.
    if self._builder_config.include_video:
        urls = [_VIDEO_ANNOTATIONS_URL]
    else:
        urls = [_ANNOTATIONS_URL]
    if self._builder_config.include_pose is not None:
        urls.append(_POSE_URLS[self._builder_config.include_pose])

    downloads = dl_manager.download_and_extract(urls)
    annotations_path = path.join(downloads[0], "PHOENIX-2014-T-release-v3",
                                 "PHOENIX-2014-T")
    # Only the holistic pose variant extracts to a usable directory here.
    pose_path = None
    if self._builder_config.include_pose == "holistic":
        pose_path = path.join(downloads[1], "holistic")

    def split_generator(name, split):
        # All splits share the same annotation/pose roots; only `split` varies.
        return tfds.core.SplitGenerator(
            name=name,
            gen_kwargs={
                "annotations_path": annotations_path,
                "pose_path": pose_path,
                "split": split,
            },
        )

    # Original ordering (validation, test, train) preserved.
    return [
        split_generator(tfds.Split.VALIDATION, "dev"),
        split_generator(tfds.Split.TEST, "test"),
        split_generator(tfds.Split.TRAIN, "train"),
    ]
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    path_kv = {
        split: dl_manager.download_and_extract(url)
        for split, url in self.builder_config.data_url.items()
    }

    # Non-v1.0 configs extract to directories of per-shard JSON files;
    # flatten each split's directories into the list of contained .json files.
    if not self.builder_config.name.startswith("v1.0"):
        for split, extracted_dirs in path_kv.items():
            json_files = []
            for extracted_dir in extracted_dirs:
                json_files.extend(
                    tf.io.gfile.glob(os.path.join(extracted_dir, "*.json")))
            path_kv[split] = json_files

    if self.builder_config.manual_split is not None:
        # Re-partition files according to the manual split spec.
        path_kv = _update_split(path_kv, self.builder_config.manual_split)
        split_fn = self.builder_config.manual_split['split']
        return {
            split: self._generate_examples(files, split_fn[split])
            for split, files in path_kv.items()
        }

    # TODO(korquad): Returns the Dict[split names, Iterator[Key, Example]]
    return {split: self._generate_examples(files) for split, files in path_kv.items()}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Return SplitGenerators."""
    paths = dl_manager.download_and_extract({
        'set-a': 'http://physionet.org/files/challenge-2012/1.0.0/set-a.tar.gz',  # noqa: E501
        # 'train-1-outcome': 'http://physionet.org/files/challenge-2012/1.0.0/Outcomes-a.txt?download',  # noqa: E501
        'set-b': 'http://physionet.org/files/challenge-2012/1.0.0/set-b.tar.gz',  # noqa: E501
        # 'train-2-outcome': 'http://physionet.org/files/challenge-2012/1.0.0/Outcomes-b.txt?download',  # noqa: E501
        'set-c': 'http://physionet.org/files/challenge-2012/1.0.0/set-c.tar.gz',  # noqa: E501
        # 'test-outcome': 'http://physionet.org/files/challenge-2012/1.0.0/Outcomes-c.txt?download',  # noqa: E501
    })
    # Each archive extracts to a subdirectory named after its key.
    data_dirs = [
        os.path.join(paths[key], key) for key in ('set-a', 'set-b', 'set-c')
    ]
    # Every split reads all three record sets; the outcome file selects records.
    split_listfiles = (
        (tfds.Split.TRAIN, 'train_listfile.csv'),
        (tfds.Split.VALIDATION, 'val_listfile.csv'),
        (tfds.Split.TEST, 'test_listfile.csv'),
    )
    return [
        tfds.core.SplitGenerator(
            name=split,
            gen_kwargs={
                'data_dirs': data_dirs,
                'outcome_file': os.path.join(RESOURCES, listfile),
            },
        )
        for split, listfile in split_listfiles
    ]
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the namuwikitext release and returns train/dev/test splits."""
    base = "https://github.com/lovit/namuwikitext/releases/download/v0.3"
    archives = dl_manager.download_and_extract({
        split: f"{base}/namuwikitext_20200302.{split}.zip"
        for split in ("train", "dev", "test")
    })
    # Each archive extracts to a directory named after its split.
    return {
        split: self._generate_examples(
            archives[split] / f"namuwikitext_20200302.{split}", split)
        for split in ("train", "dev", "test")
    }
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators."""
    # TODO(indic_corpus): Downloads the data and defines the splits
    root = dl_manager.download_and_extract(
        "https://storage.googleapis.com/ai4bharat-public-indic-nlp-corpora/indiccorp/all_langs/monoling.zip"
    )
    # TODO(indic_corpus): Returns the Dict[split names, Iterator[Key, Example]]
    # Two splits per configured language: the full corpus and its validation file.
    generators = []
    for lang in self.builder_config.languages:
        generators.append(
            tfds.core.SplitGenerator(
                name=lang,
                gen_kwargs=dict(path=root / f"monoling/{lang}.txt")))
        generators.append(
            tfds.core.SplitGenerator(
                name=f"{lang}-validation",
                gen_kwargs=dict(path=root / f"monoling/{lang}-validation.txt")))
    return generators