def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the coffee-scenes archive and returns one split per fold."""
    # dl_manager returns pathlib-like objects supporting `/`, `read_text()`,
    # `iterdir()`, ...
    root = dl_manager.download_and_extract(
        "http://www.patreo.dcc.ufmg.br/wp-content/uploads/2017/11/brazilian_coffee_dataset.zip"
    )
    scenes_dir = root / 'brazilian_coffee_scenes'
    # The dataset ships as five pre-defined folds; expose each as a split.
    return {
        f'fold{i}': self._generate_examples(path=scenes_dir / f'fold{i}')
        for i in range(1, 6)
    }
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Extracts the manually-placed archive and returns train/val/test generators."""
    archive = join(dl_manager.manual_dir, 'mimic_benchmarking_phenotyping.tar.gz')
    root = dl_manager.extract(archive)
    # Validation records live in the train directory but use their own listfile.
    specs = [
        (tfds.Split.TRAIN, join(root, 'train'), join(root, 'train_listfile.csv')),
        (tfds.Split.VALIDATION, join(root, 'train'), join(root, 'val_listfile.csv')),
        (tfds.Split.TEST, join(root, 'test'), join(root, 'test_listfile.csv')),
    ]
    return [
        tfds.core.SplitGenerator(
            name=split_name,
            gen_kwargs={'data_dir': data_dir, 'listfile': listfile})
        for split_name, data_dir, listfile in specs
    ]
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the image/label files and returns the train and test splits."""
    filenames = {
        "train_data": _TRAIN_IMAGES_FILENAME,
        "train_labels": _TRAIN_LABELS_FILENAME,
        "test_data": _TEST_IMAGES_FILENAME,
        "test_labels": _TEST_LABELS_FILENAME,
    }
    downloaded = dl_manager.download(
        {key: urllib.parse.urljoin(_URL, name) for key, name in filenames.items()})

    def _make_split(split_name, prefix, num_examples):
        # Each split is described by its image file, label file and example count.
        return tfds.core.SplitGenerator(
            name=split_name,
            gen_kwargs=dict(
                num_examples=num_examples,
                images_path=downloaded[f"{prefix}_data"],
                label_path=downloaded[f"{prefix}_labels"]))

    return [
        _make_split(tfds.Split.TRAIN, "train", _TRAIN_EXAMPLES),
        _make_split(tfds.Split.TEST, "test", _TEST_EXAMPLES),
    ]
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the COSMOS training sample and yields a single TRAIN split."""
    extracted = dl_manager.download_and_extract(
        'https://zenodo.org/record/3242143/files/COSMOS_25.2_training_sample.tar.gz')
    # Build the GalSim catalog from the extracted sample directory.
    catalog = galsim.COSMOSCatalog(dir=extracted / 'COSMOS_25.2_training_sample')
    return {tfds.Split.TRAIN: self._generate_examples(catalog)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads this mission's training file and exposes it as the train split."""
    # URL is pinned to a specific commit and parameterized by the builder config.
    url = (
        "https://raw.githubusercontent.com/naver/nlp-challenge/"
        "a51654472e0da75cd37c6e73ffe583db78e68323/missions/"
        f"{self.builder_config.name}/data/train/train_data"
    )
    downloaded = dl_manager.download(url)
    return {"train": self._generate_examples(downloaded)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the Reddit graph archive and returns a single "full" split."""
    extracted = dl_manager.download_and_extract(f"{DGL_URL}reddit.zip")
    return {"full": self._generate_examples(extracted)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Extracts the manually-downloaded archive and returns one split per fold.

    Requires `brazilian_cerrado_dataset.zip` to be placed in
    `dl_manager.manual_dir` before building.
    """
    # `manual_dir` yields a pathlib-like `Path('<manual_dir>/brazilian_cerrado_dataset.zip')`.
    archive_path = dl_manager.manual_dir / 'brazilian_cerrado_dataset.zip'
    # Extract the manually downloaded archive; the result supports `/`,
    # `read_text()`, `iterdir()`, ...
    extracted_path = dl_manager.extract(archive_path)
    folds_root = extracted_path / 'Brazilian_Cerrado_Savana_Scenes_Dataset/folds'
    # The dataset is distributed as five pre-defined folds; expose each as a
    # separate split. (Removed leftover commented-out debug code that pointed
    # at a developer's local path and an alternate download URL.)
    return {
        f'fold{i}': self._generate_examples(path=folds_root / f'fold{i}')
        for i in range(1, 6)
    }
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads both PhysioNet 2019 training sets and returns all three splits."""
    archives = dl_manager.download_and_extract({
        'train-1': 'https://archive.physionet.org/users/shared/challenge-2019/training_setA.zip',  # noqa: E501
        'train-2': 'https://archive.physionet.org/users/shared/challenge-2019/training_setB.zip'  # noqa: E501
    })
    set_a_dir = os.path.join(archives['train-1'], 'training')
    set_b_dir = os.path.join(archives['train-2'], 'training_setB')
    # All three splits read from both archives; the listfiles in RESOURCES
    # decide which records belong to which split.
    split_listfiles = [
        (tfds.Split.TRAIN, 'train_listfile.csv'),
        (tfds.Split.VALIDATION, 'val_listfile.csv'),
        (tfds.Split.TEST, 'test_listfile.csv'),
    ]
    return [
        tfds.core.SplitGenerator(
            name=split_name,
            gen_kwargs={
                'data_paths': [set_a_dir, set_b_dir],
                'listfile': os.path.join(RESOURCES, listfile_name),
            })
        for split_name, listfile_name in split_listfiles
    ]
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the chatbot CSV (note the space encoded in the filename)."""
    csv_path = dl_manager.download(
        "https://raw.githubusercontent.com/songys/Chatbot_data/a22e508811b5040eead0be5a89c27ef3780d4e82/ChatbotData%20.csv"
    )
    return {"train": self._generate_examples(csv_path)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the DGL dataset named by the builder config as one "full" split."""
    dataset_name = self.builder_config.name
    extracted = dl_manager.download_and_extract(
        f"https://data.dgl.ai/dataset/{dataset_name}.tgz")
    return {"full": self._generate_examples(extracted)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the SNAP soc-LiveJournal1 edge list as a single "full" split."""
    extracted = dl_manager.download_and_extract(
        "https://snap.stanford.edu/data/soc-LiveJournal1.txt.gz")
    return {"full": self._generate_examples(extracted)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the paraKQC data file (pinned to a commit) as the train split."""
    data_file = dl_manager.download(
        "https://raw.githubusercontent.com/warnikchow/paraKQC/c16270fe6c2e888af07e7cb043248ad31d8a6f9c/data/paraKQC_v1.txt"
    )
    return {"train": self._generate_examples(data_file)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the anomaly-detection .mat file selected by the builder config."""
    config_name = self.builder_config.name
    mat_path = dl_manager.download_and_extract(
        "https://github.com/kaize0409/GCN_AnomalyDetection/raw/master/gae/data/"
        f"{config_name}.mat")
    return {"train": self._generate_examples(mat_path)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads this dataset's DGL archive and returns one "full" split."""
    extracted_dir = dl_manager.download_and_extract(
        f"{DGL_URL}{self._URL_NAME}.zip")
    return {"full": self._generate_examples(extracted_dir)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the GraphSAGE archive named by the builder config as "full"."""
    archive_url = f"http://snap.stanford.edu/graphsage/{self.builder_config.name}.zip"
    extracted = dl_manager.download_and_extract(archive_url)
    return {"full": self._generate_examples(extracted)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the formatted-images archive and exposes it as the train split."""
    extracted = dl_manager.download_and_extract(
        'https://github.com/drawwithai/Dataset/raw/main/FormatedImages.zip')
    # Examples live under the `FormatedImages` directory inside the archive.
    return {'train': self._generate_examples(extracted / "FormatedImages")}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the gnn-benchmark .npz chosen by the builder config as "full"."""
    npz_path = dl_manager.download_and_extract(
        "https://github.com/shchur/gnn-benchmark/raw/master/data/npz/"
        f"{self.builder_config.url_name}.npz"
    )
    return {"full": self._generate_examples(npz_path)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators.

    NOTE(review): the download URL is still the tfds scaffold placeholder,
    so this builder cannot work until the real data URL is filled in.
    """
    extracted = dl_manager.download_and_extract('https://todo-data-url')
    return {'train': self._generate_examples(extracted / 'train_imgs')}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the graph archive and selects the configured dataset directory."""
    extracted = dl_manager.download_and_extract(
        {"data": "http://sami.haija.org/graph/datasets.tgz"})["data"]
    dataset_dir = extracted / "datasets" / self.builder_config.name
    return {"full": self._generate_examples(dataset_dir)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the Hindi-English parallel corpora and returns train/validation.

    Bug fix: the validation split previously read `validation.hi` /
    `validation.en` from the *train* archive, leaving the downloaded
    validation archive (`val`) entirely unused.
    """
    train = dl_manager.download_and_extract(
        'https://storage.googleapis.com/ai4b-anuvaad-nmt/test-runs/train.zip'
    )
    val = dl_manager.download_and_extract(
        'https://storage.googleapis.com/ai4b-anuvaad-nmt/test-runs/validation.zip'
    )
    return {
        'train': self._generate_examples(
            source=train / 'train.hi', target=train / 'train.en'),
        # Read validation files from the extracted validation archive.
        'validation': self._generate_examples(
            source=val / 'validation.hi', target=val / 'validation.en'),
    }
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the data file at `_URL` and wraps it in a single TRAIN split."""
    downloaded_path = dl_manager.download(_URL)
    return [
        tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN, gen_kwargs={"path": downloaded_path}),
    ]
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Fetches the Kaggle financial-sentiment dataset as the train split."""
    dataset_root = dl_manager.download_kaggle_data(
        'ankurzing/sentiment-analysis-for-financial-news')
    csv_path = os.path.join(dataset_root, 'all-data.csv')
    return {'train': self._generate_examples(path=csv_path)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the OneLine_45 archive and exposes its train directory."""
    extracted = dl_manager.download_and_extract(
        'https://github.com/drawwithai/Dataset/raw/main/OneLine_45/OneLine_45.zip'
    )
    return {'train': self._generate_examples(extracted / 'OneLine_45-train')}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the 50 sci-news JSON articles and returns them as a dev split."""
    base = "https://raw.githubusercontent.com/theeluwin/sci-news-sum-kr-50/aca0583651503c1cdfa8ef0bc2ef0976250a33ca/data/"
    # Articles are numbered 01.json .. 50.json.
    article_files = dl_manager.download(
        [f"{base}{index:02d}.json" for index in range(1, 51)]
    )
    return {"dev": self._generate_examples(article_files)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the price-prediction CSV and returns it as the train split."""
    csv_path = dl_manager.download(
        'https://raw.githubusercontent.com/johann-su/ai_tradebot/main/data/financial_data/price_prediction_dataset.csv'
    )
    return {'train': self._generate_examples(csv_path)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the KLUE-NLI train/dev JSON files pinned to a specific commit."""
    base = "https://raw.githubusercontent.com/KLUE-benchmark/KLUE/ab22cd5cfdd6b527a9a4e2d177f9dacb85ddde2c/klue_benchmark/klue-nli-v1/"
    downloaded = dl_manager.download({
        "train": f"{base}klue-nli-v1_train.json",
        "dev": f"{base}klue-nli-v1_dev.json",
    })
    return {
        split: self._generate_examples(downloaded[split])
        for split in ("train", "dev")
    }
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads every NER corpus file listed in the bundled filelist.txt."""
    # filelist.txt was generated with `ls | sort > filelist.txt` and shipped
    # alongside this module; each line is one corpus filename.
    listing_path = os.path.join(os.path.dirname(__file__), "filelist.txt")
    with tf.io.gfile.GFile(listing_path) as listing:
        filenames = listing.readlines()
    downloaded = dl_manager.download([
        f"https://raw.githubusercontent.com/kmounlp/NER/1e557de738b8e6215c7cacac116e735518c0f680/말뭉치%20-%20형태소_개체명/{filename.strip()}"
        for filename in filenames
    ])
    return {"train": self._generate_examples(downloaded)}
def _split_generators(
    dl_manager: tfds.download.DownloadManager,
) -> Dict[str, Iterator[Tuple[str, Dict[str, Union[Path, str]]]]]:
    """Fetches the MURA v1.1 Kaggle data and returns train/valid generators.

    NOTE(review): no `self` parameter and examples are generated via
    `Mura._generate_examples` — presumably a @staticmethod; confirm against
    the enclosing class.
    """
    dataset_root = dl_manager.download_kaggle_data(_KAGGLE_DATA) / "MURA-v1.1"
    return {
        "train": Mura._generate_examples(dataset_root / "train"),
        "valid": Mura._generate_examples(dataset_root / "valid"),
    }
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Iterates the manually-downloaded archive selected by the builder config."""
    archive_path = os.path.join(dl_manager.manual_dir, self.builder_config.dataset)
    # Fail early with a clear message if the user has not placed the archive.
    if not tf.io.gfile.exists(archive_path):
        raise AssertionError(
            f'You must download the dataset .zip file and place it into {dl_manager.manual_dir}'
        )
    archive_iter = dl_manager.iter_archive(archive_path)
    return {'train': self._generate_examples(archive_iter)}
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Downloads the NSMC ratings files (pinned commit) as train/test splits."""
    base = "https://raw.githubusercontent.com/e9t/nsmc/cc0670e872d4ac27bfe36c87456783004b39ef6c/"
    files = dl_manager.download({
        "train": f"{base}ratings_train.txt",
        "test": f"{base}ratings_test.txt",
    })
    return {
        name: self._generate_examples(files[name]) for name in ("train", "test")
    }