예제 #1
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Fetch the Brazilian coffee archive and expose its five folds as splits.

     Args:
         dl_manager: Download manager used to download and extract the zip.

     Returns:
         Dict mapping 'fold1' .. 'fold5' to the result of
         `self._generate_examples(path=...)` for the matching fold directory.
     """
     archive_root = dl_manager.download_and_extract(
         "http://www.patreo.dcc.ufmg.br/wp-content/uploads/2017/11/brazilian_coffee_dataset.zip"
     )
     # Split name paired with its directory relative to the extraction root.
     fold_dirs = (
         ('fold1', 'brazilian_coffee_scenes/fold1'),
         ('fold2', 'brazilian_coffee_scenes/fold2'),
         ('fold3', 'brazilian_coffee_scenes/fold3'),
         ('fold4', 'brazilian_coffee_scenes/fold4'),
         ('fold5', 'brazilian_coffee_scenes/fold5'),
     )
     # `archive_root` is combined with `/`, so it has to be pathlib-like.
     return {
         split: self._generate_examples(path=archive_root / rel_dir)
         for split, rel_dir in fold_dirs
     }
예제 #2
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Define train/validation/test splits from the manual archive.

        `mimic_benchmarking_phenotyping.tar.gz` is taken from
        `dl_manager.manual_dir` and extracted. The validation split reuses
        the `train` directory; only its listfile differs from training.

        Args:
            dl_manager: Download manager providing `manual_dir` and `extract`.

        Returns:
            List of three `tfds.core.SplitGenerator` objects (TRAIN,
            VALIDATION, TEST) whose gen_kwargs carry `data_dir` and
            `listfile`.
        """
        archive = join(dl_manager.manual_dir,
                       'mimic_benchmarking_phenotyping.tar.gz')
        root = dl_manager.extract(archive)

        train_dir = join(root, 'train')
        test_dir = join(root, 'test')

        # (split, data directory, listfile) for every split to generate.
        split_specs = (
            (tfds.Split.TRAIN, train_dir, join(root, 'train_listfile.csv')),
            (tfds.Split.VALIDATION, train_dir,
             join(root, 'val_listfile.csv')),
            (tfds.Split.TEST, test_dir, join(root, 'test_listfile.csv')),
        )
        return [
            tfds.core.SplitGenerator(
                name=split,
                gen_kwargs={'data_dir': data_dir, 'listfile': listfile},
            )
            for split, data_dir, listfile in split_specs
        ]
예제 #3
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the image and label files and define TRAIN and TEST.

        Args:
            dl_manager: Download manager used to fetch the four files.

        Returns:
            List with a TRAIN and a TEST `tfds.core.SplitGenerator`; each one
            passes `num_examples`, `images_path` and `label_path` as
            gen_kwargs.
        """
        # Each file name is resolved against `_URL` before downloading.
        urls = {
            key: urllib.parse.urljoin(_URL, filename)
            for key, filename in (
                ("train_data", _TRAIN_IMAGES_FILENAME),
                ("train_labels", _TRAIN_LABELS_FILENAME),
                ("test_data", _TEST_IMAGES_FILENAME),
                ("test_labels", _TEST_LABELS_FILENAME),
            )
        }
        downloaded = dl_manager.download(urls)

        train_split = tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN,
            gen_kwargs={
                "num_examples": _TRAIN_EXAMPLES,
                "images_path": downloaded["train_data"],
                "label_path": downloaded["train_labels"],
            },
        )
        test_split = tfds.core.SplitGenerator(
            name=tfds.Split.TEST,
            gen_kwargs={
                "num_examples": _TEST_EXAMPLES,
                "images_path": downloaded["test_data"],
                "label_path": downloaded["test_labels"],
            },
        )
        return [train_split, test_split]
예제 #4
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
   """Download the COSMOS 25.2 training sample and define the train split.

   Args:
     dl_manager: Download manager used to download and extract the tarball.

   Returns:
     Dict mapping `tfds.Split.TRAIN` to `self._generate_examples(catalog)`,
     where `catalog` is a `galsim.COSMOSCatalog` opened on the extracted data.
   """
   sample_root = dl_manager.download_and_extract(
       'https://zenodo.org/record/3242143/files/COSMOS_25.2_training_sample.tar.gz')
   # The catalog is opened from this sub-directory of the extraction root.
   catalog_dir = sample_root / 'COSMOS_25.2_training_sample'
   catalog = galsim.COSMOSCatalog(dir=catalog_dir)
   return {tfds.Split.TRAIN: self._generate_examples(catalog)}
예제 #5
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the training file of the configured mission.

        The URL embeds a fixed commit hash of naver/nlp-challenge and picks
        the mission directory from `self.builder_config.name`.

        Args:
            dl_manager: Download manager used to fetch the file.

        Returns:
            Dict with a single "train" entry holding the result of
            `self._generate_examples` on the downloaded file.
        """
        train_url = f"https://raw.githubusercontent.com/naver/nlp-challenge/a51654472e0da75cd37c6e73ffe583db78e68323/missions/{self.builder_config.name}/data/train/train_data"
        downloaded = dl_manager.download(train_url)
        return {"train": self._generate_examples(downloaded)}
예제 #6
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download and extract `reddit.zip`, exposed as one "full" split.

        Args:
            dl_manager: Download manager used to download and extract.

        Returns:
            Dict with a single "full" entry holding the result of
            `self._generate_examples` on the extracted location.
        """
        archive_url = f"{DGL_URL}reddit.zip"
        extracted = dl_manager.download_and_extract(archive_url)
        return {"full": self._generate_examples(extracted)}
예제 #7
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Extract the manually supplied Cerrado archive and expose five folds.

     `brazilian_cerrado_dataset.zip` is read from `dl_manager.manual_dir`;
     nothing is downloaded by this method.

     Args:
         dl_manager: Download manager providing `manual_dir` and `extract`.

     Returns:
         Dict mapping 'fold1' .. 'fold5' to the result of
         `self._generate_examples(path=...)` for the matching fold directory.
     """
     manual_archive = dl_manager.manual_dir / 'brazilian_cerrado_dataset.zip'
     archive_root = dl_manager.extract(manual_archive)
     # Split name paired with its directory relative to the extraction root.
     fold_dirs = (
         ('fold1', 'Brazilian_Cerrado_Savana_Scenes_Dataset/folds/fold1'),
         ('fold2', 'Brazilian_Cerrado_Savana_Scenes_Dataset/folds/fold2'),
         ('fold3', 'Brazilian_Cerrado_Savana_Scenes_Dataset/folds/fold3'),
         ('fold4', 'Brazilian_Cerrado_Savana_Scenes_Dataset/folds/fold4'),
         ('fold5', 'Brazilian_Cerrado_Savana_Scenes_Dataset/folds/fold5'),
     )
     # `archive_root` is combined with `/`, so it has to be pathlib-like.
     return {
         split: self._generate_examples(path=archive_root / rel_dir)
         for split, rel_dir in fold_dirs
     }
예제 #8
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download both PhysioNet 2019 training sets and define the splits.

        Every split receives the same two data directories; the splits differ
        only in the listfile taken from `RESOURCES`.

        Args:
            dl_manager: Download manager used to download and extract.

        Returns:
            List of three `tfds.core.SplitGenerator` objects (TRAIN,
            VALIDATION, TEST) whose gen_kwargs carry `data_paths` and
            `listfile`.
        """
        archives = dl_manager.download_and_extract({
            'train-1': 'https://archive.physionet.org/users/shared/challenge-2019/training_setA.zip',  # noqa: E501
            'train-2': 'https://archive.physionet.org/users/shared/challenge-2019/training_setB.zip'  # noqa: E501
        })
        set_a_dir = os.path.join(archives['train-1'], 'training')
        set_b_dir = os.path.join(archives['train-2'], 'training_setB')

        # (split, listfile name) for every split to generate.
        listfiles = (
            (tfds.Split.TRAIN, 'train_listfile.csv'),
            (tfds.Split.VALIDATION, 'val_listfile.csv'),
            (tfds.Split.TEST, 'test_listfile.csv'),
        )
        return [
            tfds.core.SplitGenerator(
                name=split,
                gen_kwargs={
                    # A fresh list per split, so no list object is shared.
                    'data_paths': [set_a_dir, set_b_dir],
                    'listfile': os.path.join(RESOURCES, listfile),
                },
            )
            for split, listfile in listfiles
        ]
예제 #9
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the chatbot CSV and expose it as the only "train" split.

        Args:
            dl_manager: Download manager used to fetch the file.

        Returns:
            Dict with a single "train" entry holding the result of
            `self._generate_examples` on the downloaded file.
        """
        # The URL embeds a fixed commit hash of songys/Chatbot_data.
        csv_url = "https://raw.githubusercontent.com/songys/Chatbot_data/a22e508811b5040eead0be5a89c27ef3780d4e82/ChatbotData%20.csv"
        downloaded = dl_manager.download(csv_url)
        return {"train": self._generate_examples(downloaded)}
예제 #10
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Download the configured DGL dataset tarball as one "full" split.

     The archive name is taken from `self.builder_config.name`.

     Args:
         dl_manager: Download manager used to download and extract.

     Returns:
         Dict with a single "full" entry holding the result of
         `self._generate_examples` on the extracted location.
     """
     archive_url = f"https://data.dgl.ai/dataset/{self.builder_config.name}.tgz"
     extracted = dl_manager.download_and_extract(archive_url)
     return {"full": self._generate_examples(extracted)}
예제 #11
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download and extract the LiveJournal edge list as one "full" split.

        Args:
            dl_manager: Download manager used to download and extract.

        Returns:
            Dict with a single "full" entry holding the result of
            `self._generate_examples` on the extracted location.
        """
        source_url = "https://snap.stanford.edu/data/soc-LiveJournal1.txt.gz"
        extracted = dl_manager.download_and_extract(source_url)
        return {"full": self._generate_examples(extracted)}
예제 #12
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download `paraKQC_v1.txt` and expose it as the only "train" split.

        Args:
            dl_manager: Download manager used to fetch the file.

        Returns:
            Dict with a single "train" entry holding the result of
            `self._generate_examples` on the downloaded file.
        """
        # The URL embeds a fixed commit hash of warnikchow/paraKQC.
        source_url = "https://raw.githubusercontent.com/warnikchow/paraKQC/c16270fe6c2e888af07e7cb043248ad31d8a6f9c/data/paraKQC_v1.txt"
        downloaded = dl_manager.download(source_url)
        return {"train": self._generate_examples(downloaded)}
예제 #13
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the `.mat` file of the configured dataset as "train".

        The file name is `self.builder_config.name` with a `.mat` suffix.

        Args:
            dl_manager: Download manager used to download and extract.

        Returns:
            Dict with a single "train" entry holding the result of
            `self._generate_examples` on the fetched location.
        """
        dataset_name = self.builder_config.name
        mat_url = (
            "https://github.com/kaize0409/GCN_AnomalyDetection/raw/master/gae/data/"
            f"{dataset_name}.mat"
        )
        fetched = dl_manager.download_and_extract(mat_url)
        return {
            "train": self._generate_examples(fetched),
        }
예제 #14
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download and extract this builder's DGL zip as one "full" split.

        The archive URL is `DGL_URL` followed by `self._URL_NAME` and `.zip`.

        Args:
            dl_manager: Download manager used to download and extract.

        Returns:
            Dict with a single "full" entry holding the result of
            `self._generate_examples` on the extracted location.
        """
        archive_url = f"{DGL_URL}{self._URL_NAME}.zip"
        extracted = dl_manager.download_and_extract(archive_url)
        return {"full": self._generate_examples(extracted)}
예제 #15
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the configured GraphSAGE zip as one "full" split.

        The archive name is taken from `self.builder_config.name`.

        Args:
            dl_manager: Download manager used to download and extract.

        Returns:
            Dict with a single "full" entry holding the result of
            `self._generate_examples` on the extracted location.
        """
        extracted = dl_manager.download_and_extract(
            f"http://snap.stanford.edu/graphsage/{self.builder_config.name}.zip"
        )
        return {"full": self._generate_examples(extracted)}
예제 #16
0
  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Download `FormatedImages.zip` and expose its images as "train".

    Args:
      dl_manager: Download manager used to download and extract the zip.

    Returns:
      Dict with a single 'train' entry holding the result of
      `self._generate_examples` on the `FormatedImages` sub-directory.
    """
    extracted = dl_manager.download_and_extract(
        'https://github.com/drawwithai/Dataset/raw/main/FormatedImages.zip')
    images_dir = extracted / "FormatedImages"
    return {'train': self._generate_examples(images_dir)}
예제 #17
0
        def _split_generators(self, dl_manager: tfds.download.DownloadManager):
            """Download the configured gnn-benchmark `.npz` as one "full" split.

            The file name is `self.builder_config.url_name` with a `.npz`
            suffix.

            Args:
                dl_manager: Download manager used to download and extract.

            Returns:
                Dict with a single "full" entry holding the result of
                `self._generate_examples` on the fetched location.
            """
            npz_url = (
                "https://github.com/shchur/gnn-benchmark/raw/master/data/npz/"
                f"{self.builder_config.url_name}.npz"
            )
            fetched = dl_manager.download_and_extract(npz_url)
            return {"full": self._generate_examples(fetched)}
예제 #18
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the dataset archive and expose `train_imgs` as "train".

        Args:
            dl_manager: Download manager used to download and extract.

        Returns:
            Dict with a single 'train' entry holding the result of
            `self._generate_examples` on the `train_imgs` sub-directory.
        """
        # NOTE(review): this URL still looks like the template placeholder;
        # confirm the real data location before building the dataset.
        data_url = 'https://todo-data-url'
        extracted = dl_manager.download_and_extract(data_url)
        train_dir = extracted / 'train_imgs'
        return {'train': self._generate_examples(train_dir)}
예제 #19
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download `datasets.tgz` and expose the configured dataset as "full".

        The dataset directory is `datasets/<self.builder_config.name>` below
        the extraction root.

        Args:
            dl_manager: Download manager used to download and extract.

        Returns:
            Dict with a single "full" entry holding the result of
            `self._generate_examples` on that directory.
        """
        extracted = dl_manager.download_and_extract(
            {"data": "http://sami.haija.org/graph/datasets.tgz"})
        dataset_dir = extracted["data"] / "datasets" / self.builder_config.name
        return {"full": self._generate_examples(dataset_dir)}
예제 #20
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the Hindi-English archives and define train/validation.

        Args:
            dl_manager: Download manager used to download and extract both
                archives.

        Returns:
            Dict mapping 'train' and 'validation' to the result of
            `self._generate_examples(source=..., target=...)`, where `source`
            is the `.hi` file and `target` the `.en` file of that split.
        """
        train_dir = dl_manager.download_and_extract(
            'https://storage.googleapis.com/ai4b-anuvaad-nmt/test-runs/train.zip'
        )
        validation_dir = dl_manager.download_and_extract(
            'https://storage.googleapis.com/ai4b-anuvaad-nmt/test-runs/validation.zip'
        )

        return {
            'train':
            self._generate_examples(source=train_dir / 'train.hi',
                                    target=train_dir / 'train.en'),
            # Read the validation files from validation.zip. They used to be
            # looked up inside the extracted train archive, which left the
            # validation download unused.
            # NOTE(review): assumes validation.zip holds validation.hi/.en at
            # its top level, mirroring train.zip - confirm the layout.
            'validation':
            self._generate_examples(source=validation_dir / 'validation.hi',
                                    target=validation_dir / 'validation.en'),
        }
예제 #21
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the file at `_URL` and define the TRAIN split.

        Args:
            dl_manager: Download manager used to fetch the file.

        Returns:
            List holding one `tfds.core.SplitGenerator` for
            `tfds.Split.TRAIN`, with the downloaded location passed as the
            `path` gen_kwarg.
        """
        downloaded = dl_manager.download(_URL)
        train_split = tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN,
            gen_kwargs={"path": downloaded},
        )
        return [train_split]
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Fetch the Kaggle financial-news dataset and define "train".

        Args:
            dl_manager: Download manager used for `download_kaggle_data`.

        Returns:
            Dict with a single 'train' entry holding the result of
            `self._generate_examples(path=...)` on `all-data.csv`.
        """
        kaggle_dir = dl_manager.download_kaggle_data(
            'ankurzing/sentiment-analysis-for-financial-news')
        csv_file = os.path.join(kaggle_dir, 'all-data.csv')
        return {'train': self._generate_examples(path=csv_file)}
예제 #23
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download `OneLine_45.zip` and expose its train folder as "train".

        Args:
            dl_manager: Download manager used to download and extract.

        Returns:
            Dict with a single 'train' entry holding the result of
            `self._generate_examples` on the `OneLine_45-train`
            sub-directory.
        """
        archive_url = 'https://github.com/drawwithai/Dataset/raw/main/OneLine_45/OneLine_45.zip'
        extracted = dl_manager.download_and_extract(archive_url)
        train_dir = extracted / 'OneLine_45-train'
        return {'train': self._generate_examples(train_dir)}
예제 #24
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the 50 numbered JSON files and expose them as "dev".

        Files `01.json` through `50.json` are requested in ascending order.

        Args:
            dl_manager: Download manager used to fetch the files.

        Returns:
            Dict with a single "dev" entry holding the result of
            `self._generate_examples` on the downloaded files.
        """
        # The URLs embed a fixed commit hash of theeluwin/sci-news-sum-kr-50.
        article_urls = [
            f"https://raw.githubusercontent.com/theeluwin/sci-news-sum-kr-50/aca0583651503c1cdfa8ef0bc2ef0976250a33ca/data/{index:02d}.json"
            for index in range(1, 51)
        ]
        downloaded = dl_manager.download(article_urls)
        return {"dev": self._generate_examples(downloaded)}
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the price-prediction CSV and expose it as "train".

        Args:
            dl_manager: Download manager used to fetch the file.

        Returns:
            Dict with a single 'train' entry holding the result of
            `self._generate_examples` on the downloaded file.
        """
        csv_url = 'https://raw.githubusercontent.com/johann-su/ai_tradebot/main/data/financial_data/price_prediction_dataset.csv'
        downloaded = dl_manager.download(csv_url)
        return {'train': self._generate_examples(downloaded)}
예제 #26
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the KLUE NLI train and dev JSON files.

        Args:
            dl_manager: Download manager used to fetch the files.

        Returns:
            Dict mapping "train" and "dev" to the result of
            `self._generate_examples` on the matching downloaded file.
        """
        # Both URLs embed the same fixed commit hash of KLUE-benchmark/KLUE.
        split_urls = {
            "train": "https://raw.githubusercontent.com/KLUE-benchmark/KLUE/ab22cd5cfdd6b527a9a4e2d177f9dacb85ddde2c/klue_benchmark/klue-nli-v1/klue-nli-v1_train.json",
            "dev": "https://raw.githubusercontent.com/KLUE-benchmark/KLUE/ab22cd5cfdd6b527a9a4e2d177f9dacb85ddde2c/klue_benchmark/klue-nli-v1/klue-nli-v1_dev.json",
        }
        downloaded = dl_manager.download(split_urls)
        return {
            split: self._generate_examples(downloaded[split])
            for split in ("train", "dev")
        }
예제 #27
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download every corpus file listed in `filelist.txt` as "train".

        `filelist.txt` is read from the directory of this module; each line,
        stripped of surrounding whitespace, is appended to the base URL.

        Args:
            dl_manager: Download manager used to fetch the files.

        Returns:
            Dict with a single "train" entry holding the result of
            `self._generate_examples` on the downloaded files.
        """
        # filelist.txt was produced with `ls | sort > filelist.txt` and then
        # placed next to this module.
        filelist_path = os.path.join(os.path.dirname(__file__), "filelist.txt")
        with tf.io.gfile.GFile(filelist_path) as f:
            listed_names = f.readlines()

        corpus_urls = [
            f"https://raw.githubusercontent.com/kmounlp/NER/1e557de738b8e6215c7cacac116e735518c0f680/말뭉치%20-%20형태소_개체명/{filename.strip()}"
            for filename in listed_names
        ]
        downloaded = dl_manager.download(corpus_urls)
        return {"train": self._generate_examples(downloaded)}
예제 #28
0
    def _split_generators(
        dl_manager: tfds.download.DownloadManager,
    ) -> Dict[str, Iterator[Tuple[str, Dict[str, Union[Path, str]]]]]:
        """Fetch the Kaggle data and define the "train" and "valid" splits.

        NOTE(review): there is no `self` parameter and the generators are
        reached through `Mura` directly; presumably this is decorated as a
        static method outside this view - confirm.

        Args:
            dl_manager: Download manager used for `download_kaggle_data`.

        Returns:
            Dict mapping "train" and "valid" to the result of
            `Mura._generate_examples` on the same-named sub-directory of
            `MURA-v1.1`.
        """
        dataset_root = dl_manager.download_kaggle_data(_KAGGLE_DATA) / "MURA-v1.1"

        return {
            split: Mura._generate_examples(dataset_root / split)
            for split in ("train", "valid")
        }
예제 #29
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Expose the manually downloaded archive as the "train" split.

        The archive is `self.builder_config.dataset` inside
        `dl_manager.manual_dir`.

        Args:
            dl_manager: Download manager providing `manual_dir` and
                `iter_archive`.

        Returns:
            Dict with a single 'train' entry holding the result of
            `self._generate_examples` on the archive iterator.

        Raises:
            AssertionError: If the archive does not exist at that location.
        """
        archive_path = os.path.join(dl_manager.manual_dir,
                                    self.builder_config.dataset)

        archive_present = tf.io.gfile.exists(archive_path)
        if not archive_present:
            raise AssertionError(
                f'You must download the dataset .zip file and place it into {dl_manager.manual_dir}'
            )

        archive_members = dl_manager.iter_archive(archive_path)
        return {'train': self._generate_examples(archive_members)}
예제 #30
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Download the NSMC train and test rating files.

        Args:
            dl_manager: Download manager used to fetch the files.

        Returns:
            Dict mapping "train" and "test" to the result of
            `self._generate_examples` on the matching downloaded file.
        """
        # Both URLs embed the same fixed commit hash of e9t/nsmc.
        split_urls = {
            "train":
            "https://raw.githubusercontent.com/e9t/nsmc/cc0670e872d4ac27bfe36c87456783004b39ef6c/ratings_train.txt",
            "test":
            "https://raw.githubusercontent.com/e9t/nsmc/cc0670e872d4ac27bfe36c87456783004b39ef6c/ratings_test.txt",
        }
        downloaded = dl_manager.download(split_urls)
        return {
            split: self._generate_examples(downloaded[split])
            for split in ("train", "test")
        }