Ejemplo n.º 1
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Extract the manually provided archive and define the three splits.

        The archive `mimic_benchmarking_phenotyping.tar.gz` is looked up in
        `dl_manager.manual_dir`, so it has to be placed there beforehand.

        Args:
            dl_manager: Download manager used to locate and extract the
                archive.

        Returns:
            A list with the TRAIN, VALIDATION and TEST `SplitGenerator`s, in
            that order. Each one carries the `data_dir` and `listfile`
            keyword arguments for example generation.
        """
        archive = join(dl_manager.manual_dir,
                       'mimic_benchmarking_phenotyping.tar.gz')
        root = dl_manager.extract(archive)
        train_dir = join(root, 'train')

        # (split, data directory, listfile name). The validation split reads
        # its data from the train directory and differs only in its listfile.
        layout = (
            (tfds.Split.TRAIN, train_dir, 'train_listfile.csv'),
            (tfds.Split.VALIDATION, train_dir, 'val_listfile.csv'),
            (tfds.Split.TEST, join(root, 'test'), 'test_listfile.csv'),
        )

        return [
            tfds.core.SplitGenerator(
                name=split,
                gen_kwargs={
                    'data_dir': data_dir,
                    'listfile': join(root, listfile_name),
                },
            )
            for split, data_dir, listfile_name in layout
        ]
Ejemplo n.º 2
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Extract the manually downloaded archive and define the five folds.

     The archive `brazilian_cerrado_dataset.zip` is looked up in
     `dl_manager.manual_dir`, so it has to be placed there beforehand.
     NOTE(review): an earlier, commented-out revision downloaded it from
     https://homepages.dcc.ufmg.br/~keiller.nogueira/datasets/brazilian_cerrado_dataset.zip
     -- confirm that this is still the right source.

     Args:
         dl_manager: Download manager used to locate and extract the
             archive.

     Returns:
         A dict mapping 'fold1' ... 'fold5' to the result of
         `self._generate_examples` called on that fold's directory.
     """
     # `manual_dir` and the extraction result are pathlib-like objects, so
     # `/` is used to append path components.
     zip_path = dl_manager.manual_dir / 'brazilian_cerrado_dataset.zip'
     dataset_root = dl_manager.extract(zip_path)

     folds_root = 'Brazilian_Cerrado_Savana_Scenes_Dataset/folds'
     return {
         f'fold{idx}': self._generate_examples(
             path=dataset_root / f'{folds_root}/fold{idx}')
         for idx in range(1, 6)
     }
	def _split_generators(self, dl_manager: tfds.download.DownloadManager):
		"""Extract the source data and define the train/validation/test splits.

		This dataset was created following the guide:
		https://www.tensorflow.org/datasets/add_dataset

		Args:
			dl_manager: Download manager used to extract
				`self.data_source_path`.

		Returns:
			A dict mapping 'train', 'validation' and 'test' to the result of
			`self._generate_examples` for the matching subset.
		"""
		extracted_root = dl_manager.extract(self.data_source_path)
		images_path = extracted_root / self.pp_param['output_pp_data_dir_name']

		# (split name, subset name handed to `_generate_examples`); only the
		# validation split uses a different subset name ('val').
		subset_by_split = (
			('train', 'train'),
			('validation', 'val'),
			('test', 'test'),
		)
		return {
			split: self._generate_examples(images_path, subset)
			for split, subset in subset_by_split
		}
Ejemplo n.º 4
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Extract the manually provided archive and define the TRAIN split.

        The archive name comes from `self.builder_config.file_name` and is
        looked up in `dl_manager.manual_dir`.

        Args:
            dl_manager: Download manager used to locate and extract the
                archive.

        Returns:
            A one-element list holding the TRAIN `SplitGenerator`, whose
            `images_dir_path` keyword argument points inside the extracted
            archive.
        """
        archive_path = os.path.join(dl_manager.manual_dir,
                                    self.builder_config.file_name)
        extracted_root = dl_manager.extract(archive_path)

        # Sub-directory of the extracted archive that is handed to example
        # generation.
        images_dir_path = os.path.join(
            extracted_root, "global/cscratch1/sd/jialiu/kappaTNG/COSMOS/")

        train_split = tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN,
            gen_kwargs={"images_dir_path": images_dir_path},
        )
        return [train_split]
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Extract the source data and define one split per subset and level.

        Ten degradation levels (0% to 90% in steps of 10%) are combined with
        the 'train', 'val' and 'test' subsets, giving 30 splits in total.

        This dataset was created following the guide:
        https://www.tensorflow.org/datasets/add_dataset

        Args:
            dl_manager: Download manager used to extract
                `self.data_source_path`.

        Returns:
            A dict keyed '<subset>_<percent>' (e.g. 'train_0', 'val_10',
            'test_90') whose values are the results of
            `self._generate_examples(images_path, subset, level)`.
        """
        extracted_root = dl_manager.extract(self.data_source_path)
        images_path = extracted_root / self.pp_param['output_pp_data_dir_name']

        # (percent used in the split name, level passed to
        # `_generate_examples`). The levels are written as literals so that
        # exactly these values are passed on (note the integer 0 for 0%).
        # NOTE(review): the level is presumably the fraction of degradation
        # applied to the images -- confirm against `_generate_examples`.
        degradation_levels = (
            (0, 0),
            (10, 0.1),
            (20, 0.2),
            (30, 0.3),
            (40, 0.4),
            (50, 0.5),
            (60, 0.6),
            (70, 0.7),
            (80, 0.8),
            (90, 0.9),
        )
        subsets = ('train', 'val', 'test')

        # Levels vary in the outer loop so the entries are ordered
        # train/val/test within each degradation level.
        return {
            f'{subset}_{percent}':
                self._generate_examples(images_path, subset, level)
            for percent, level in degradation_levels
            for subset in subsets
        }