def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Builds the `validation` and `imagenet_m` split generators.

    Both splits iterate over the manually downloaded validation archive
    `ILSVRC2012_img_val.tar` and share the same original labels and
    problematic-image collection. They differ only in the multi-label
    mapping handed to `_generate_examples`: `imagenet_m` receives just the
    entries whose keys appear in the ImageNet-M 2022 error collection.

    Args:
      dl_manager: Download manager providing `manual_dir` and `iter_archive`.

    Returns:
      A dict mapping each split name to the result of `_generate_examples`.

    Raises:
      AssertionError: If the validation archive is not found in `manual_dir`.
    """
    archive_path = os.path.join(
        dl_manager.manual_dir, 'ILSVRC2012_img_val.tar')
    if not tf.io.gfile.exists(archive_path):
      message = (
          'ImageNet requires manual download of the data. Please download '
          'the train and val set and place them into: {}'.format(archive_path))
      raise AssertionError(message)

    original_labels = _get_original_labels(archive_path)

    label_bundle = _get_multi_labels_and_problematic_images(dl_manager)
    multi_labels, problematic_images, imagenet_m_2022_errors = label_bundle

    # Restrict the multi-label mapping to the ImageNet-M keys only.
    imagenet_m_2022 = {
        image_key: multi_labels[image_key]
        for image_key in imagenet_m_2022_errors
    }

    def examples_for(label_map):
      # Each split gets its own archive iterator over the same tar file.
      return self._generate_examples(
          archive=dl_manager.iter_archive(archive_path),
          original_labels=original_labels,
          multi_labels=label_map,
          problematic_images=problematic_images)

    return {
        'validation': examples_for(multi_labels),
        'imagenet_m': examples_for(imagenet_m_2022),
    }
Beispiel #2
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Builds the validation split plus one train split per noise level.

        Reads the noisy-image annotations from the manual directory,
        downloads the three mini-ImageNet CSV listings, and hands the
        combined file names together with the ImageNet train archive to
        `_get_clean_images`. One train split is then registered for every
        value in `_PERCENTS`, using the split file that matches the builder
        config's `color` and the noise level `percent / 100`.

        Args:
          dl_manager: Download manager providing `manual_dir`, `download`
            and `iter_archive`.

        Returns:
          A dict mapping split names to example generators.
        """
        manual_dir = dl_manager.manual_dir
        train_archive = os.path.join(manual_dir, 'ILSVRC2012_img_train.tar')
        val_archive = os.path.join(manual_dir, 'ILSVRC2012_img_val.tar')
        noisy_images_dir = os.path.join(manual_dir, 'noisy_images')
        splits_dir = os.path.join(
            manual_dir, 'dataset_no_images', 'mini-imagenet', 'split')
        annotations_path = os.path.join(
            manual_dir, 'dataset_no_images', 'mini-imagenet-annotations.json')

        with tf.io.gfile.GFile(annotations_path) as annotations_file:
            annotations = json.load(annotations_file)

        # File names of the noisy images: the annotated id plus '.jpg'.
        noisy_image_ids = [
            entry[0]['image/id'] + '.jpg' for entry in annotations['data']
        ]

        # Per the original author's note, the mini-ImageNet images are
        # collected up front so the train splits below can reuse them.
        csv_paths = dl_manager.download({
            'mini_train': MINI_IMAGENET_TRAIN,
            'mini_val': MINI_IMAGENET_VAL,
            'mini_test': MINI_IMAGENET_TEST
        })
        train_names, val_names, test_names = (
            self._read_mini_imagenet_csv(csv_paths[listing])
            for listing in ('mini_train', 'mini_val', 'mini_test'))
        all_mini_names = train_names + val_names + test_names

        clean_images = self._get_clean_images(
            all_mini_names, dl_manager.iter_archive(train_archive))

        generators = {
            tfds.Split.VALIDATION: self._generate_val_examples(
                os.path.join(splits_dir, 'clean_validation'),
                dl_manager.iter_archive(val_archive)),
        }

        for percent in _PERCENTS:
            # Split names carry the zero-padded percentage as a suffix.
            split_key = tfds.Split.TRAIN + '_' + '{:02d}'.format(percent)
            noise_level = str(percent / 100)
            split_file = os.path.join(
                splits_dir,
                '{}_noise_nl_{}'.format(self.builder_config.color,
                                        noise_level))
            generators[split_key] = self._generate_examples(
                split_file, noisy_image_ids, noisy_images_dir, clean_images)

        return generators
Beispiel #3
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Returns a one-element list holding the TEST split generator.

     The archive at `_IMAGENET_SKETCH_URL` is downloaded and an iterator
     over its contents is passed on as the `archive` generator argument.

     Args:
       dl_manager: Download manager providing `download` and `iter_archive`.

     Returns:
       A list containing a single `tfds.core.SplitGenerator`.
     """
     archive_path = dl_manager.download(_IMAGENET_SKETCH_URL)
     test_split = tfds.core.SplitGenerator(
         name=tfds.Split.TEST,
         gen_kwargs=dict(archive=dl_manager.iter_archive(archive_path)),
     )
     return [test_split]
Beispiel #4
0
 def _generate_examples(self, dl_manager: tfds.download.DownloadManager,
                        img_path, latent_path, label):
   """Yields one `(key, example)` pair per file in the image archive.

   Args:
     dl_manager: Download manager used to iterate over `img_path`.
     img_path: Path of the archive holding the images.
     latent_path: Path of the file read with `np.load`; it must provide the
       `illumination`, `pose_mat` and `pose_quat` arrays.
     label: Value stored under `label`; also used as the key prefix.

   Yields:
     Tuples of a key (`label`, an underscore and the five-character id cut
     from the file name) and the feature dict for that file.
   """
   with tf.io.gfile.GFile(latent_path, 'rb') as latent_file:
     loaded_arrays = dict(np.load(latent_file))
   # Every loaded array is converted to float32 before use.
   latents = {
       name: values.astype(np.float32)
       for name, values in loaded_arrays.items()
   }
   for fname, fobj in dl_manager.iter_archive(img_path):
     # Five characters preceding the last four of the file name; presumably
     # a numeric index right before the extension - verify against the
     # archive layout.
     image_id = fname[-9:-4]
     # NOTE(review): the modulo maps ids onto latent rows; the reason for
     # the 80000 constant is not visible here - confirm.
     row = int(image_id) % 80000
     yield label + '_' + image_id, {
         'image': fobj,
         'label': label,
         'illumination': latents['illumination'][row],
         'pose_mat': latents['pose_mat'][row],
         'pose_quat': latents['pose_quat'][row],
     }