def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Builds the 'validation' and 'imagenet_m' split generators.

    Both splits read the manually downloaded validation archive and share
    the original labels and problematic-image list; they differ only in the
    multi-label mapping handed to `_generate_examples`.

    Args:
      dl_manager: Download manager whose `manual_dir` must contain
        `ILSVRC2012_img_val.tar`.

    Returns:
      Dict mapping the split names 'validation' and 'imagenet_m' to the
      corresponding example generators.

    Raises:
      AssertionError: If the validation archive is not found in `manual_dir`.
    """
    val_path = os.path.join(dl_manager.manual_dir, 'ILSVRC2012_img_val.tar')
    if not tf.io.gfile.exists(val_path):
        raise AssertionError(
            'ImageNet requires manual download of the data. Please download '
            'the train and val set and place them into: {}'.format(val_path))

    original_labels = _get_original_labels(val_path)
    multi_labels, problematic_images, imagenet_m_2022_errors = (
        _get_multi_labels_and_problematic_images(dl_manager))

    # Keep only the multi-label entries whose key is listed for ImageNet-M.
    imagenet_m_2022 = {
        name: multi_labels[name] for name in imagenet_m_2022_errors
    }

    def build_generator(labels):
        # Every split gets its own fresh archive iterator.
        return self._generate_examples(
            archive=dl_manager.iter_archive(val_path),
            original_labels=original_labels,
            multi_labels=labels,
            problematic_images=problematic_images)

    return {
        'validation': build_generator(multi_labels),
        'imagenet_m': build_generator(imagenet_m_2022),
    }
Example #2
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Builds the validation split plus one train split per noise level.

        Reads the annotation JSON and split files from `manual_dir`,
        downloads the three mini-ImageNet CSV listings, and creates one
        'train_XX' split for every entry of `_PERCENTS`.

        Args:
          dl_manager: Download manager; its `manual_dir` must hold the
            ImageNet train/val archives, the `noisy_images` folder and the
            `dataset_no_images` folder.

        Returns:
          Dict mapping split name to an example generator.
        """
        manual_dir = dl_manager.manual_dir
        imnet_path = os.path.join(manual_dir, 'ILSVRC2012_img_train.tar')
        noisy_images_path = os.path.join(manual_dir, 'noisy_images')
        noisy_split_path = os.path.join(
            manual_dir, 'dataset_no_images', 'mini-imagenet', 'split')
        noisy_annot_path = os.path.join(
            manual_dir, 'dataset_no_images', 'mini-imagenet-annotations.json')
        val_path = os.path.join(manual_dir, 'ILSVRC2012_img_val.tar')

        with tf.io.gfile.GFile(noisy_annot_path) as json_file:
            annotations = json.load(json_file)

        # File names of the annotated images: '<image/id>.jpg' taken from
        # the first element of every record under 'data'.
        noisy_image_ids = [
            record[0]['image/id'] + '.jpg' for record in annotations['data']
        ]

        # The clean mini-ImageNet images are collected once up front and
        # shared by all train splits below.
        paths = dl_manager.download({
            'mini_train': MINI_IMAGENET_TRAIN,
            'mini_val': MINI_IMAGENET_VAL,
            'mini_test': MINI_IMAGENET_TEST
        })
        mini_imnet_fnames = (
            self._read_mini_imagenet_csv(paths['mini_train']) +
            self._read_mini_imagenet_csv(paths['mini_val']) +
            self._read_mini_imagenet_csv(paths['mini_test']))
        mini_imnet_images = self._get_clean_images(
            mini_imnet_fnames, dl_manager.iter_archive(imnet_path))

        val_split_file = os.path.join(noisy_split_path, 'clean_validation')
        split_to_generator = {
            tfds.Split.VALIDATION:
                self._generate_val_examples(
                    val_split_file, dl_manager.iter_archive(val_path)),
        }

        for percent in _PERCENTS:
            # Split files are named '<color>_noise_nl_<fraction>'.
            split_file = os.path.join(
                noisy_split_path,
                '{}_noise_nl_{}'.format(self.builder_config.color,
                                        str(percent / 100)))
            split_name = tfds.Split.TRAIN + '_' + '{:02d}'.format(percent)
            split_to_generator[split_name] = self._generate_examples(
                split_file, noisy_image_ids, noisy_images_path,
                mini_imnet_images)

        return split_to_generator
Example #3
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Declares the single TEST split backed by the downloaded archive.

     Args:
       dl_manager: Download manager used to fetch `_IMAGENET_SKETCH_URL`.

     Returns:
       A one-element list holding the TEST `SplitGenerator`. Its
       `gen_kwargs` pass the result of `dl_manager.iter_archive` under the
       key 'archive'.
     """
     archive_path = dl_manager.download(_IMAGENET_SKETCH_URL)
     test_split = tfds.core.SplitGenerator(
         name=tfds.Split.TEST,
         gen_kwargs={'archive': dl_manager.iter_archive(archive_path)},
     )
     return [test_split]
Example #4
0
  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators.

    Fetches the 'Wikipedia sentences' Kaggle data, reads `wikisent2.txt`
    into memory and exposes it as a single TRAIN split.

    Args:
      dl_manager: Download manager used to obtain the Kaggle data.

    Returns:
      A one-element list with the TRAIN `SplitGenerator`; its `gen_kwargs`
      match the parameters of `_generate_examples`.
    """
    # NOTE(review): indexing by file name assumes `download_kaggle_data`
    # returns a mapping of file name -> path; confirm against the installed
    # tfds version.
    dl_paths = dl_manager.download_kaggle_data('Wikipedia sentences')

    data_dir = dl_manager.download({
        'sentences_train': dl_paths['wikisent2.txt'],
    })

    # `download` was given a dict, so the file path has to be looked up
    # under the same key instead of passing the whole dict to GFile.
    txt_path = data_dir['sentences_train']

    with tf.io.gfile.GFile(txt_path, 'r') as f:
      text = f.read()

    # Since there's no official split, putting everything under training split
    return [
        tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN,
            gen_kwargs={
                'split_key': 'train',
                'split_text': text,
            },
        ),
    ]

  def _generate_examples(self, split_key, split_text):
    """Yields one example per line of `split_text`.

    Args:
      split_key: Name of the split being generated (currently unused).
      split_text: Full text of the split; each line becomes one example.

    Yields:
      `(index, {"text": line})` pairs, where `index` is the zero-based line
      number.
    """
    for index, sentence in enumerate(split_text.splitlines()):
      yield index, {"text": sentence}
  def _split_generators(self, dl_manager: tfds.download.DownloadManager
                       ) -> List[tfds.core.SplitGenerator]:
    """Declares the TRAIN, VALIDATION and TEST splits.

    Each split receives, as `day_to_paths`, the result of
    `_get_day_to_paths` for its sub-folder ('train', 'dev' or 'test') of the
    extracted archive.

    Args:
      dl_manager: Download manager used to fetch and unzip `_DOWNLOAD_URL`.

    Returns:
      List of three `SplitGenerator`s in the order TRAIN, VALIDATION, TEST.
    """
    # Specify extract method manually as filename reported by github.com
    # misses the .zip extension so auto-detection doesn't work.
    archive = tfds.download.Resource(
        url=_DOWNLOAD_URL,
        extract_method=tfds.download.ExtractMethod.ZIP)
    data_dir = os.path.join(
        dl_manager.download_and_extract(archive), _DOWNLOAD_ARCHIVE_SUBDIR)

    split_to_subdir = (
        (tfds.Split.TRAIN, "train"),
        (tfds.Split.VALIDATION, "dev"),
        (tfds.Split.TEST, "test"),
    )
    return [
        tfds.core.SplitGenerator(
            name=split,
            gen_kwargs={
                "day_to_paths":
                    _get_day_to_paths(os.path.join(data_dir, subdir))
            },
        )
        for split, subdir in split_to_subdir
    ]
Example #6
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Builds the TRAIN, VALIDATION and TEST split generators.

        All inputs come from `manual_dir`: one frames zip and one annotation
        JSON per split. Everything is passed through `dl_manager.extract`.

        Args:
          dl_manager: Download manager giving access to `manual_dir`.

        Returns:
          Dict mapping each split to an example generator that is given the
          extracted annotation path and the split's `JPEGImages` folder.
        """
        manual_dir = dl_manager.manual_dir
        manually_downloaded_files = {
            'test_all_frames': manual_dir / 'test_all_frames.zip',
            'test_annotations': manual_dir / 'test.json',
            'train_all_frames': manual_dir / 'train_all_frames.zip',
            'train_annotations': manual_dir / 'train.json',
            'valid_all_frames': manual_dir / 'valid_all_frames.zip',
            'valid_annotations': manual_dir / 'valid.json',
        }
        extracted = dl_manager.extract(manually_downloaded_files)

        def build_generator(annotations_key, frames_key):
            # The frames live in '<extracted>/<frames_key>/JPEGImages'.
            return self._generate_examples(
                annotations=extracted[annotations_key],
                all_frames=extracted[frames_key] / frames_key / 'JPEGImages',
            )

        return {
            tfds.Split.TRAIN:
                build_generator('train_annotations', 'train_all_frames'),
            tfds.Split.VALIDATION:
                build_generator('valid_annotations', 'valid_all_frames'),
            tfds.Split.TEST:
                build_generator('test_annotations', 'test_all_frames'),
        }
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
   """Builds the 'train' and 'test' split generators.

   Args:
     dl_manager: Download manager used to fetch and extract `_DS_PATH`.

   Returns:
     Dict mapping 'train' and 'test' to example generators. 'train' is
     generated with the argument '.dev' and 'test' with '.test'
     (presumably file suffixes — confirm in `_generate_examples`).
   """
   extracted = dl_manager.download_and_extract(_DS_PATH)
   split_to_suffix = (('train', '.dev'), ('test', '.test'))
   return {
       split: self._generate_examples(extracted, suffix)
       for split, suffix in split_to_suffix
   }
Example #8
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Builds the 'database' and 'test' split generators.

     Args:
       dl_manager: Download manager used to fetch and extract `_URL`.

     Returns:
       Dict mapping 'database' and 'test' to example generators. Both read
       the same extracted data and differ only in the boolean flag passed
       to `_generate_examples` (True for 'database', False for 'test').
     """
     extracted = dl_manager.download_and_extract({'file': _URL})
     generators = {}
     generators['database'] = self._generate_examples(extracted, True)
     generators['test'] = self._generate_examples(extracted, False)
     return generators
Example #9
0
  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Builds the TRAIN and VALIDATION split generators.

    Downloads the train/val video archives and the annotations, prepares
    the optional manual data, and derives the category id map from the
    'annotations-1.2' folder.

    Args:
      dl_manager: Download manager used for the downloads and handed to
        `_maybe_prepare_manual_data`.

    Returns:
      Dict mapping TRAIN and VALIDATION to example generators.
    """
    urls = {
        'train': _VIDEO_URL + '1-TAO_TRAIN.zip',
        'val': _VIDEO_URL + '2-TAO_VAL.zip',
        'annotations': _ANNOTATIONS_URL
    }
    data = dl_manager.download_and_extract(urls)

    manual_train, manual_val = _maybe_prepare_manual_data(dl_manager)

    # Both annotation files and the id map share this folder.
    annotations_dir = data['annotations'] / 'annotations-1.2'
    id_map = _get_category_id_map(annotations_dir)

    train_examples = self._generate_examples(
        data_path=data['train'],
        manual_path=manual_train,
        annotations_path=annotations_dir / 'train.json',
        id_map=id_map)
    validation_examples = self._generate_examples(
        data_path=data['val'],
        manual_path=manual_val,
        annotations_path=annotations_dir / 'validation.json',
        id_map=id_map)

    return {
        tfds.Split.TRAIN: train_examples,
        tfds.Split.VALIDATION: validation_examples,
    }
Example #10
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Builds a generator for every split listed in the config's URLs.

        Only the keys 'train', 'validation' and 'test' of
        `self.builder_config.data_urls` are considered; a split is emitted
        only when its key is present.

        Args:
          dl_manager: Download manager used to fetch and extract the
            configured URLs.

        Returns:
          Dict mapping each available split to an example generator.
        """
        data_urls = self.builder_config.data_urls
        data_dir = dl_manager.download_and_extract(data_urls)

        known_splits = (
            ('train', tfds.Split.TRAIN),
            ('validation', tfds.Split.VALIDATION),
            ('test', tfds.Split.TEST),
        )
        split_generators = {}
        for key, split in known_splits:
            if key not in data_urls:
                continue
            split_generators[split] = self._generate_examples(
                path=data_dir[key], split=key)
        return split_generators
def _get_multi_labels_and_problematic_images(
    dl_manager: tfds.download.DownloadManager):
  """Loads the multi-label annotations from the downloaded JSON file.

  Args:
    dl_manager: tfds.download.DownloadManager used to download
      `_MULTI_LABELS_URL`.

  Returns:
    val_annotated_images: Dictionary holding the entries of the JSON's
      'initial_annots' section whose image name starts with
      'ILSVRC2012_val_'. Each value is the annotation dictionary stored for
      that image, with keys 'correct', 'wrong', or 'unclear' (a key is
      missing when the image has no labels of that type) and lists of wnids
      as values.
    problematic_images: List of the keys of the 'problematic_images'
      section, i.e. the names of the problematic images.
    imagenet_m_2022: The JSON's 'imagenet_m' entry: image names comprising
      the ImageNet-M 2022 evaluation slice.
  """
  json_path = dl_manager.download(_MULTI_LABELS_URL)
  with tf.io.gfile.GFile(json_path, 'r') as f:
    human_accuracy_data = json.load(f)

  # Only validation-set images are kept; other entries are dropped.
  val_annotated_images = {
      image_name: annotations
      for image_name, annotations in
      human_accuracy_data['initial_annots'].items()
      if image_name.startswith('ILSVRC2012_val_')
  }

  problematic_images = list(human_accuracy_data['problematic_images'].keys())
  imagenet_m_2022 = human_accuracy_data['imagenet_m']
  return val_annotated_images, problematic_images, imagenet_m_2022
  def _split_generators(
      self, dl_manager: tfds.download.DownloadManager
  ) -> List[tfds.core.SplitGenerator]:
    """Declares the TRAIN, VALIDATION and TEST splits.

    Each split's `day_to_paths` argument is the result of
    `_get_day_to_paths` on the 'train', 'dev' or 'test' sub-folder of the
    extracted archive.

    Args:
      dl_manager: Download manager used to fetch and extract
        `_DOWNLOAD_URL`.

    Returns:
      List of three `SplitGenerator`s in the order TRAIN, VALIDATION, TEST.
    """
    extracted_root = dl_manager.download_and_extract(_DOWNLOAD_URL)
    data_dir = os.path.join(extracted_root, _DOWNLOAD_ARCHIVE_SUBDIR)

    def split_generator(name, subdir):
      # One generator per sub-folder of the extracted data directory.
      return tfds.core.SplitGenerator(
          name=name,
          gen_kwargs={
              "day_to_paths":
                  _get_day_to_paths(os.path.join(data_dir, subdir))
          },
      )

    return [
        split_generator(tfds.Split.TRAIN, "train"),
        split_generator(tfds.Split.VALIDATION, "dev"),
        split_generator(tfds.Split.TEST, "test"),
    ]
Example #13
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Builds the TRAIN and VALIDATION split generators.

        The archive is chosen by `self.builder_config.full_resolution`: the
        full-resolution zip when set, the 480p zip otherwise.

        Args:
          dl_manager: Download manager used to fetch and extract the
            archive.

        Returns:
          Dict mapping TRAIN and VALIDATION to generators that are given
          the corresponding `ImageSets/2017` listing file.
        """
        if self.builder_config.full_resolution:
            archive_name = 'DAVIS-2017-trainval-Full-Resolution.zip'
        else:
            archive_name = 'DAVIS-2017-trainval-480p.zip'
        trainval_data = dl_manager.download_and_extract(_URL + archive_name)

        split_to_listing = {
            tfds.Split.TRAIN: trainval_data / 'DAVIS/ImageSets/2017/train.txt',
            tfds.Split.VALIDATION:
                trainval_data / 'DAVIS/ImageSets/2017/val.txt',
        }
        return {
            split: self._generate_examples(listing)
            for split, listing in split_to_listing.items()
        }
Example #14
0
  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Builds the single 'train' split generator.

    Args:
      dl_manager: Download manager used to fetch and extract the gzipped
        CSV.

    Returns:
      Dict with the key 'train' mapped to the example generator for the
      extracted file.
    """
    csv_url = 'http://go.criteo.net/criteo-research-uplift-v2.1.csv.gz'
    extracted = dl_manager.download_and_extract(csv_url)
    train_examples = self._generate_examples(extracted)
    return {'train': train_examples}
Example #15
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Builds the 'train', 'validation' and 'test' split generators.

     Args:
       dl_manager: Download manager used to fetch and extract
         `_DOWNLOAD_URLS`; the result is indexed by split name.

     Returns:
       Dict mapping each split name to its example generator.
     """
     extracted = dl_manager.download_and_extract(_DOWNLOAD_URLS)
     return {
         split: self._generate_examples(extracted[split])
         for split in ('train', 'validation', 'test')
     }
Example #16
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Builds the 'train' and 'dev' split generators.

        Args:
          dl_manager: Download manager used to fetch and extract
            `_DATA_LINK`.

        Returns:
          Dict mapping 'train' and 'dev' to generators reading
          `drop_dataset_train.json` and `drop_dataset_dev.json` from the
          extracted folder.
        """
        extracted = dl_manager.download_and_extract(_DATA_LINK)
        train_file = extracted / "drop_dataset_train.json"
        dev_file = extracted / "drop_dataset_dev.json"

        return {
            "train": self._generate_examples(train_file),
            "dev": self._generate_examples(dev_file),
        }
Example #17
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Builds the 'train' and 'test' split generators.

        Side effects: stores the extracted image folder in `self.img_path`
        and the downloaded split listings in `self.splits`; the generators
        only receive the split name.

        Args:
          dl_manager: Download manager used for the image archive and the
            per-domain split listings.

        Returns:
          Dict mapping 'train' and 'test' to example generators.
        """
        base_url = self._BASE_URL
        self.img_path = dl_manager.download_and_extract(
            f'{base_url}/{self.builder_config.img_url}')

        # The config name selects which domain's listings are fetched.
        domain = self.builder_config.name
        split_urls = {
            'train': f'{base_url}/domainnet/txt/{domain}_train.txt',
            'test': f'{base_url}/domainnet/txt/{domain}_test.txt'
        }
        self.splits = dl_manager.download(split_urls)

        return {
            split: self._generate_examples(split=split)
            for split in ('train', 'test')
        }
Example #18
0
  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Builds one generator per split name listed in the builder config.

    Args:
      dl_manager: Download manager used to fetch and extract
        `self.builder_config.data_path`.

    Returns:
      Dict mapping every name in `self.builder_config.splits_names` to a
      generator reading from the sub-folder named after the config.
    """
    config = self.builder_config
    extracted = dl_manager.download_and_extract(config.data_path)

    generators = {}
    for split_name in config.splits_names:
      generators[split_name] = self._generate_examples(
          extracted / config.name, split_name=split_name)
    return generators
Example #19
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Builds the single 'train' split generator.

     Args:
       dl_manager: Download manager used to fetch and extract the entry of
         `self._DATA_PATHS` selected by the builder config name.

     Returns:
       Dict with the key 'train' mapped to the example generator, which is
       given the download result (a dict with the key 'file_path').
     """
     resource = self._DATA_PATHS[self.builder_config.name]
     extracted = dl_manager.download_and_extract({'file_path': resource})
     return {'train': self._generate_examples(extracted)}
Example #20
0
 def _split_generators(
     self, dl_manager: tfds.download.DownloadManager
 ) -> Dict[str, tfds.core.SplitGenerator]:
     """Builds the single TRAIN split generator.

     Args:
       dl_manager: Download manager used to download `_URL`.

     Returns:
       Dict with TRAIN mapped to the example generator for the downloaded
       file.
     """
     downloaded = dl_manager.download({'data': _URL})
     # There is no predefined train/val/test split for this dataset.
     train_examples = self._generate_examples(file_path=downloaded['data'])
     return {tfds.Split.TRAIN: train_examples}
Example #21
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Builds the single 'train' split generator.

        Args:
          dl_manager: Download manager used to fetch the CSV file.

        Returns:
          Dict with the key 'train' mapped to the example generator for the
          downloaded file.
        """
        csv_url = 'http://www.minethatdata.com/Kevin_Hillstrom_MineThatData_E-MailAnalytics_DataMiningChallenge_2008.03.20.csv'
        csv_path = dl_manager.download_and_extract(csv_url)
        train_examples = self._generate_examples(csv_path)
        return {'train': train_examples}
Example #22
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Builds the 'train', 'vali' and 'test' split generators.

        The archive is selected by `self.builder_config.size`; the split
        files are read from the `Fold<fold>` folder selected by
        `self.builder_config.fold`.

        Args:
          dl_manager: Download manager used to fetch and extract the
            archive.

        Returns:
          Dict mapping each split name to a generator reading
          `Fold<fold>/<split>.txt`.
        """
        config = self.builder_config
        size, fold = config.size, config.fold
        extracted = dl_manager.download_and_extract(_URLS[size])

        generators = {}
        for split in ("train", "vali", "test"):
            split_file = extracted / f"Fold{fold}/{split}.txt"
            generators[split] = self._generate_examples(split_file)
        return generators
Example #23
0
  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Builds the single 'train' split generator.

    The HDF5 URL is assembled from the builder config's `task` and
    `filename` attributes.

    Args:
      dl_manager: Download manager used to fetch the file.

    Returns:
      Dict with the key 'train' mapped to the example generator, which is
      given the download result (a dict with the key 'file_path').
    """
    hdf5_url = f'http://downloads.cs.stanford.edu/downloads/rt_benchmark/{self.builder_config.task}/ph/{self.builder_config.filename}.hdf5'
    downloaded = dl_manager.download_and_extract({'file_path': hdf5_url})
    return {'train': self._generate_examples(downloaded)}
Example #24
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Builds the single 'train' split generator.

        Args:
          dl_manager: Download manager used to fetch and extract the
            repository's master zip.

        Returns:
          Dict with the key 'train' mapped to a generator reading from the
          `train_imgs` folder of the extracted archive.
        """
        archive_url = 'https://github.com/karoldvl/ESC-50/archive/master.zip'
        extracted = dl_manager.download_and_extract(archive_url)

        # NOTE(review): confirm that 'train_imgs' exists directly under the
        # extraction root of this archive.
        train_dir = extracted / 'train_imgs'
        return {'train': self._generate_examples(train_dir)}
Example #25
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Builds the TRAIN, VALIDATION and TEST split generators.

     Args:
       dl_manager: Download manager used to fetch and extract `_URL`.

     Returns:
       Dict mapping the splits to generators reading `train.json`,
       `dev.json` and `test.json` from the extracted folder.
     """
     extracted = dl_manager.download_and_extract(_URL)
     split_to_file = (
         (tfds.Split.TRAIN, 'train.json'),
         (tfds.Split.VALIDATION, 'dev.json'),
         (tfds.Split.TEST, 'test.json'),
     )
     return {
         split: self._generate_examples(os.path.join(extracted, file_name))
         for split, file_name in split_to_file
     }
  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Declares the VALIDATION, TEST and TRAIN splits for one language.

    The outer archive from `URL` contains one `<language>.tar.gz` per
    language; the one matching `self.builder_config.language` is extracted
    in a second step.

    Args:
      dl_manager: Download manager used for both extraction steps.

    Returns:
      List of `SplitGenerator`s in the order VALIDATION, TEST, TRAIN, whose
      `filepath` points at the 'dev', 'test' and 'train' entries of the
      language folder.
    """
    outer_dir = dl_manager.download_and_extract(URL)
    language_archive = os.path.join(
        outer_dir, self.builder_config.language + ".tar.gz")
    subpath = dl_manager.extract(language_archive)

    split_to_entry = (
        (tfds.Split.VALIDATION, "dev"),
        (tfds.Split.TEST, "test"),
        (tfds.Split.TRAIN, "train"),
    )
    return [
        tfds.core.SplitGenerator(
            name=split,
            gen_kwargs={"filepath": os.path.join(subpath, entry)},
        )
        for split, entry in split_to_entry
    ]
Example #27
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Builds the TRAIN and TEST split generators.

        Args:
          dl_manager: Download manager used to download `_TRAIN_URL` and
            `_TEST_URL`.

        Returns:
          Dict mapping TRAIN and TEST to generators for the respective
          downloaded file.
        """
        urls = {
            'train': _TRAIN_URL,
            'test': _TEST_URL,
        }
        downloaded = dl_manager.download(urls)

        train_examples = self._generate_examples(downloaded['train'])
        test_examples = self._generate_examples(downloaded['test'])
        return {
            tfds.Split.TRAIN: train_examples,
            tfds.Split.TEST: test_examples,
        }
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Builds the single 'train' split generator.

     The file URL is assembled from the builder config's `dataset_dir` and
     `file_suffix` attributes.

     Args:
       dl_manager: Download manager used to fetch the HDF5 file.

     Returns:
       Dict with the key 'train' mapped to the example generator, which is
       given the download result (a dict with the key 'file_path').
     """
     config = self.builder_config
     ds_dir = config.dataset_dir
     ds_name = 'halfcheetah_' + config.file_suffix + '.hdf5'
     file_url = ('http://rail.eecs.berkeley.edu/datasets/offline_rl/' +
                 ds_dir + '/' + ds_name)
     downloaded = dl_manager.download_and_extract({'file_path': file_url})
     return {'train': self._generate_examples(downloaded)}
Example #29
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Builds the single 'train' split generator.

     The shard files are listed by `_get_files` for the GCS prefix built
     from the builder config's `task` and `name`, using `self._SHARDS`
     shards.

     Args:
       dl_manager: Download manager used to fetch the shard files.

     Returns:
       Dict with the key 'train' mapped to the example generator, which is
       given the download result (a dict with the key 'file_paths').
     """
     config = self.builder_config
     run, task = config.name, config.task
     shard_files = _get_files(
         prefix=f'gs://rl_unplugged/dmlab/{task}/{run}',
         num_shards=self._SHARDS)
     downloaded = dl_manager.download_and_extract(
         {'file_paths': shard_files})
     return {'train': self._generate_examples(downloaded)}
Example #30
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Builds the 'train' and 'dev' split generators.

        The whole JSON file is parsed up front; each generator receives the
        sub-object stored under its split name.

        Args:
          dl_manager: Download manager used to download the JSON file.

        Returns:
          Dict mapping 'train' and 'dev' to example generators.
        """
        json_url = 'https://storage.googleapis.com/gresearch/ASQA/ASQA.json'
        json_path = dl_manager.download(json_url)

        with tf.io.gfile.GFile(json_path, 'r') as json_file:
            samples = json.load(json_file)

        train_examples = self._generate_examples(samples['train'])
        dev_examples = self._generate_examples(samples['dev'])
        return {
            'train': train_examples,
            'dev': dev_examples,
        }