Ejemplo n.º 1
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Extract the manually downloaded files and build the three splits.

        For each of `test`, `train` and `valid`, the files
        `<name>_all_frames.zip` and `<name>.json` are taken from
        `dl_manager.manual_dir` and passed to `dl_manager.extract`.

        Args:
            dl_manager: Download manager providing `manual_dir` and `extract`.

        Returns:
            A dict mapping `tfds.Split.TRAIN`, `tfds.Split.VALIDATION` and
            `tfds.Split.TEST` to the result of `self._generate_examples`.
        """
        manual_dir = dl_manager.manual_dir

        archives = {}
        for prefix in ('test', 'train', 'valid'):
            archives[f'{prefix}_all_frames'] = (
                manual_dir / f'{prefix}_all_frames.zip')
            archives[f'{prefix}_annotations'] = manual_dir / f'{prefix}.json'

        extracted = dl_manager.extract(archives)

        def examples_for(prefix):
            # Frames are read from `<extracted>/<prefix>_all_frames/JPEGImages`.
            frames_key = f'{prefix}_all_frames'
            return self._generate_examples(
                annotations=extracted[f'{prefix}_annotations'],
                all_frames=extracted[frames_key] / frames_key / 'JPEGImages',
            )

        return {
            tfds.Split.TRAIN: examples_for('train'),
            tfds.Split.VALIDATION: examples_for('valid'),
            tfds.Split.TEST: examples_for('test'),
        }
Ejemplo n.º 2
0
def _maybe_prepare_manual_data(dl_manager: tfds.download.DownloadManager):
  """Find and extract the manually downloaded archives, if they are present.

  The patterns `1_AVA_HACS_TRAIN_*.zip` and `2_AVA_HACS_VAL_*.zip` are looked
  up, in that order, inside `dl_manager.manual_dir`.

  Args:
    dl_manager: Download manager providing `manual_dir` and `extract`.

  Returns:
    `(None, None)` as soon as one pattern has no match; otherwise the result
    of `dl_manager.extract` called on the list of matched files.

  Raises:
    ValueError: If a pattern matches more than one file.
  """
  manual_dir = dl_manager.manual_dir
  # Globs are needed because the file has a different name each time it is
  # downloaded.
  patterns = ('1_AVA_HACS_TRAIN_*.zip', '2_AVA_HACS_VAL_*.zip')

  archives = []
  for pattern in patterns:
    matches = list(manual_dir.glob(pattern))
    if not matches:  # No manually downloaded files.
      return None, None
    if len(matches) > 1:
      raise ValueError(f'Unexpected multiple files matching pattern: '
                       f'{pattern} inside {os.fspath(manual_dir)}. '
                       f'There should only be one file matching this pattern.')
    archives.append(matches[0])
  return dl_manager.extract(archives)
  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Download the data and define the validation, test and train splits.

    The archive at `URL` is downloaded and extracted, then the
    `<builder_config.language>.tar.gz` file found inside it is extracted.

    Args:
      dl_manager: Download manager used for downloading and extracting.

    Returns:
      A list of three `tfds.core.SplitGenerator`s (validation, test, train)
      whose `filepath` kwarg points at the `dev`, `test` and `train` entries
      of the extracted language archive.
    """
    download_root = dl_manager.download_and_extract(URL)
    language_archive = os.path.join(
        download_root, self.builder_config.language + ".tar.gz")
    language_dir = dl_manager.extract(language_archive)

    # (split name, entry inside the extracted language archive)
    layout = (
        (tfds.Split.VALIDATION, "dev"),
        (tfds.Split.TEST, "test"),
        (tfds.Split.TRAIN, "train"),
    )
    return [
        tfds.core.SplitGenerator(
            name=split_name,
            gen_kwargs={"filepath": os.path.join(language_dir, entry)},
        )
        for split_name, entry in layout
    ]
Ejemplo n.º 4
0
 def _split_generators(
     self, dl_manager: tfds.download.DownloadManager
 ) -> List[tfds.core.SplitGenerator]:
   """Extract the manual archive and collect the usable metadata rows.

   `CORD-19-research-challenge.zip` is read from `dl_manager.manual_dir` and
   extracted. `metadata.csv` from the extracted directory is loaded with
   pandas, with missing values replaced by empty strings.

   A row is kept only when its `has_full_text` value is truthy,
   `_has_abstract(row)` is truthy and its `full_text_file` value is non-empty.

   Args:
     dl_manager: Download manager providing `manual_dir` and `extract`.

   Returns:
     A one-element list with the train `SplitGenerator`. Its `data_paths`
     kwarg is a list of dicts, one per kept row, holding the
     `_ADDITIONAL_FEATURES` and `_ABSTRACT` columns plus a `path` entry.
   """
   archive = os.path.join(dl_manager.manual_dir,
                          "CORD-19-research-challenge.zip")
   root = dl_manager.extract(archive)

   pandas = tfds.core.lazy_imports.pandas
   metadata = pandas.read_csv(os.path.join(root, "metadata.csv")).fillna("")

   records = []
   for _, row in metadata.iterrows():
     subset = row["full_text_file"]
     if not (row["has_full_text"] and _has_abstract(row) and subset):
       continue
     record = {
         column: row[column] for column in _ADDITIONAL_FEATURES + [_ABSTRACT]
     }
     # The JSON file sits at `<root>/<subset>/<subset>/<row[_SHA]>.json`.
     record["path"] = os.path.join(root, subset, subset, row[_SHA] + ".json")
     records.append(record)

   train_split = tfds.core.SplitGenerator(
       name=tfds.Split.TRAIN,
       gen_kwargs={"data_paths": records},
   )
   return [train_split]
Ejemplo n.º 5
0
    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Extract the manually downloaded files and build the three splits.

        The training split always uses the provided training files. For the
        validation and test splits, when the matching
        `builder_config.split_val_data_range` / `split_test_data_range` is
        not None, the split is created by subsampling the training data: it
        reuses the training archive and annotations and forwards the range
        as `video_range_to_use`. Otherwise the provided `valid_*` / `test_*`
        files are used and the range is None.

        Args:
            dl_manager: Download manager providing `manual_dir` and `extract`.

        Returns:
            A dict mapping `tfds.Split.TRAIN`, `tfds.Split.VALIDATION` and
            `tfds.Split.TEST` to the result of `self._generate_examples`.
        """
        config = self.builder_config
        manual_dir = dl_manager.manual_dir

        # A range of None means "use the split exactly as provided".
        data_ranges = {
            'train': config.split_train_data_range,
            'valid': config.split_val_data_range,
            'test': config.split_test_data_range,
        }

        # Which provided split supplies the files for each generated split.
        # The same `is not None` test drives both the archive choice and the
        # frames sub-directory, so a falsy but non-None range can no longer
        # pair the training archive with a `valid_all_frames` /
        # `test_all_frames` directory that does not exist inside it.
        source_of = {'train': 'train'}
        for split in ('valid', 'test'):
            source_of[split] = (
                'train' if data_ranges[split] is not None else split)

        manually_downloaded_files = {}
        for split in ('train', 'valid', 'test'):
            source = source_of[split]
            manually_downloaded_files[f'{split}_all_frames'] = (
                manual_dir / f'{source}_all_frames.zip')
            manually_downloaded_files[f'{split}_annotations'] = (
                manual_dir / f'{source}.json')

        extracted_files = dl_manager.extract(manually_downloaded_files)

        def examples_for(split):
            # Frames are read from `<extracted>/<source>_all_frames/JPEGImages`.
            frames_dir = f'{source_of[split]}_all_frames'
            return self._generate_examples(
                annotations=extracted_files[f'{split}_annotations'],
                all_frames=(extracted_files[f'{split}_all_frames'] /
                            frames_dir / 'JPEGImages'),
                video_range_to_use=data_ranges[split],
            )

        return {
            tfds.Split.TRAIN: examples_for('train'),
            tfds.Split.VALIDATION: examples_for('valid'),
            tfds.Split.TEST: examples_for('test'),
        }
Ejemplo n.º 6
0
 def _split_generators(self, dl_manager: tfds.download.DownloadManager):
     """Extract `train_1.zip` and build the single `train` split.

     Args:
         dl_manager: Download manager providing `manual_dir` and `extract`.

     Returns:
         A dict with one key, `'train'`, mapped to the result of
         `self._generate_examples` called on the `train_1` directory inside
         the extracted archive.
     """
     extracted_root = dl_manager.extract(dl_manager.manual_dir / 'train_1.zip')
     train_dir = extracted_root / 'train_1'
     return {'train': self._generate_examples(train_dir)}