Example #1
    def _dataset_split_generators(self, dl_manager):
        # Download and extract the CIFAR archive; returns its local path.
        cifar_path = dl_manager.download_and_extract(self._cifar_info.url)

        def gen_filenames(filenames):
            # Yield the on-disk path of each data file inside the archive.
            for f in filenames:
                yield os.path.join(cifar_path, self._cifar_info.prefix, f)

        # Bind the file paths now; the builder calls these zero-argument
        # callables later to produce examples.
        train_gen = functools.partial(
            self._generate_cifar_examples,
            filepaths=gen_filenames(self._cifar_info.train_files),
        )
        test_gen = functools.partial(
            self._generate_cifar_examples,
            filepaths=gen_filenames(self._cifar_info.test_files),
        )

        # Write TRAIN as 10 shards and TEST as a single shard.
        train_splits = [
            self._split_files(split=dataset_builder.Split.TRAIN, num_shards=10)
        ]
        test_splits = [
            self._split_files(split=dataset_builder.Split.TEST, num_shards=1)
        ]
        return [
            dataset_builder.SplitGenerator(generator_fn=train_gen,
                                           split_files=train_splits),
            dataset_builder.SplitGenerator(generator_fn=test_gen,
                                           split_files=test_splits),
        ]
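
A note on Example #1: gen_filenames() returns a one-shot generator, so the partial bound to it can only be iterated once. A standalone sketch of the difference (the file names are made up):

    import functools

    def make_examples(filepaths):
        return [p.upper() for p in filepaths]

    # Binding a generator: works on the first call only, because the
    # generator is exhausted after one pass.
    one_shot = functools.partial(make_examples,
                                 filepaths=(p for p in ["a.bin", "b.bin"]))
    print(one_shot())  # ['A.BIN', 'B.BIN']
    print(one_shot())  # [] -- already exhausted

    # Binding a list makes the partial safely re-callable.
    reusable = functools.partial(make_examples, filepaths=["a.bin", "b.bin"])
    print(reusable())  # ['A.BIN', 'B.BIN'] on every call
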
Example #2
  def _dataset_split_generators(self, dl_manager):
    def zero_to_thirty():
      for i in range(30):
        yield self.info.specs.encode_sample({
            "x": i,
            "y": np.array([-i]).astype(np.int64)[0],
            "z": tf.compat.as_text(str(i))
        })

    def thirty_to_forty():
      for i in range(30, 40):
        yield self.info.specs.encode_sample({
            "x": i,
            "y": np.array([-i]).astype(np.int64)[0],
            "z": tf.compat.as_text(str(i))
        })

    zero_to_thirty_splits = [
        self._split_files(split=dataset_builder.Split.TRAIN, num_shards=2),
        self._split_files(split=dataset_builder.Split.VALIDATION, num_shards=1)
    ]
    thirty_to_forty_splits = [
        self._split_files(split=dataset_builder.Split.TEST, num_shards=1)
    ]
    return [
        dataset_builder.SplitGenerator(generator_fn=zero_to_thirty,
                                       split_files=zero_to_thirty_splits),
        dataset_builder.SplitGenerator(generator_fn=thirty_to_forty,
                                       split_files=thirty_to_forty_splits),
    ]
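
In Example #2, np.array([-i]).astype(np.int64)[0] is a roundabout way of producing an int64 scalar for the encoder; a quick NumPy check (independent of this codebase) shows np.int64(-i) is equivalent:

    import numpy as np

    i = 7
    a = np.array([-i]).astype(np.int64)[0]
    b = np.int64(-i)
    assert a == b
    assert a.dtype == b.dtype == np.dtype("int64")
    print(type(a), type(b))  # both <class 'numpy.int64'>
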
Example #3
 def _dataset_split_generators(self):
     train_gen = lambda: self._generate_cifar_examples(is_training=True)
     test_gen = lambda: self._generate_cifar_examples(is_training=False)
     train_splits = [
         self._split_files(split=dataset_builder.Split.TRAIN, num_shards=10)
     ]
     test_splits = [
         self._split_files(split=dataset_builder.Split.TEST, num_shards=1)
     ]
     return [
         dataset_builder.SplitGenerator(generator_fn=train_gen,
                                        split_files=train_splits),
         dataset_builder.SplitGenerator(generator_fn=test_gen,
                                        split_files=test_splits),
     ]
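
The lambdas in Example #3 exist so that generator_fn is a zero-argument callable: no example is produced until the builder actually invokes it. A self-contained sketch of the pattern (the _generate helper is hypothetical):

    def _generate(is_training):
        # Pretend the two modes read from different sources.
        start = 0 if is_training else 100
        for i in range(start, start + 3):
            yield {"x": i}

    # Nothing runs yet: each lambda only captures its argument.
    train_gen = lambda: _generate(is_training=True)
    test_gen = lambda: _generate(is_training=False)

    print(list(train_gen()))  # [{'x': 0}, {'x': 1}, {'x': 2}]
    print(list(test_gen()))   # [{'x': 100}, {'x': 101}, {'x': 102}]
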
Example #4
 def _dataset_split_generators(self, dl_manager):
     # Split the 30 examples from the generator into 2 train shards and 1 test
     # shard.
     del dl_manager
     return [
         dataset_builder.SplitGenerator(generator_fn=dummy_data_generator,
                                        split_files=self.splits)
     ]
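
As a rough picture of the comment in Example #4, here is one way 30 examples could be distributed over 2 train shards and 1 test shard; the 20/10 train/test proportion is an assumption for the sketch, not something the snippet specifies:

    examples = list(range(30))

    # Assumed proportions, purely illustrative: 20 train, 10 test.
    train, test = examples[:20], examples[20:]

    # Contiguous chunking into 2 train shards and a single test shard.
    num_train_shards = 2
    shard_size = len(train) // num_train_shards
    train_shards = [train[i * shard_size:(i + 1) * shard_size]
                    for i in range(num_train_shards)]
    test_shards = [test]

    print([len(s) for s in train_shards])  # [10, 10]
    print([len(s) for s in test_shards])   # [10]
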
Example #5
 def _dataset_split_generators(self):
     # MNIST provides TRAIN and TEST splits, not a VALIDATION split, so we only
     # write the TRAIN and TEST splits to disk.
     train_gen = lambda: self._generate_mnist_examples(is_training=True)
     test_gen = lambda: self._generate_mnist_examples(is_training=False)
     train_splits = [
         self._split_files(split=dataset_builder.Split.TRAIN, num_shards=10)
     ]
     test_splits = [
         self._split_files(split=dataset_builder.Split.TEST, num_shards=1)
     ]
     return [
         dataset_builder.SplitGenerator(generator_fn=train_gen,
                                        split_files=train_splits),
         dataset_builder.SplitGenerator(generator_fn=test_gen,
                                        split_files=test_splits),
     ]
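
Every example here returns the same shape: a list of objects pairing one zero-argument generator callable with the split files it should populate. A plain-Python mock of that pairing (these names mirror, but are not, the real dataset_builder API):

    from collections import namedtuple

    SplitGenerator = namedtuple("SplitGenerator",
                                ["generator_fn", "split_files"])

    train = SplitGenerator(
        generator_fn=lambda: ({"image_id": i} for i in range(3)),
        split_files=[("TRAIN", 10)],  # (split name, num_shards), illustrative
    )
    test = SplitGenerator(
        generator_fn=lambda: ({"image_id": i} for i in range(3, 4)),
        split_files=[("TEST", 1)],
    )

    for sg in (train, test):
        print(sg.split_files, list(sg.generator_fn()))
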
Example #6
    def _dataset_split_generators(self, dl_manager):

        # Download the full MNIST database.
        filenames = {
            "train_data": _MNIST_TRAIN_DATA_FILENAME,
            "train_labels": _MNIST_TRAIN_LABELS_FILENAME,
            "test_data": _MNIST_TEST_DATA_FILENAME,
            "test_labels": _MNIST_TEST_LABELS_FILENAME,
        }
        mnist_files = dl_manager.download_and_extract({
            k: urllib.parse.urljoin(self.URL, v)
            for k, v in filenames.items()
        })

        # MNIST provides TRAIN and TEST splits, not a VALIDATION split, so we only
        # write the TRAIN and TEST splits to disk.
        train_gen = functools.partial(
            self._generate_mnist_examples,
            num_examples=_TRAIN_EXAMPLES,
            data_path=mnist_files["train_data"],
            label_path=mnist_files["train_labels"],
        )
        test_gen = functools.partial(
            self._generate_mnist_examples,
            num_examples=_TEST_EXAMPLES,
            data_path=mnist_files["test_data"],
            label_path=mnist_files["test_labels"],
        )
        train_splits = [
            self._split_files(split=dataset_builder.Split.TRAIN, num_shards=10)
        ]
        test_splits = [
            self._split_files(split=dataset_builder.Split.TEST, num_shards=1)
        ]
        return [
            dataset_builder.SplitGenerator(generator_fn=train_gen,
                                           split_files=train_splits),
            dataset_builder.SplitGenerator(generator_fn=test_gen,
                                           split_files=test_splits),
        ]
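
One detail in Example #6 worth knowing: urllib.parse.urljoin only appends the filename when the base URL ends with a trailing slash, so self.URL needs one. The base URL below is made up:

    import urllib.parse

    base = "https://example.com/mnist/"      # hypothetical base, trailing slash
    print(urllib.parse.urljoin(base, "train-images-idx3-ubyte.gz"))
    # https://example.com/mnist/train-images-idx3-ubyte.gz

    no_slash = "https://example.com/mnist"   # no trailing slash
    print(urllib.parse.urljoin(no_slash, "train-images-idx3-ubyte.gz"))
    # https://example.com/train-images-idx3-ubyte.gz -- last segment replaced
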
Example #7
    def _dataset_split_generators(self):
        def zero_to_thirty():
            for i in range(30):
                yield {"x": i, "y": -i, "z": tf.compat.as_text(str(i))}

        def thirty_to_forty():
            for i in range(30, 40):
                yield {"x": i, "y": -i, "z": tf.compat.as_text(str(i))}

        zero_to_thirty_splits = [
            self._split_files(split=dataset_builder.Split.TRAIN, num_shards=2),
            self._split_files(split=dataset_builder.Split.VALIDATION,
                              num_shards=1)
        ]
        thirty_to_forty_splits = [
            self._split_files(split=dataset_builder.Split.TEST, num_shards=1)
        ]
        return [
            dataset_builder.SplitGenerator(generator_fn=zero_to_thirty,
                                           split_files=zero_to_thirty_splits),
            dataset_builder.SplitGenerator(generator_fn=thirty_to_forty,
                                           split_files=thirty_to_forty_splits),
        ]
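
zero_to_thirty and thirty_to_forty in Example #7 differ only in their range. A parameterized helper plus functools.partial removes the duplication while keeping generator_fn a zero-argument callable (a sketch, not part of the original code):

    import functools

    import tensorflow as tf

    def _range_examples(start, stop):
        for i in range(start, stop):
            yield {"x": i, "y": -i, "z": tf.compat.as_text(str(i))}

    zero_to_thirty = functools.partial(_range_examples, 0, 30)
    thirty_to_forty = functools.partial(_range_examples, 30, 40)

    print(next(zero_to_thirty()))   # {'x': 0, 'y': 0, 'z': '0'}
    print(next(thirty_to_forty()))  # {'x': 30, 'y': -30, 'z': '30'}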