def test_writing(self):
    """Round-trip dataset info through disk and compare the two copies.

    The info stored under ``_INFO_DIR`` is loaded, written to a temporary
    directory and parsed again. The test asserts that both JSON payloads
    are equal, that the written LICENSE file matches the
    ``redistributionInfo.license`` entry of the JSON, and (on Python 3
    only) that ``repr(info)`` equals ``INFO_STR``.
    """
    # Build an info object bound to a builder living in a scratch data_dir,
    # then populate it from the reference directory.
    scratch_data_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
    mnist_builder = mnist.MNIST(data_dir=scratch_data_dir)
    info = dataset_info.DatasetInfo(
        builder=mnist_builder,
        features=mnist_builder.info.features,
    )
    info.read_from_directory(_INFO_DIR)

    # Parse the reference JSON file.
    reference_path = info._dataset_info_path(_INFO_DIR)
    with tf.io.gfile.GFile(reference_path) as reference_file:
        reference_json = json.load(reference_file)

    # Write the info out again and read back everything that was produced.
    with testing.tmp_dir(self.get_temp_dir()) as out_dir:
        info.write_to_directory(out_dir)

        # Parse the freshly written JSON file.
        with tf.io.gfile.GFile(info._dataset_info_path(out_dir)) as json_file:
            written_json = json.load(json_file)

        # Load the freshly written LICENSE file as text.
        with tf.io.gfile.GFile(info._license_path(out_dir)) as license_file:
            written_license = license_file.read()

    # What was read, written and read again must be unchanged.
    self.assertEqual(reference_json, written_json)

    # The LICENSE file must carry the license recorded in the JSON.
    self.assertEqual(
        reference_json["redistributionInfo"]["license"], written_license)

    # Only compared on Python 3 to avoid u'' formatting differences.
    if six.PY3:
        self.assertEqual(repr(info), INFO_STR)
def test_writing(self):
    """Round-trip dataset info through disk and compare the two copies.

    The info stored under ``_INFO_DIR`` is loaded, written to a temporary
    directory and parsed again. The test asserts that both JSON payloads
    are equal, that the written LICENSE file matches the
    ``redistributionInfo.license`` entry of the JSON, and that
    ``repr(info)`` equals ``_INFO_STR`` formatted with the builder's
    ``data_dir`` and mentions the ``test`` split.
    """
    # Build an info object bound to a builder living in a scratch data_dir,
    # then populate it from the reference directory.
    scratch_data_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
    mnist_builder = mnist.MNIST(data_dir=scratch_data_dir)
    info = dataset_info.DatasetInfo(
        builder=mnist_builder,
        features=mnist_builder.info.features,
    )
    info.read_from_directory(_INFO_DIR)

    # Parse the reference JSON file.
    reference_path = info._dataset_info_path(_INFO_DIR)
    with tf.io.gfile.GFile(reference_path) as reference_file:
        reference_json = json.load(reference_file)

    # Write the info out again and read back everything that was produced.
    with testing.tmp_dir(self.get_temp_dir()) as out_dir:
        info.write_to_directory(out_dir)

        # Parse the freshly written JSON file.
        with tf.io.gfile.GFile(info._dataset_info_path(out_dir)) as json_file:
            written_json = json.load(json_file)

        # Load the freshly written LICENSE file as text.
        with tf.io.gfile.GFile(info._license_path(out_dir)) as license_file:
            written_license = license_file.read()

    # What was read, written and read again must be unchanged.
    self.assertEqual(reference_json, written_json)

    # The LICENSE file must carry the license recorded in the JSON.
    self.assertEqual(
        reference_json["redistributionInfo"]["license"], written_license)

    # The expected repr is a template filled in with the generated data_dir,
    # because that path differs on every run.
    expected_repr = _INFO_STR % mnist_builder.data_dir
    self.assertEqual(expected_repr, repr(info))
    self.assertIn("'test': <SplitInfo num_examples=", repr(info))
def test_reading_from_gcs_bucket(self):
    """Check that the MNIST builder exposes initialized info with GCS enabled.

    An MNIST builder is created in a new temporary ``data_dir`` inside the
    ``gcs_access()`` context, and its ``info`` is asserted to be initialized
    and to report 10000 examples for the ``test`` split.
    """
    # The base TestCase prevents GCS access, so we explicitly ask it to restore
    # access here.
    with self.gcs_access():
        mnist_builder = mnist.MNIST(
            data_dir=tempfile.mkdtemp(dir=self.get_temp_dir()))
        # The builder's own info is the object under test.
        info = mnist_builder.info

        # A nominal check to see if we read it.
        self.assertTrue(info.initialized)
        self.assertEqual(10000, info.splits["test"].num_examples)
def test_str_smoke(self):
    """Smoke test: ``str()`` of the MNIST builder's info must not raise."""
    builder = mnist.MNIST(data_dir="/tmp/some_dummy_dir")
    # Only the absence of an exception matters; the string is discarded.
    str(builder.info)
def setUpClass(cls):
    """Create the temporary directory and MNIST builder stored on the class."""
    super(DatasetInfoTest, cls).setUpClass()
    shared_tmp_dir = testing.make_tmp_dir()
    cls._tfds_tmp_dir = shared_tmp_dir
    # The builder uses the same directory that is recorded on the class.
    cls._builder = mnist.MNIST(data_dir=shared_tmp_dir)